1 # Ocean Interpreter - Stoney Creek version
3 Ocean is intended to be a compiled language, so this interpreter is
4 not targeted at being the final product. It is, rather, an intermediate
5 stage and fills that role in two distinct ways.
7 Firstly, it exists as a platform to experiment with the early language
8 design. An interpreter is easy to write and easy to get working, so
9 the barrier for entry is lower if I aim to start with an interpreter.
11 Secondly, the plan for the Ocean compiler is to write it in the
12 [Ocean language](http://ocean-lang.org). To achieve this we naturally
13 need some sort of boot-strap process and this interpreter - written in
14 portable C - will fill that role. It will be used to bootstrap the
17 Two features that are not needed to fill either of these roles are
18 performance and completeness. The interpreter only needs to be fast
19 enough to run small test programs and occasionally to run the compiler
20 on itself. It only needs to be complete enough to test aspects of the
21 design which are developed before the compiler is working, and to run
22 the compiler on itself. Any features not used by the compiler when
23 compiling itself are superfluous. They may be included anyway, but
26 Nonetheless, the interpreter should end up being reasonably complete,
27 and any performance bottlenecks which appear and are easily fixed, will
32 This second version of the interpreter exists to test out the
33 structured statement providing conditions and iteration, and simple
34 variable scoping. Clearly we need some minimal other functionality so
35 that values can be tested and instructions iterated over. All that
36 functionality is clearly not normative at this stage (not that
37 anything is **really** normative yet) and will change, so early test
38 code will certainly break in later versions.
40 The under-test parts of the language are:
42 - conditional/looping structured statements
43 - the `use` statement which is needed for that
44 - Variable binding using ":=" and "::=", and assignment using "=".
46 Elements which are present to make a usable language are:
48 - "blocks" of multiple statements.
49 - `pass`: a statement which does nothing.
50 - expressions: `+`, `-`, `*`, `/`, `%` can apply to numbers and `++` can
51 catenate strings. `and`, `or`, `not` manipulate Booleans, and
52 normal comparison operators can work on all three types.
53 - `print`: will print the values in a list of expressions.
54 - `program`: is given a list of identifiers to initialize from
59 Versions of the interpreter which obviously do not support a complete
60 language will be named after creeks and streams. This one is Stoney
63 Once we have something reasonably resembling a complete language, the
64 names of rivers will be used.
65 Early versions of the compiler will be named after seas. Major
66 releases of the compiler will be named after oceans. Hopefully I will
67 be finished once I get to the Pacific Ocean release.
71 As well as parsing and executing a program, the interpreter can print
72 out the program from the parsed internal structure. This is useful
73 for validating the parsing.
74 So the main requirements of the interpreter are:
76 - Parse the program, possibly with tracing,
77 - Analyse the parsed program to ensure consistency,
79 - Execute the program.
81 This is all performed by a single C program extracted with
84 There will be two formats for printing the program: a default and one
85 that uses bracketing. So a `--brackets` command line option is needed
86 for that. Normally the first code section found is used, however an
87 alternate section can be requested so that a file (such as this one)
88 can contain multiple programs. This is effected with the `--section`
91 This code must be compiled with `-fplan9-extensions` so that anonymous
92 structures can be used.
94 ###### File: oceani.mk
96 myCFLAGS := -Wall -g -fplan9-extensions
97 CFLAGS := $(filter-out $(myCFLAGS),$(CFLAGS)) $(myCFLAGS)
98 myLDLIBS:= libparser.o libscanner.o libmdcode.o -licuuc
99 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
101 all :: $(LDLIBS) oceani
102 oceani.c oceani.h : oceani.mdc parsergen
103 ./parsergen -o oceani --LALR --tag Parser oceani.mdc
104 oceani.mk: oceani.mdc md2c
107 oceani: oceani.o $(LDLIBS)
108 $(CC) $(CFLAGS) -o oceani oceani.o $(LDLIBS)
110 ###### Parser: header
113 struct parse_context {
114 struct token_config config;
/*
 * container_of(ptr, type, member): given `ptr`, the address of the field
 * `member` inside an object of type `type`, yield a pointer to that
 * enclosing object by subtracting offsetof(type, member) from it.
 * The typed temporary __mptr makes the compiler check that `ptr` is
 * compatible with the member's type.  Relies on the GNU
 * statement-expression and typeof extensions.
 */
122 #define container_of(ptr, type, member) ({ \
123 const typeof( ((type *)0)->member ) *__mptr = (ptr); \
124 (type *)( (char *)__mptr - offsetof(type,member) );})
126 #define config2context(_conf) container_of(_conf, struct parse_context, \
135 #include <sys/mman.h>
/*
 * Usage message printed when an unrecognised option is given.
 * The two literals are concatenated by the compiler, so the first one
 * must end with a space to keep "--brackets" and "--section=" apart.
 */
static char Usage[] = "Usage: oceani --trace --print --noexec --brackets "
	"--section=SectionName prog.ocn\n";
156 static const struct option long_options[] = {
157 {"trace", 0, NULL, 't'},
158 {"print", 0, NULL, 'p'},
159 {"noexec", 0, NULL, 'n'},
160 {"brackets", 0, NULL, 'b'},
161 {"section", 1, NULL, 's'},
/*
 * Short-option string for getopt_long().  The trailing ':' after 's'
 * marks it as taking an argument, matching the "section" long option
 * (has_arg == 1); without it "-s NAME" would leave optarg NULL.
 */
const char *options = "tpnbs:";
165 int main(int argc, char *argv[])
171 char *section = NULL;
172 struct parse_context context = {
174 .ignored = (1 << TK_line_comment)
175 | (1 << TK_block_comment),
176 .number_chars = ".,_+-",
181 int doprint=0, dotrace=0, doexec=1, brackets=0;
184 while ((opt = getopt_long(argc, argv, options, long_options, NULL))
187 case 't': dotrace=1; break;
188 case 'p': doprint=1; break;
189 case 'n': doexec=0; break;
190 case 'b': brackets=1; break;
191 case 's': section = optarg; break;
192 default: fprintf(stderr, Usage);
196 if (optind >= argc) {
197 fprintf(stderr, "oceani: no input file given\n");
200 fd = open(argv[optind], O_RDONLY);
202 fprintf(stderr, "oceani: cannot open %s\n", argv[optind]);
205 context.file_name = argv[optind];
206 len = lseek(fd, 0, 2);
207 file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
208 s = code_extract(file, file+len, NULL);
210 fprintf(stderr, "oceani: could not find any code in %s\n",
215 ## context initialization
219 for (ss = s; ss; ss = ss->next) {
220 struct text sec = ss->section;
221 if (sec.len == strlen(section) &&
222 strncmp(sec.txt, section, sec.len) == 0)
226 prog = parse_oceani(ss->code, &context.config,
227 dotrace ? stderr : NULL);
229 fprintf(stderr, "oceani: cannot find section %s\n",
234 prog = parse_oceani(s->code, &context.config,
235 dotrace ? stderr : NULL);
237 fprintf(stderr, "oceani: fatal parser error.\n");
238 context.parse_error = 1;
241 print_exec(*prog, 0, brackets);
242 if (prog && doexec && !context.parse_error) {
243 if (!analyse_prog(*prog, &context)) {
244 fprintf(stderr, "oceani: type error in program - not running.\n");
247 interp_prog(*prog, argv+optind+1);
254 struct section *t = s->next;
260 ## free context types
261 exit(context.parse_error ? 1 : 0);
266 The four requirements of parse, analyse, print, interpret apply to
267 each language element individually so that is how most of the code
270 Three of the four are fairly self explanatory. The one that requires
271 a little explanation is the analysis step.
273 The current language design does not require the types of variables to
274 be declared, but they must still have a single type. Different
275 operations impose different requirements on the variables, for example
276 addition requires both arguments to be numeric, and assignment
277 requires the variable on the left to have the same type as the
278 expression on the right.
280 Analysis involves propagating these type requirements around and
281 consequently setting the type of each variable. If any requirements
282 are violated (e.g. a string is compared with a number) or if a
283 variable needs to have two different types, then an error is raised
284 and the program will not run.
286 If the same variable is declared in both branches of an 'if/else', or
287 in all cases of a 'switch' then the multiple instances may be merged
288 into just one variable if the variable is referenced after the
289 conditional statement. When this happens, the types must naturally be
290 consistent across all the branches. When the variable is not used
291 outside the if, the variables in the different branches are distinct
292 and can be of different types.
294 Determining the types of all variables early is important for
295 processing command line arguments. These can be assigned to any type
296 of variable, but we must first know the correct type so any required
297 conversion can happen. If a variable is associated with a command
298 line argument but no type can be interpreted (e.g. the variable is
299 only ever used in a `print` statement), then the type is set to
302 Undeclared names may only appear in "use" statements and "case" expressions.
303 These names are given a type of "label" and a unique value.
304 This allows them to fill the role of a name in an enumerated type, which
305 is useful for testing the `switch` statement.
307 As we will see, the condition part of a `while` statement can return
308 either a Boolean or some other type. This requires that the expected
309 type that gets passed around comprises a type (`enum vtype`) and a
310 flag to indicate that `Vbool` is also permitted.
312 As there are, as yet, no distinct types that are compatible, there
313 isn't much subtlety in the analysis. When we have distinct number
314 types, this will become more interesting.
318 When analysis discovers an inconsistency it needs to report an error;
319 just refusing to run the code ensures that the error doesn't cascade,
320 but by itself it isn't very useful. A clear understanding of the sort of
321 error messages that are useful will help guide the process of analysis.
323 At a simplistic level, the only sort of error that type analysis can
324 report is that the type of some construct doesn't match a contextual
325 requirement. For example, in `4 + "hello"` the addition provides a
326 contextual requirement for numbers, but `"hello"` is not a number. In
327 this particular example no further information is needed as the types
328 are obvious from local information. When a variable is involved that
329 isn't the case. It may be helpful to explain why the variable has a
330 particular type, by indicating the location where the type was set,
331 whether by declaration or usage.
333 Using a recursive-descent analysis we can easily detect a problem at
334 multiple locations. In "`hello:= "there"; 4 + hello`" the addition
335 will detect that one argument is not a number and the usage of `hello`
336 will detect that a number was wanted, but not provided. In this
337 (early) version of the language, we will generate error reports at
338 multiple locations, so the use of `hello` will report an error and
339 explain where the value was set, and the addition will report an error
340 and say why numbers are needed. To be able to report locations for
341 errors, each language element will need to record a file location
342 (line and column) and each variable will need to record the language
343 element where its type was set. For now we will assume that each line
344 of an error message indicates one location in the file, and up to 2
345 types. So we provide a `printf`-like function which takes a format, a
346 language element (a `struct exec` which has not yet been introduced), and 2
347 types. "`%1`" reports the first type, "`%2`" reports the second. We
348 will need a function to print the location, once we know how that is
349 stored. As will be explained later, there are sometimes extra rules for
350 type matching and they might affect error messages, so we need to pass those
353 As well as type errors, we sometimes need to report problems with
354 tokens, which might be unexpected or might name a type that has not
355 been defined. For these we have `tok_err()` which reports an error
356 with a given token. Each of the error functions sets the flag in the
357 context to indicate that parsing failed.
361 static void fput_loc(struct exec *loc, FILE *f);
363 ###### core functions
365 static void type_err(struct parse_context *c,
366 char *fmt, struct exec *loc,
367 struct type *t1, int rules, struct type *t2)
369 fprintf(stderr, "%s:", c->file_name);
370 fput_loc(loc, stderr);
371 for (; *fmt ; fmt++) {
378 case '%': fputc(*fmt, stderr); break; // NOTEST
379 default: fputc('?', stderr); break; // NOTEST
381 type_print(t1, stderr);
384 type_print(t2, stderr);
393 static void tok_err(struct parse_context *c, char *fmt, struct token *t)
395 fprintf(stderr, "%s:%d:%d: %s: %.*s\n", c->file_name, t->line, t->col, fmt,
396 t->txt.len, t->txt.txt);
402 One last introductory step before detailing the language elements and
403 providing their four requirements is to establish the data structures
404 to store these elements.
406 There are two key objects that we need to work with: executable
407 elements which comprise the program, and values which the program
408 works with. Between these are the variables in their various scopes
409 which hold the values, and types which classify the values stored and
410 manipulated by executables.
414 Values come in a wide range of types, with more likely to be added.
415 Each type needs to be able to parse and print its own values (for
416 convenience at least) as well as to compare two values, at least for
417 equality and possibly for order. For now, values might need to be
418 duplicated and freed, though eventually such manipulations will be
419 better integrated into the language.
421 Rather than requiring every numeric type to support all numeric
422 operations (add, multiply, etc), we allow types to be able to present
423 as one of a few standard types: integer, float, and fraction. The
424 existence of these conversion functions enables types to determine if
425 they are compatible with other types.
427 Named types are stored in a simple linked list. Objects of each type are "values"
428 which are often passed around by value.
435 ## value union fields
442 struct value (*init)(struct type *type);
443 struct value (*prepare)(struct type *type);
444 struct value (*parse)(struct type *type, char *str);
445 void (*print)(struct value val);
446 void (*print_type)(struct type *type, FILE *f);
447 int (*cmp_order)(struct value v1, struct value v2);
448 int (*cmp_eq)(struct value v1, struct value v2);
449 struct value (*dup)(struct value val);
450 void (*free)(struct value val);
451 int (*compat)(struct type *this, struct type *other);
452 long long (*to_int)(struct value *v);
453 double (*to_float)(struct value *v);
454 int (*to_mpq)(mpq_t *q, struct value *v);
462 struct type *typelist;
466 static struct type *find_type(struct parse_context *c, struct text s)
468 struct type *l = c->typelist;
471 text_cmp(l->name, s) != 0)
476 static struct type *add_type(struct parse_context *c, struct text s,
481 n = calloc(1, sizeof(*n));
484 n->next = c->typelist;
489 static void free_type(struct type *t)
491 /* The type is always a reference to something in the
492 * context, so we don't need to free anything.
496 static void free_value(struct value v)
502 static int type_compat(struct type *require, struct type *have, int rules)
504 if ((rules & Rboolok) && have == Tbool)
506 if ((rules & Rnolabel) && have == Tlabel)
508 if (!require || !have)
512 return require->compat(require, have);
514 return require == have;
517 static void type_print(struct type *type, FILE *f)
520 fputs("*unknown*type*", f);
521 else if (type->name.len)
522 fprintf(f, "%.*s", type->name.len, type->name.txt);
523 else if (type->print_type)
524 type->print_type(type, f);
526 fputs("*invalid*type*", f); // NOTEST
529 static struct value val_prepare(struct type *type)
534 return type->prepare(type);
539 static struct value val_init(struct type *type)
544 return type->init(type);
549 static struct value dup_value(struct value v)
552 return v.type->dup(v);
556 static int value_cmp(struct value left, struct value right)
558 if (left.type && left.type->cmp_order)
559 return left.type->cmp_order(left, right);
560 if (left.type && left.type->cmp_eq)
561 return left.type->cmp_eq(left, right);
565 static void print_value(struct value v)
567 if (v.type && v.type->print)
570 printf("*Unknown*"); // NOTEST
573 static struct value parse_value(struct type *type, char *arg)
577 if (type && type->parse)
578 return type->parse(type, arg);
579 rv.type = NULL; // NOTEST
585 static void free_value(struct value v);
586 static int type_compat(struct type *require, struct type *have, int rules);
587 static void type_print(struct type *type, FILE *f);
588 static struct value val_init(struct type *type);
589 static struct value dup_value(struct value v);
590 static int value_cmp(struct value left, struct value right);
591 static void print_value(struct value v);
592 static struct value parse_value(struct type *type, char *arg);
594 ###### free context types
596 while (context.typelist) {
597 struct type *t = context.typelist;
599 context.typelist = t->next;
605 Values of the base types can be numbers, which we represent as
606 multi-precision fractions, strings, Booleans and labels. When
607 analysing the program we also need to allow for places where no value
608 is meaningful (type `Tnone`) and where we don't know what type to
609 expect yet (type is `NULL`).
611 Values are never shared, they are always copied when used, and freed
612 when no longer needed.
614 When propagating type information around the program, we need to
615 determine if two types are compatible, where type `NULL` is compatible
616 with anything. There are two special cases with type compatibility,
617 both related to the Conditional Statement which will be described
618 later. In some cases a Boolean can be accepted as well as some other
619 primary type, and in others any type is acceptable except a label (`Vlabel`).
620 A separate function encoding these cases will simplify some code later.
622 When assigning command line arguments to variables, we need to be able
623 to parse each type from a string.
631 myLDLIBS := libnumber.o libstring.o -lgmp
632 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
634 ###### type union fields
635 enum vtype {Vnone, Vstr, Vnum, Vbool, Vlabel} vtype;
637 ###### value union fields
644 static void _free_value(struct value v)
646 switch (v.type->vtype) {
648 case Vstr: free(v.str.txt); break;
649 case Vnum: mpq_clear(v.num); break;
655 ###### value functions
657 static struct value _val_prepare(struct type *type)
662 switch(type->vtype) {
666 memset(&rv.num, 0, sizeof(rv.num));
682 static struct value _val_init(struct type *type)
687 switch(type->vtype) {
688 case Vnone: // NOTEST
691 mpq_init(rv.num); break;
693 rv.str.txt = malloc(1);
699 case Vlabel: // NOTEST
700 rv.label = NULL; // NOTEST
706 static struct value _dup_value(struct value v)
710 switch (rv.type->vtype) {
711 case Vnone: // NOTEST
721 mpq_set(rv.num, v.num);
724 rv.str.len = v.str.len;
725 rv.str.txt = malloc(rv.str.len);
726 memcpy(rv.str.txt, v.str.txt, v.str.len);
732 static int _value_cmp(struct value left, struct value right)
735 if (left.type != right.type)
736 return left.type - right.type; // NOTEST
737 switch (left.type->vtype) {
738 case Vlabel: cmp = left.label == right.label ? 0 : 1; break;
739 case Vnum: cmp = mpq_cmp(left.num, right.num); break;
740 case Vstr: cmp = text_cmp(left.str, right.str); break;
741 case Vbool: cmp = left.bool - right.bool; break;
742 case Vnone: cmp = 0; // NOTEST
747 static void _print_value(struct value v)
749 switch (v.type->vtype) {
750 case Vnone: // NOTEST
751 printf("*no-value*"); break; // NOTEST
752 case Vlabel: // NOTEST
753 printf("*label-%p*", v.label); break; // NOTEST
755 printf("%.*s", v.str.len, v.str.txt); break;
757 printf("%s", v.bool ? "True":"False"); break;
762 mpf_set_q(fl, v.num);
763 gmp_printf("%Fg", fl);
770 static struct value _parse_value(struct type *type, char *arg)
778 switch(type->vtype) {
779 case Vlabel: // NOTEST
780 case Vnone: // NOTEST
781 val.type = NULL; // NOTEST
784 val.str.len = strlen(arg);
785 val.str.txt = malloc(val.str.len);
786 memcpy(val.str.txt, arg, val.str.len);
793 tx.txt = arg; tx.len = strlen(tx.txt);
794 if (number_parse(val.num, tail, tx) == 0)
797 mpq_neg(val.num, val.num);
799 printf("Unsupported suffix: %s\n", arg);
804 if (strcasecmp(arg, "true") == 0 ||
805 strcmp(arg, "1") == 0)
807 else if (strcasecmp(arg, "false") == 0 ||
808 strcmp(arg, "0") == 0)
811 printf("Bad bool: %s\n", arg);
819 static void _free_value(struct value v);
821 static struct type base_prototype = {
823 .prepare = _val_prepare,
824 .parse = _parse_value,
825 .print = _print_value,
826 .cmp_order = _value_cmp,
827 .cmp_eq = _value_cmp,
832 static struct type *Tbool, *Tstr, *Tnum, *Tnone, *Tlabel;
835 static struct type *add_base_type(struct parse_context *c, char *n, enum vtype vt)
837 struct text txt = { n, strlen(n) };
840 t = add_type(c, txt, &base_prototype);
845 ###### context initialization
847 Tbool = add_base_type(&context, "Boolean", Vbool);
848 Tstr = add_base_type(&context, "string", Vstr);
849 Tnum = add_base_type(&context, "number", Vnum);
850 Tnone = add_base_type(&context, "none", Vnone);
851 Tlabel = add_base_type(&context, "label", Vlabel);
855 Variables are scoped named values. We store the names in a linked
856 list of "bindings" sorted lexically, and use sequential search and
863 struct binding *next; // in lexical order
867 This linked list is stored in the parse context so that "reduce"
868 functions can find or add variables, and so the analysis phase can
869 ensure that every variable gets a type.
873 struct binding *varlist; // In lexical order
877 static struct binding *find_binding(struct parse_context *c, struct text s)
879 struct binding **l = &c->varlist;
884 (cmp = text_cmp((*l)->name, s)) < 0)
888 n = calloc(1, sizeof(*n));
895 Each name can be linked to multiple variables defined in different
896 scopes. Each scope starts where the name is declared and continues
897 until the end of the containing code block. Scopes of a given name
898 cannot nest, so a declaration while a name is in-scope is an error.
900 ###### binding fields
901 struct variable *var;
905 struct variable *previous;
907 struct binding *name;
908 struct exec *where_decl;// where name was declared
909 struct exec *where_set; // where type was set
913 While the naming seems strange, we include local constants in the
914 definition of variables. A name declared `var := value` can
915 subsequently be changed, but a name declared `var ::= value` cannot -
918 ###### variable fields
921 Scopes in parallel branches can be partially merged. More
922 specifically, if a given name is declared in both branches of an
923 if/else then its scope is a candidate for merging. Similarly if
924 every branch of an exhaustive switch (e.g. has an "else" clause)
925 declares a given name, then the scopes from the branches are
926 candidates for merging.
928 Note that names declared inside a loop (which is only parallel to
929 itself) are never visible after the loop. Similarly names defined in
930 scopes which are not parallel, such as those started by `for` and
931 `switch`, are never visible after the scope. Only variables defined in
932 both `then` and `else` (including the implicit then after an `if`, and
933 excluding `then` used with `for`) and in all `case`s and `else` of a
934 `switch` or `while` can be visible beyond the `if`/`switch`/`while`.
936 Labels, which are a bit like variables, follow different rules.
937 Labels are not explicitly declared, but if an undeclared name appears
938 in a context where a label is legal, that effectively declares the
939 name as a label. The declaration remains in force (or in scope) at
940 least to the end of the immediately containing block and conditionally
941 in any larger containing block which does not declare the name in some
942 other way. Importantly, the conditional scope extension happens even
943 if the label is only used in one parallel branch of a conditional --
944 when used in one branch it is treated as having been declared in all
947 Merge candidates are tentatively visible beyond the end of the
948 branching statement which creates them. If the name is used, the
949 merge is affirmed and they become a single variable visible at the
950 outer layer. If not - if it is redeclared first - the merge lapses.
952 To track scopes we have an extra stack, implemented as a linked list,
953 which roughly parallels the parse stack and which is used exclusively
954 for scoping. When a new scope is opened, a new frame is pushed and
955 the child-count of the parent frame is incremented. This child-count
956 is used to distinguish between the first of a set of parallel scopes,
957 in which declared variables must not be in scope, and subsequent
958 branches, where they must already be conditionally scoped.
960 To push a new frame *before* any code in the frame is parsed, we need a
961 grammar reduction. This is most easily achieved with a grammar
962 element which derives the empty string, and creates the new scope when
963 it is recognized. This can be placed, for example, between a keyword
964 like "if" and the code following it.
968 struct scope *parent;
974 struct scope *scope_stack;
977 static void scope_pop(struct parse_context *c)
979 struct scope *s = c->scope_stack;
981 c->scope_stack = s->parent;
986 static void scope_push(struct parse_context *c)
988 struct scope *s = calloc(1, sizeof(*s));
990 c->scope_stack->child_count += 1;
991 s->parent = c->scope_stack;
999 OpenScope -> ${ scope_push(config2context(config)); }$
1002 Each variable records a scope depth and is in one of four states:
1004 - "in scope". This is the case between the declaration of the
1005 variable and the end of the containing block, and also between
1006 the usage which affirms a merge and the end of that block.
1008 The scope depth is not greater than the current parse context scope
1009 nest depth. When the block of that depth closes, the state will
1010 change. To achieve this, all "in scope" variables are linked
1011 together as a stack in nesting order.
1013 - "pending". The "in scope" block has closed, but other parallel
1014 scopes are still being processed. So far, every parallel block at
1015 the same level that has closed has declared the name.
1017 The scope depth is the depth of the last parallel block that
1018 enclosed the declaration, and that has closed.
1020 - "conditionally in scope". The "in scope" block and all parallel
1021 scopes have closed, and no further mention of the name has been
1022 seen. This state includes a secondary nest depth which records the
1023 outermost scope seen since the variable became conditionally in
1024 scope. If a use of the name is found, the variable becomes "in
1025 scope" and that secondary depth becomes the recorded scope depth.
1026 If the name is declared as a new variable, the old variable becomes
1027 "out of scope" and the recorded scope depth stays unchanged.
1029 - "out of scope". The variable is neither in scope nor conditionally
1030 in scope. It is permanently out of scope now and can be removed from
1031 the "in scope" stack.
1034 ###### variable fields
1035 int depth, min_depth;
1036 enum { OutScope, PendingScope, CondScope, InScope } scope;
1037 struct variable *in_scope;
1039 ###### parse context
1041 struct variable *in_scope;
1043 All variables with the same name are linked together using the
1044 'previous' link. Those variables that have
1045 been affirmatively merged all have a 'merged' pointer that points to
1046 one primary variable - the most recently declared instance. When
1047 merging variables, we need to also adjust the 'merged' pointer on any
1048 other variables that had previously been merged with the one that will
1049 no longer be primary.
1051 ###### variable fields
1052 struct variable *merged;
1054 ###### ast functions
1056 static void variable_merge(struct variable *primary, struct variable *secondary)
1060 if (primary->merged)
1062 primary = primary->merged;
1064 for (v = primary->previous; v; v=v->previous)
1065 if (v == secondary || v == secondary->merged ||
1066 v->merged == secondary ||
1067 (v->merged && v->merged == secondary->merged)) {
1068 v->scope = OutScope;
1069 v->merged = primary;
1073 ###### free context vars
1075 while (context.varlist) {
1076 struct binding *b = context.varlist;
1077 struct variable *v = b->var;
1078 context.varlist = b->next;
1081 struct variable *t = v;
1089 #### Manipulating Bindings
1091 When a name is conditionally visible, a new declaration discards the
1092 old binding - the condition lapses. Conversely a usage of the name
1093 affirms the visibility and extends it to the end of the containing
1094 block - i.e. the block that contains both the original declaration and
1095 the latest usage. This is determined from `min_depth`. When a
1096 conditionally visible variable gets affirmed like this, it is also
1097 merged with other conditionally visible variables with the same name.
1099 When we parse a variable declaration we either signal an error if the
1100 name is currently bound, or create a new variable at the current nest
1101 depth if the name is unbound or bound to a conditionally scoped or
1102 pending-scope variable. If the previous variable was conditionally
1103 scoped, it and its homonyms become out-of-scope.
1105 When we parse a variable reference (including non-declarative
1106 assignment) we signal an error if the name is not bound or is bound to
1107 a pending-scope variable; update the scope if the name is bound to a
1108 conditionally scoped variable; or just proceed normally if the named
1109 variable is in scope.
1111 When we exit a scope, any variables bound at this level are either
1112 marked out of scope or pending-scoped, depending on whether the
1113 scope was sequential or parallel.
1115 When exiting a parallel scope we check if there are any variables that
1116 were previously pending and are still visible. If there are, then
1117 they weren't redeclared in the most recent scope, so they cannot be
1118 merged and must become out-of-scope. If it is not the first of
1119 parallel scopes (based on `child_count`), we check that there was a
1120 previous binding that is still pending-scope. If there isn't, the new
1121 variable must now be out-of-scope.
1123 When exiting a sequential scope that immediately enclosed parallel
1124 scopes, we need to resolve any pending-scope variables. If there was
1125 no `else` clause, and we cannot determine that the `switch` was exhaustive,
1126 we need to mark all pending-scope variables as out-of-scope. Otherwise
1127 all pending-scope variables become conditionally scoped.
1130 enum closetype { CloseSequential, CloseParallel, CloseElse };
1132 ###### ast functions
1134 static struct variable *var_decl(struct parse_context *c, struct text s)
1136 struct binding *b = find_binding(c, s);
1137 struct variable *v = b->var;
1139 switch (v ? v->scope : OutScope) {
1141 /* Caller will report the error */
1145 v && v->scope == CondScope;
1147 v->scope = OutScope;
1151 v = calloc(1, sizeof(*v));
1152 v->previous = b->var;
1155 v->min_depth = v->depth = c->scope_depth;
1157 v->in_scope = c->in_scope;
1159 v->val = val_prepare(NULL);
1163 static struct variable *var_ref(struct parse_context *c, struct text s)
1165 struct binding *b = find_binding(c, s);
1166 struct variable *v = b->var;
1167 struct variable *v2;
1169 switch (v ? v->scope : OutScope) {
1172 /* Signal an error - once that is possible */
1175 /* All CondScope variables of this name need to be merged
1176 * and become InScope
1178 v->depth = v->min_depth;
1180 for (v2 = v->previous;
1181 v2 && v2->scope == CondScope;
1183 variable_merge(v, v2);
1191 static void var_block_close(struct parse_context *c, enum closetype ct)
1193 /* close of all variables that are in_scope */
1194 struct variable *v, **vp, *v2;
1197 for (vp = &c->in_scope;
1198 v = *vp, v && v->depth > c->scope_depth && v->min_depth > c->scope_depth;
1202 case CloseParallel: /* handle PendingScope */
1206 if (c->scope_stack->child_count == 1)
1207 v->scope = PendingScope;
1208 else if (v->previous &&
1209 v->previous->scope == PendingScope)
1210 v->scope = PendingScope;
1211 else if (v->val.type == Tlabel)
1212 v->scope = PendingScope;
1213 else if (v->name->var == v)
1214 v->scope = OutScope;
1215 if (ct == CloseElse) {
1216 /* All Pending variables with this name
1217 * are now Conditional */
1219 v2 && v2->scope == PendingScope;
1221 v2->scope = CondScope;
1226 v2 && v2->scope == PendingScope;
1228 if (v2->val.type != Tlabel)
1229 v2->scope = OutScope;
1231 case OutScope: break;
1234 case CloseSequential:
1235 if (v->val.type == Tlabel)
1236 v->scope = PendingScope;
1239 v->scope = OutScope;
1242 /* There was no 'else', so we can only become
1243 * conditional if we know the cases were exhaustive,
1244 * and that doesn't mean anything yet.
1245 * So only labels become conditional..
1248 v2 && v2->scope == PendingScope;
1250 if (v2->val.type == Tlabel) {
1251 v2->scope = CondScope;
1252 v2->min_depth = c->scope_depth;
1254 v2->scope = OutScope;
1257 case OutScope: break;
1261 if (v->scope == OutScope)
1270 Executables can be lots of different things. In many cases an
1271 executable is just an operation combined with one or two other
1272 executables. This allows for expressions and lists etc. Other times
1273 an executable is something quite specific like a constant or variable
1274 name. So we define a `struct exec` to be a general executable with a
1275 type, and a `struct binode` which is a subclass of `exec`, forms a
1276 node in a binary tree, and holds an operation. There will be other
1277 subclasses, and to access these we need to be able to `cast` the
1278 `exec` into the various other types.
1281 #define cast(structname, pointer) ({ \
1282 const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \
1283 if (__mptr && *__mptr != X##structname) abort(); \
1284 (struct structname *)( (char *)__mptr);})
1286 #define new(structname) ({ \
1287 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1288 __ptr->type = X##structname; \
1289 __ptr->line = -1; __ptr->column = -1; \
1292 #define new_pos(structname, token) ({ \
1293 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1294 __ptr->type = X##structname; \
1295 __ptr->line = token.line; __ptr->column = token.col; \
1304 enum exec_types type;
1312 struct exec *left, *right;
1315 ###### ast functions
1317 static int __fput_loc(struct exec *loc, FILE *f)
1321 if (loc->line >= 0) {
1322 fprintf(f, "%d:%d: ", loc->line, loc->column);
1325 if (loc->type == Xbinode)
1326 return __fput_loc(cast(binode,loc)->left, f) ||
1327 __fput_loc(cast(binode,loc)->right, f);
1330 static void fput_loc(struct exec *loc, FILE *f)
1332 if (!__fput_loc(loc, f))
1333 fprintf(f, "??:??: "); // NOTEST
1336 Each different type of `exec` node needs a number of functions
1337 defined, a bit like methods.  We must be able to free it,
1338 print it, analyse it and execute it. Once we have specific `exec`
1339 types we will need to parse them too. Let's take this a bit more
1344 The parser generator requires a `free_foo` function for each struct
1345 that stores attributes and they will be `exec`s and subtypes thereof.
1346 So we need `free_exec` which can handle all the subtypes, and we need
1349 ###### ast functions
1351 static void free_binode(struct binode *b)
1356 free_exec(b->right);
1360 ###### core functions
1361 static void free_exec(struct exec *e)
1370 ###### forward decls
1372 static void free_exec(struct exec *e);
1374 ###### free exec cases
1375 case Xbinode: free_binode(cast(binode, e)); break;
1379 Printing an `exec` requires that we know the current indent level for
1380 printing line-oriented components. As will become clear later, we
1381 also want to know what sort of bracketing to use.
1383 ###### ast functions
1385 static void do_indent(int i, char *str)
1392 ###### core functions
1393 static void print_binode(struct binode *b, int indent, int bracket)
1397 ## print binode cases
1401 static void print_exec(struct exec *e, int indent, int bracket)
1407 print_binode(cast(binode, e), indent, bracket); break;
1412 ###### forward decls
1414 static void print_exec(struct exec *e, int indent, int bracket);
1418 As discussed, analysis involves propagating type requirements around
1419 the program and looking for errors.
1421 So `propagate_types` is passed an expected type (being a `struct type`
1422 pointer together with some `val_rules` flags) that the `exec` is
1423 expected to return, and returns the type that it does return, either
1424 of which can be `NULL` signifying "unknown". An `ok` flag is passed
1425 by reference. It is set to `0` when an error is found, and `2` when
1426 any change is made. If it remains unchanged at `1`, then no more
1427 propagation is needed.
1431 enum val_rules {Rnolabel = 1<<0, Rboolok = 1<<1, Rnoconstant = 2<<1};
1435 if (rules & Rnolabel)
1436 fputs(" (labels not permitted)", stderr);
1439 ###### core functions
1441 static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1442 struct type *type, int rules)
1449 switch (prog->type) {
1452 struct binode *b = cast(binode, prog);
1454 ## propagate binode cases
1458 ## propagate exec cases
1465 Interpreting an `exec` doesn't require anything but the `exec`. State
1466 is stored in variables and each variable will be directly linked from
1467 within the `exec` tree. The exception to this is the whole `program`
1468 which needs to look at command line arguments. The `program` will be
1469 interpreted separately.
1471 Each `exec` can return a value, which may be `Tnone` but must be non-NULL.
1473 ###### core functions
1476 struct value val, *lval;
1479 static struct lrval _interp_exec(struct exec *e);
1481 static struct value interp_exec(struct exec *e)
1483 struct lrval ret = _interp_exec(e);
1486 return dup_value(*ret.lval);
1491 static struct value *linterp_exec(struct exec *e)
1493 struct lrval ret = _interp_exec(e);
1498 static struct lrval _interp_exec(struct exec *e)
1501 struct value rv, *lrv = NULL;
1512 struct binode *b = cast(binode, e);
1513 struct value left, right, *lleft;
1514 left.type = right.type = Tnone;
1516 ## interp binode cases
1518 free_value(left); free_value(right);
1521 ## interp exec cases
1530 Now that we have the shape of the interpreter in place we can add some
1531 complex types and connect them in to the data structures and the
1532 different phases of parse, analyse, print, interpret.
1534 For now, just arrays.
1538 Arrays can be declared by giving a size and a type, as `[size]type` so
1539 `freq:[26]number` declares `freq` to be an array of 26 numbers. The
1540 size can be an arbitrary expression which is evaluated when the name
1543 Arrays cannot be assigned. When pointers are introduced we will also
1544 introduce array slices which can refer to part or all of an array -
1545 the assignment syntax will create a slice. For now, an array can only
1546 ever be referenced by the name it is declared with. It is likely that
1547 a "`copy`" primitive will eventually be defined which can be used to
1548 make a copy of an array with controllable depth.
1550 ###### type union fields
1554 struct variable *vsize;
1555 struct type *member;
1558 ###### value union fields
1560 struct value *elmnts;
1563 ###### value functions
1565 static struct value array_prepare(struct type *type)
1570 ret.array.elmnts = NULL;
1574 static struct value array_init(struct type *type)
1580 if (type->array.vsize) {
1583 mpz_tdiv_q(q, mpq_numref(type->array.vsize->val.num),
1584 mpq_denref(type->array.vsize->val.num));
1585 type->array.size = mpz_get_si(q);
1588 ret.array.elmnts = calloc(type->array.size,
1589 sizeof(ret.array.elmnts[0]));
1590 for (i = 0; ret.array.elmnts && i < type->array.size; i++)
1591 ret.array.elmnts[i] = val_init(type->array.member);
1595 static void array_free(struct value val)
1599 if (val.array.elmnts)
1600 for (i = 0; i < val.type->array.size; i++)
1601 free_value(val.array.elmnts[i]);
1602 free(val.array.elmnts);
1605 static int array_compat(struct type *require, struct type *have)
1607 if (have->compat != require->compat)
1609 /* Both are arrays, so we can look at details */
1610 if (!type_compat(require->array.member, have->array.member, 0))
1612 if (require->array.vsize == NULL && have->array.vsize == NULL)
1613 return require->array.size == have->array.size;
1615 return require->array.vsize == have->array.vsize;
1618 static void array_print_type(struct type *type, FILE *f)
1621 if (type->array.vsize) {
1622 struct binding *b = type->array.vsize->name;
1623 fprintf(f, "%.*s]", b->name.len, b->name.txt);
1625 fprintf(f, "%d]", type->array.size);
1626 type_print(type->array.member, f);
1629 static struct type array_prototype = {
1630 .prepare = array_prepare,
1632 .print_type = array_print_type,
1633 .compat = array_compat,
1639 | [ NUMBER ] Type ${
1640 $0 = calloc(1, sizeof(struct type));
1641 *($0) = array_prototype;
1642 $0->array.member = $<4;
1643 $0->array.vsize = NULL;
1645 struct parse_context *c = config2context(config);
1648 if (number_parse(num, tail, $2.txt) == 0)
1649 tok_err(c, "error: unrecognised number", &$2);
1651 tok_err(c, "error: unsupported number suffix", &$2);
1653 $0->array.size = mpz_get_ui(mpq_numref(num));
1654 if (mpz_cmp_ui(mpq_denref(num), 1) != 0) {
1655 tok_err(c, "error: array size must be an integer",
1657 } else if (mpz_cmp_ui(mpq_numref(num), 1UL << 30) >= 0)
1658 tok_err(c, "error: array size is too large",
1662 $0->next= c->anon_typelist;
1663 c->anon_typelist = $0;
1667 | [ IDENTIFIER ] Type ${ {
1668 struct parse_context *c = config2context(config);
1669 struct variable *v = var_ref(c, $2.txt);
1672 tok_err(config2context(config), "error: name undeclared", &$2);
1673 else if (!v->constant)
1674 tok_err(config2context(config), "error: array size must be a constant", &$2);
1676 $0 = calloc(1, sizeof(struct type));
1677 *($0) = array_prototype;
1678 $0->array.member = $<4;
1680 $0->array.vsize = v;
1681 $0->next= c->anon_typelist;
1682 c->anon_typelist = $0;
1685 ###### parse context
1687 struct type *anon_typelist;
1689 ###### free context types
1691 while (context.anon_typelist) {
1692 struct type *t = context.anon_typelist;
1694 context.anon_typelist = t->next;
1701 ###### variable grammar
1703 | Variable [ Expression ] ${ {
1704 struct binode *b = new(binode);
1711 ###### print binode cases
1713 print_exec(b->left, -1, 0);
1715 print_exec(b->right, -1, 0);
1719 ###### propagate binode cases
1721 /* left must be an array, right must be a number,
1722 * result is the member type of the array
1724 propagate_types(b->right, c, ok, Tnum, 0);
1725 t = propagate_types(b->left, c, ok, NULL, rules & Rnoconstant);
1726 if (!t || t->compat != array_compat) {
1727 type_err(c, "error: %1 cannot be indexed", prog, t, 0, NULL);
1731 if (!type_compat(type, t->array.member, rules)) {
1732 type_err(c, "error: have %1 but need %2", prog,
1733 t->array.member, rules, type);
1736 return t->array.member;
1740 ###### interp binode cases
1745 lleft = linterp_exec(b->left);
1746 right = interp_exec(b->right);
1748 mpz_tdiv_q(q, mpq_numref(right.num), mpq_denref(right.num));
1752 if (i >= 0 && i < lleft->type->array.size)
1753 lrv = &lleft->array.elmnts[i];
1755 rv = val_init(lleft->type->array.member);
1759 ## Language elements
1761 Each language element needs to be parsed, printed, analysed,
1762 interpreted, and freed. There are several, so let's just start with
1763 the easy ones and work our way up.
1767 We have already met values as separate objects. When manifest
1768 constants appear in the program text, that must result in an executable
1769 which has a constant value. So the `val` structure embeds a value in
1785 $0 = new_pos(val, $1);
1786 $0->val.type = Tbool;
1790 $0 = new_pos(val, $1);
1791 $0->val.type = Tbool;
1795 $0 = new_pos(val, $1);
1796 $0->val.type = Tnum;
1799 if (number_parse($0->val.num, tail, $1.txt) == 0)
1800 mpq_init($0->val.num);
1802 tok_err(config2context(config), "error: unsupported number suffix",
1807 $0 = new_pos(val, $1);
1808 $0->val.type = Tstr;
1811 string_parse(&$1, '\\', &$0->val.str, tail);
1813 tok_err(config2context(config), "error: unsupported string suffix",
1818 $0 = new_pos(val, $1);
1819 $0->val.type = Tstr;
1822 string_parse(&$1, '\\', &$0->val.str, tail);
1824 tok_err(config2context(config), "error: unsupported string suffix",
1829 ###### print exec cases
1832 struct val *v = cast(val, e);
1833 if (v->val.type == Tstr)
1835 print_value(v->val);
1836 if (v->val.type == Tstr)
1841 ###### propagate exec cases
1844 struct val *val = cast(val, prog);
1845 if (!type_compat(type, val->val.type, rules)) {
1846 type_err(c, "error: expected %1%r found %2",
1847 prog, type, rules, val->val.type);
1850 return val->val.type;
1853 ###### interp exec cases
1855 rv = dup_value(cast(val, e)->val);
1858 ###### ast functions
1859 static void free_val(struct val *v)
1867 ###### free exec cases
1868 case Xval: free_val(cast(val, e)); break;
1870 ###### ast functions
1871 // Move all nodes from 'b' to 'rv', reversing the order.
1872 // In 'b' 'left' is a list, and 'right' is the last node.
1873 // In 'rv', left' is the first node and 'right' is a list.
1874 static struct binode *reorder_bilist(struct binode *b)
1876 struct binode *rv = NULL;
1879 struct exec *t = b->right;
1883 b = cast(binode, b->left);
1893 Just as we used a `val` to wrap a value into an `exec`, we similarly
1894 need a `var` to wrap a `variable` into an exec. While each `val`
1895 contained a copy of the value, each `var` holds a link to the variable
1896 because it really is the same variable no matter where it appears.
1897 When a variable is used, we need to remember to follow the `->merged`
1898 link to find the primary instance.
1906 struct variable *var;
1912 VariableDecl -> IDENTIFIER : ${ {
1913 struct variable *v = var_decl(config2context(config), $1.txt);
1914 $0 = new_pos(var, $1);
1919 v = var_ref(config2context(config), $1.txt);
1921 type_err(config2context(config), "error: variable '%v' redeclared",
1922 $0, Tnone, 0, Tnone);
1923 type_err(config2context(config), "info: this is where '%v' was first declared",
1924 v->where_decl, Tnone, 0, Tnone);
1927 | IDENTIFIER :: ${ {
1928 struct variable *v = var_decl(config2context(config), $1.txt);
1929 $0 = new_pos(var, $1);
1935 v = var_ref(config2context(config), $1.txt);
1937 type_err(config2context(config), "error: variable '%v' redeclared",
1938 $0, Tnone, 0, Tnone);
1939 type_err(config2context(config), "info: this is where '%v' was first declared",
1940 v->where_decl, Tnone, 0, Tnone);
1943 | IDENTIFIER : Type ${ {
1944 struct variable *v = var_decl(config2context(config), $1.txt);
1945 $0 = new_pos(var, $1);
1950 v->val = val_prepare($<3);
1952 v = var_ref(config2context(config), $1.txt);
1954 type_err(config2context(config), "error: variable '%v' redeclared",
1955 $0, Tnone, 0, Tnone);
1956 type_err(config2context(config), "info: this is where '%v' was first declared",
1957 v->where_decl, Tnone, 0, Tnone);
1960 | IDENTIFIER :: Type ${ {
1961 struct variable *v = var_decl(config2context(config), $1.txt);
1962 $0 = new_pos(var, $1);
1967 v->val = val_prepare($<3);
1970 v = var_ref(config2context(config), $1.txt);
1972 type_err(config2context(config), "error: variable '%v' redeclared",
1973 $0, Tnone, 0, Tnone);
1974 type_err(config2context(config), "info: this is where '%v' was first declared",
1975 v->where_decl, Tnone, 0, Tnone);
1980 Variable -> IDENTIFIER ${ {
1981 struct variable *v = var_ref(config2context(config), $1.txt);
1982 $0 = new_pos(var, $1);
1984 /* This might be a label - allocate a var just in case */
1985 v = var_decl(config2context(config), $1.txt);
1987 v->val = val_prepare(Tlabel);
1988 v->val.label = &v->val;
1992 cast(var, $0)->var = v;
1997 Type -> IDENTIFIER ${
1998 $0 = find_type(config2context(config), $1.txt);
2000 tok_err(config2context(config),
2001 "error: undefined type", &$1);
2008 ###### print exec cases
2011 struct var *v = cast(var, e);
2013 struct binding *b = v->var->name;
2014 printf("%.*s", b->name.len, b->name.txt);
2021 if (loc->type == Xvar) {
2022 struct var *v = cast(var, loc);
2024 struct binding *b = v->var->name;
2025 fprintf(stderr, "%.*s", b->name.len, b->name.txt);
2027 fputs("???", stderr); // NOTEST
2029 fputs("NOTVAR", stderr); // NOTEST
2032 ###### propagate exec cases
2036 struct var *var = cast(var, prog);
2037 struct variable *v = var->var;
2039 type_err(c, "%d:BUG: no variable!!", prog, Tnone, 0, Tnone); // NOTEST
2041 return Tnone; // NOTEST
2045 if (v->constant && (rules & Rnoconstant)) {
2046 type_err(c, "error: Cannot assign to a constant: %v",
2047 prog, NULL, 0, NULL);
2048 type_err(c, "info: name was defined as a constant here",
2049 v->where_decl, NULL, 0, NULL);
2053 if (v->val.type == NULL) {
2054 if (type && *ok != 0) {
2055 v->val = val_prepare(type);
2056 v->where_set = prog;
2061 if (!type_compat(type, v->val.type, rules)) {
2062 type_err(c, "error: expected %1%r but variable '%v' is %2", prog,
2063 type, rules, v->val.type);
2064 type_err(c, "info: this is where '%v' was set to %1", v->where_set,
2065 v->val.type, rules, Tnone);
2073 ###### interp exec cases
2076 struct var *var = cast(var, e);
2077 struct variable *v = var->var;
2085 ###### ast functions
2087 static void free_var(struct var *v)
2092 ###### free exec cases
2093 case Xvar: free_var(cast(var, e)); break;
2095 ### Expressions: Conditional
2097 Our first user of the `binode` will be conditional expressions, which
2098 is a bit odd as they actually have three components. That will be
2099 handled by having 2 binodes for each expression. The conditional
2100 expression is the lowest precedence operator, so it gets to define
2101 what an "Expression" is. The next level up is "BoolExpr", which
2104 Conditional expressions are of the form "value `if` condition `else`
2105 other_value".  There is no associativity with this operator: the
2106 values and conditions can only be other conditional expressions if
2107 they are enclosed in parentheses. Allowing nesting without
2108 parentheses would be too confusing.
2116 Expression -> BoolExpr if BoolExpr else BoolExpr ${ {
2117 struct binode *b1 = new(binode);
2118 struct binode *b2 = new(binode);
2127 | BoolExpr ${ $0 = $<1; }$
2129 ###### print binode cases
2132 b2 = cast(binode, b->right);
2133 print_exec(b2->left, -1, 0);
2135 print_exec(b->left, -1, 0);
2137 print_exec(b2->right, -1, 0);
2140 ###### propagate binode cases
2143 /* cond must be Tbool, others must match */
2144 struct binode *b2 = cast(binode, b->right);
2147 propagate_types(b->left, c, ok, Tbool, 0);
2148 t = propagate_types(b2->left, c, ok, type, Rnolabel);
2149 t2 = propagate_types(b2->right, c, ok, type ?: t, Rnolabel);
2153 ###### interp binode cases
2156 struct binode *b2 = cast(binode, b->right);
2157 left = interp_exec(b->left);
2159 rv = interp_exec(b2->left);
2161 rv = interp_exec(b2->right);
2165 ### Expressions: Boolean
2167 The next class of expressions to use the `binode` will be Boolean
2168 expressions. As I haven't implemented precedence in the parser
2169 generator yet, we need different names for each precedence level used
2170 by expressions.  The outermost or lowest level precedence after
2171 conditional expressions are Boolean operators which form a `BoolExpr`
2172 out of `BTerm`s and `BFact`s. As well as `or` `and`, and `not` we
2173 have `and then` and `or else` which only evaluate the second operand
2174 if the result would make a difference.
2186 BoolExpr -> BoolExpr or BTerm ${ {
2187 struct binode *b = new(binode);
2193 | BoolExpr or else BTerm ${ {
2194 struct binode *b = new(binode);
2200 | BTerm ${ $0 = $<1; }$
2202 BTerm -> BTerm and BFact ${ {
2203 struct binode *b = new(binode);
2209 | BTerm and then BFact ${ {
2210 struct binode *b = new(binode);
2216 | BFact ${ $0 = $<1; }$
2218 BFact -> not BFact ${ {
2219 struct binode *b = new(binode);
2226 ###### print binode cases
2228 print_exec(b->left, -1, 0);
2230 print_exec(b->right, -1, 0);
2233 print_exec(b->left, -1, 0);
2234 printf(" and then ");
2235 print_exec(b->right, -1, 0);
2238 print_exec(b->left, -1, 0);
2240 print_exec(b->right, -1, 0);
2243 print_exec(b->left, -1, 0);
2244 printf(" or else ");
2245 print_exec(b->right, -1, 0);
2249 print_exec(b->right, -1, 0);
2252 ###### propagate binode cases
2258 /* both must be Tbool, result is Tbool */
2259 propagate_types(b->left, c, ok, Tbool, 0);
2260 propagate_types(b->right, c, ok, Tbool, 0);
2261 if (type && type != Tbool) {
2262 type_err(c, "error: %1 operation found where %2 expected", prog,
2268 ###### interp binode cases
2270 rv = interp_exec(b->left);
2271 right = interp_exec(b->right);
2272 rv.bool = rv.bool && right.bool;
2275 rv = interp_exec(b->left);
2277 rv = interp_exec(b->right);
2280 rv = interp_exec(b->left);
2281 right = interp_exec(b->right);
2282 rv.bool = rv.bool || right.bool;
2285 rv = interp_exec(b->left);
2287 rv = interp_exec(b->right);
2290 rv = interp_exec(b->right);
2294 ### Expressions: Comparison
2296 Of slightly higher precedence than Boolean expressions are
2298 A comparison takes arguments of any type, but the two types must be
2301 To simplify the parsing we introduce an `eop` which can record an
2302 expression operator.
2309 ###### ast functions
2310 static void free_eop(struct eop *e)
2325 | Expr CMPop Expr ${ {
2326 struct binode *b = new(binode);
2332 | Expr ${ $0 = $<1; }$
2337 CMPop -> < ${ $0.op = Less; }$
2338 | > ${ $0.op = Gtr; }$
2339 | <= ${ $0.op = LessEq; }$
2340 | >= ${ $0.op = GtrEq; }$
2341 | == ${ $0.op = Eql; }$
2342 | != ${ $0.op = NEql; }$
2344 ###### print binode cases
2352 print_exec(b->left, -1, 0);
2354 case Less: printf(" < "); break;
2355 case LessEq: printf(" <= "); break;
2356 case Gtr: printf(" > "); break;
2357 case GtrEq: printf(" >= "); break;
2358 case Eql: printf(" == "); break;
2359 case NEql: printf(" != "); break;
2360 default: abort(); // NOTEST
2362 print_exec(b->right, -1, 0);
2365 ###### propagate binode cases
2372 /* Both must match but not be labels, result is Tbool */
2373 t = propagate_types(b->left, c, ok, NULL, Rnolabel);
2375 propagate_types(b->right, c, ok, t, 0);
2377 t = propagate_types(b->right, c, ok, NULL, Rnolabel);
2379 t = propagate_types(b->left, c, ok, t, 0);
2381 if (!type_compat(type, Tbool, 0)) {
2382 type_err(c, "error: Comparison returns %1 but %2 expected", prog,
2383 Tbool, rules, type);
2388 ###### interp binode cases
2397 left = interp_exec(b->left);
2398 right = interp_exec(b->right);
2399 cmp = value_cmp(left, right);
2402 case Less: rv.bool = cmp < 0; break;
2403 case LessEq: rv.bool = cmp <= 0; break;
2404 case Gtr: rv.bool = cmp > 0; break;
2405 case GtrEq: rv.bool = cmp >= 0; break;
2406 case Eql: rv.bool = cmp == 0; break;
2407 case NEql: rv.bool = cmp != 0; break;
2408 default: rv.bool = 0; break; // NOTEST
2413 ### Expressions: The rest
2415 The remaining expressions with the highest precedence are arithmetic
2416 and string concatenation. They are `Expr`, `Term`, and `Factor`.
2417 The `Factor` is where the `Value` and `Variable` that we already have
2420 `+` and `-` are both infix and prefix operations (where they are
2421 absolute value and negation). These have different operator names.
2423 We also have a 'Bracket' operator which records where parentheses were
2424 found. This makes it easy to reproduce these when printing. Once
2425 precedence is handled better I might be able to discard this.
2437 Expr -> Expr Eop Term ${ {
2438 struct binode *b = new(binode);
2444 | Term ${ $0 = $<1; }$
2446 Term -> Term Top Factor ${ {
2447 struct binode *b = new(binode);
2453 | Factor ${ $0 = $<1; }$
2455 Factor -> ( Expression ) ${ {
2456 struct binode *b = new_pos(binode, $1);
2462 struct binode *b = new(binode);
2467 | Value ${ $0 = $<1; }$
2468 | Variable ${ $0 = $<1; }$
2471 Eop -> + ${ $0.op = Plus; }$
2472 | - ${ $0.op = Minus; }$
2474 Uop -> + ${ $0.op = Absolute; }$
2475 | - ${ $0.op = Negate; }$
2477 Top -> * ${ $0.op = Times; }$
2478 | / ${ $0.op = Divide; }$
2479 | % ${ $0.op = Rem; }$
2480 | ++ ${ $0.op = Concat; }$
2482 ###### print binode cases
2489 print_exec(b->left, indent, 0);
2491 case Plus: fputs(" + ", stdout); break;
2492 case Minus: fputs(" - ", stdout); break;
2493 case Times: fputs(" * ", stdout); break;
2494 case Divide: fputs(" / ", stdout); break;
2495 case Rem: fputs(" % ", stdout); break;
2496 case Concat: fputs(" ++ ", stdout); break;
2497 default: abort(); // NOTEST
2499 print_exec(b->right, indent, 0);
2503 print_exec(b->right, indent, 0);
2507 print_exec(b->right, indent, 0);
2511 print_exec(b->right, indent, 0);
2515 ###### propagate binode cases
2521 /* both must be numbers, result is Tnum */
2524 /* as propagate_types ignores a NULL,
2525 * unary ops fit here too */
2526 propagate_types(b->left, c, ok, Tnum, 0);
2527 propagate_types(b->right, c, ok, Tnum, 0);
2528 if (!type_compat(type, Tnum, 0)) {
2529 type_err(c, "error: Arithmetic returns %1 but %2 expected", prog,
2536 /* both must be Tstr, result is Tstr */
2537 propagate_types(b->left, c, ok, Tstr, 0);
2538 propagate_types(b->right, c, ok, Tstr, 0);
2539 if (!type_compat(type, Tstr, 0)) {
2540 type_err(c, "error: Concat returns %1 but %2 expected", prog,
2547 return propagate_types(b->right, c, ok, type, 0);
2549 ###### interp binode cases
2552 rv = interp_exec(b->left);
2553 right = interp_exec(b->right);
2554 mpq_add(rv.num, rv.num, right.num);
2557 rv = interp_exec(b->left);
2558 right = interp_exec(b->right);
2559 mpq_sub(rv.num, rv.num, right.num);
2562 rv = interp_exec(b->left);
2563 right = interp_exec(b->right);
2564 mpq_mul(rv.num, rv.num, right.num);
2567 rv = interp_exec(b->left);
2568 right = interp_exec(b->right);
2569 mpq_div(rv.num, rv.num, right.num);
2574 left = interp_exec(b->left);
2575 right = interp_exec(b->right);
2576 mpz_init(l); mpz_init(r); mpz_init(rem);
2577 mpz_tdiv_q(l, mpq_numref(left.num), mpq_denref(left.num));
2578 mpz_tdiv_q(r, mpq_numref(right.num), mpq_denref(right.num));
2579 mpz_tdiv_r(rem, l, r);
2580 rv = val_init(Tnum);
2581 mpq_set_z(rv.num, rem);
2582 mpz_clear(r); mpz_clear(l); mpz_clear(rem);
2586 rv = interp_exec(b->right);
2587 mpq_neg(rv.num, rv.num);
2590 rv = interp_exec(b->right);
2591 mpq_abs(rv.num, rv.num);
2594 rv = interp_exec(b->right);
2597 left = interp_exec(b->left);
2598 right = interp_exec(b->right);
2600 rv.str = text_join(left.str, right.str);
2604 ###### value functions
2606 static struct text text_join(struct text a, struct text b)
2609 rv.len = a.len + b.len;
2610 rv.txt = malloc(rv.len);
2611 memcpy(rv.txt, a.txt, a.len);
2612 memcpy(rv.txt+a.len, b.txt, b.len);
2617 ### Blocks, Statements, and Statement lists.
2619 Now that we have expressions out of the way we need to turn to
2620 statements. There are simple statements and more complex statements.
2621 Simple statements do not contain newlines, complex statements do.
2623 Statements often come in sequences and we have corresponding simple
2624 statement lists and complex statement lists.
2625 The former comprise only simple statements separated by semicolons.
2626 The latter comprise complex statements and simple statement lists.  They are
2627 separated by newlines. Thus the semicolon is only used to separate
2628 simple statements on the one line. This may be overly restrictive,
2629 but I'm not sure I ever want a complex statement to share a line with
2632 Note that a simple statement list can still use multiple lines if
2633 subsequent lines are indented, so
2635 ###### Example: wrapped simple statement list
2640 is a single simple statement list. This might allow room for
2641 confusion, so I'm not set on it yet.
2643 A simple statement list needs no extra syntax. A complex statement
2644 list has two syntactic forms. It can be enclosed in braces (much like
2645 C blocks), or it can be introduced by a colon and continue until an
2646 unindented newline (much like Python blocks). With this extra syntax
2647 it is referred to as a block.
2649 Note that a block does not have to include any newlines if it only
2650 contains simple statements. So both of:
2652 if condition: a=b; d=f
2654 if condition { a=b; print f }
2658 In either case the list is constructed from a `binode` list with
2659 `Block` as the operator. When parsing the list it is most convenient
2660 to append to the end, so a list is a list and a statement. When using
2661 the list it is more convenient to consider a list to be a statement
2662 and a list. So we need a function to re-order a list.
2663 `reorder_bilist` serves this purpose.
2665 The only stand-alone statement we introduce at this stage is `pass`
2666 which does nothing and is represented as a `NULL` pointer in a `Block`
2667 list. Other stand-alone statements will follow once the infrastructure
2687 Block -> Open Statementlist Close ${ $0 = $<2; }$
2688 | Open Newlines Statementlist Close ${ $0 = $<3; }$
2689 | Open SimpleStatements } ${ $0 = reorder_bilist($<2); }$
2690 | Open Newlines SimpleStatements } ${ $0 = reorder_bilist($<3); }$
2691 | : Statementlist ${ $0 = $<2; }$
2692 | : SimpleStatements ${ $0 = reorder_bilist($<2); }$
2694 Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<1); }$
2696 ComplexStatements -> ComplexStatements ComplexStatement ${
2702 | ComplexStatements NEWLINE ${ $0 = $<1; }$
2703 | ComplexStatement ${
2711 ComplexStatement -> SimpleStatements NEWLINE ${
2712 $0 = reorder_bilist($<1);
2714 ## ComplexStatement Grammar
2717 SimpleStatements -> SimpleStatements ; SimpleStatement ${
2723 | SimpleStatement ${
2729 | SimpleStatements ; ${ $0 = $<1; }$
2731 SimpleStatement -> pass ${ $0 = NULL; }$
2732 ## SimpleStatement Grammar
2734 ###### print binode cases
2738 if (b->left == NULL)
2741 print_exec(b->left, indent, 0);
2744 print_exec(b->right, indent, 0);
2747 // block, one per line
2748 if (b->left == NULL)
2749 do_indent(indent, "pass\n");
2751 print_exec(b->left, indent, bracket);
2753 print_exec(b->right, indent, bracket);
2757 ###### propagate binode cases
2760 /* If any statement returns something other than Tnone
2761 * or Tbool then all such must return same type.
2762 * As each statement may be Tnone or something else,
2763 * we must always pass NULL (unknown) down, otherwise an incorrect
2764 * error might occur. We never return Tnone unless it is
2769 for (e = b; e; e = cast(binode, e->right)) {
2770 t = propagate_types(e->left, c, ok, NULL, rules);
2771 if ((rules & Rboolok) && t == Tbool)
2773 if (t && t != Tnone && t != Tbool) {
2776 else if (t != type) {
2777 type_err(c, "error: expected %1%r, found %2",
2778 e->left, type, rules, t);
2786 ###### interp binode cases
2788 while (rv.type == Tnone &&
2791 rv = interp_exec(b->left);
2792 b = cast(binode, b->right);
2796 ### The Print statement
2798 `print` is a simple statement that takes a comma-separated list of
2799 expressions and prints the values separated by spaces and terminated
2800 by a newline. No control of formatting is possible.
2802 `print` faces the same list-ordering issue as blocks, and uses the
2808 ###### SimpleStatement Grammar
2810 | print ExpressionList ${
2811 $0 = reorder_bilist($<2);
2813 | print ExpressionList , ${
2818 $0 = reorder_bilist($0);
2829 ExpressionList -> ExpressionList , Expression ${
2842 ###### print binode cases
2845 do_indent(indent, "print");
2849 print_exec(b->left, -1, 0);
2853 b = cast(binode, b->right);
2859 ###### propagate binode cases
2862 /* don't care but all must be consistent */
2863 propagate_types(b->left, c, ok, NULL, Rnolabel);
2864 propagate_types(b->right, c, ok, NULL, Rnolabel);
2867 ###### interp binode cases
2873 for ( ; b; b = cast(binode, b->right))
2877 left = interp_exec(b->left);
2890 ###### Assignment statement
2892 An assignment will assign a value to a variable, providing it hasn't
2893 been declared as a constant. The analysis phase ensures that the type
2894 will be correct so the interpreter just needs to perform the
2895 calculation. There is a form of assignment which declares a new
2896 variable as well as assigning a value. If a name is assigned before
2897 it is declared, an error will be raised as the name is created as
2898 `Tlabel` and it is illegal to assign to such names.
2904 ###### SimpleStatement Grammar
2905 | Variable = Expression ${
2911 | VariableDecl = Expression ${
2919 if ($1->var->where_set == NULL) {
2920 type_err(config2context(config), "Variable declared with no type or value: %v",
2930 ###### print binode cases
2933 do_indent(indent, "");
2934 print_exec(b->left, indent, 0);
2936 print_exec(b->right, indent, 0);
2943 struct variable *v = cast(var, b->left)->var;
2944 do_indent(indent, "");
2945 print_exec(b->left, indent, 0);
2946 if (cast(var, b->left)->var->constant) {
2947 if (v->where_decl == v->where_set) {
2949 type_print(v->val.type, stdout);
2954 if (v->where_decl == v->where_set) {
2956 type_print(v->val.type, stdout);
2963 print_exec(b->right, indent, 0);
2970 ###### propagate binode cases
2974 /* Both must match and not be labels,
2975 * Type must support 'dup',
2976 * For Assign, left must not be constant.
2979 t = propagate_types(b->left, c, ok, NULL,
2980 Rnolabel | (b->op == Assign ? Rnoconstant : 0));
2985 if (propagate_types(b->right, c, ok, t, 0) != t)
2986 if (b->left->type == Xvar)
2987 type_err(c, "info: variable '%v' was set as %1 here.",
2988 cast(var, b->left)->var->where_set, t, rules, Tnone);
2990 t = propagate_types(b->right, c, ok, NULL, Rnolabel);
2992 propagate_types(b->left, c, ok, t,
2993 (b->op == Assign ? Rnoconstant : 0));
2995 if (t && t->dup == NULL) {
2996 type_err(c, "error: cannot assign value of type %1", b, t, 0, NULL);
3003 ###### interp binode cases
3006 lleft = linterp_exec(b->left);
3007 right = interp_exec(b->right);
3012 free_value(right); // NOTEST
3018 struct variable *v = cast(var, b->left)->var;
3022 right = interp_exec(b->right);
3024 right = val_init(v->val.type);
3031 ### The `use` statement
3033 The `use` statement is the last "simple" statement. It is needed when
3034 the condition in a conditional statement is a block. `use` works much
3035 like `return` in C, but only completes the `condition`, not the whole
3041 ###### SimpleStatement Grammar
3043 $0 = new_pos(binode, $1);
3048 ###### print binode cases
3051 do_indent(indent, "use ");
3052 print_exec(b->right, -1, 0);
3057 ###### propagate binode cases
3060 /* result matches value */
3061 return propagate_types(b->right, c, ok, type, 0);
3063 ###### interp binode cases
3066 rv = interp_exec(b->right);
3069 ### The Conditional Statement
3071 This is the biggy and currently the only complex statement. This
3072 subsumes `if`, `while`, `do/while`, `switch`, and some parts of `for`.
3073 It is comprised of a number of parts, all of which are optional though
3074 set combinations apply. Each part is (usually) a key word (`then` is
3075 sometimes optional) followed by either an expression or a code block,
3076 except the `casepart` which is a "key word and an expression" followed
3077 by a code block. The code-block option is valid for all parts and,
3078 where an expression is also allowed, the code block can use the `use`
3079 statement to report a value. If the code block does not report a value
3080 the effect is similar to reporting `True`.
3082 The `else` and `case` parts, as well as `then` when combined with
3083 `if`, can contain a `use` statement which will apply to some
3084 containing conditional statement. `for` parts, `do` parts and `then`
3085 parts used with `for` can never contain a `use`, except in some
3086 subordinate conditional statement.
3088 If there is a `forpart`, it is executed first, only once.
3089 If there is a `dopart`, then it is executed repeatedly providing
3090 always that the `condpart` or `cond`, if present, does not return a non-True
3091 value. `condpart` can fail to return any value if it simply executes
3092 to completion. This is treated the same as returning `True`.
3094 If there is a `thenpart` it will be executed whenever the `condpart`
3095 or `cond` returns True (or does not return any value), but this will happen
3096 *after* `dopart` (when present).
3098 If `elsepart` is present it will be executed at most once when the
3099 condition returns `False` or some value that isn't `True` and isn't
3100 matched by any `casepart`. If there are any `casepart`s, they will be
3101 executed when the condition returns a matching value.
3103 The particular sorts of values allowed in case parts have not yet been
3104 determined in the language design, so nothing is prohibited.
3106 The various blocks in this complex statement potentially provide scope
3107 for variables as described earlier. Each such block must include the
3108 "OpenScope" nonterminal before parsing the block, and must call
3109 `var_block_close()` when closing the block.
3111 The code following "`if`", "`switch`" and "`for`" does not get its own
3112 scope, but is in a scope covering the whole statement, so names
3113 declared there cannot be redeclared elsewhere. Similarly the
3114 condition following "`while`" is in a scope that covers the body
3115 ("`do`" part) of the loop, and which does not allow conditional scope
3116 extension. Code following "`then`" (both looping and non-looping),
3117 "`else`" and "`case`" each get their own local scope.
3119 The type requirements on the code block in a `whilepart` are quite
3120 unusual. It is allowed to return a value of some identifiable type, in
3121 which case the loop aborts and an appropriate `casepart` is run, or it
3122 can return a Boolean, in which case the loop either continues to the
3123 `dopart` (on `True`) or aborts and runs the `elsepart` (on `False`).
3124 This is different both from the `ifpart` code block which is expected to
3125 return a Boolean, and from the `switchpart` code block which is expected to
3126 return the same type as the casepart values. The correct analysis of
3127 the type of the `whilepart` code block is the reason for the
3128 `Rboolok` flag which is passed to `propagate_types()`.
3130 The `cond_statement` cannot fit into a `binode` so a new `exec` is
3139 struct exec *action;
3140 struct casepart *next;
3142 struct cond_statement {
3144 struct exec *forpart, *condpart, *dopart, *thenpart, *elsepart;
3145 struct casepart *casepart;
3148 ###### ast functions
3150 static void free_casepart(struct casepart *cp)
3154 free_exec(cp->value);
3155 free_exec(cp->action);
3162 static void free_cond_statement(struct cond_statement *s)
3166 free_exec(s->forpart);
3167 free_exec(s->condpart);
3168 free_exec(s->dopart);
3169 free_exec(s->thenpart);
3170 free_exec(s->elsepart);
3171 free_casepart(s->casepart);
3175 ###### free exec cases
3176 case Xcond_statement: free_cond_statement(cast(cond_statement, e)); break;
3178 ###### ComplexStatement Grammar
3179 | CondStatement ${ $0 = $<1; }$
3184 // both ForThen and Whilepart open scopes, and CondSuffix only
3185 // closes one - so in the first branch here we have another to close.
3186 CondStatement -> ForThen WhilePart CondSuffix ${
3188 $0->forpart = $1.forpart; $1.forpart = NULL;
3189 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
3190 $0->condpart = $2.condpart; $2.condpart = NULL;
3191 $0->dopart = $2.dopart; $2.dopart = NULL;
3192 var_block_close(config2context(config), CloseSequential);
3194 | WhilePart CondSuffix ${
3196 $0->condpart = $1.condpart; $1.condpart = NULL;
3197 $0->dopart = $1.dopart; $1.dopart = NULL;
3199 | SwitchPart CondSuffix ${
3203 | IfPart IfSuffix ${
3205 $0->condpart = $1.condpart; $1.condpart = NULL;
3206 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
3207 // This is where we close an "if" statement
3208 var_block_close(config2context(config), CloseSequential);
3211 CondSuffix -> IfSuffix ${
3213 // This is where we close scope of the whole
3214 // "for" or "while" statement
3215 var_block_close(config2context(config), CloseSequential);
3217 | CasePart CondSuffix ${
3219 $1->next = $0->casepart;
3224 CasePart -> Newlines case Expression OpenScope Block ${
3225 $0 = calloc(1,sizeof(struct casepart));
3228 var_block_close(config2context(config), CloseParallel);
3230 | case Expression OpenScope Block ${
3231 $0 = calloc(1,sizeof(struct casepart));
3234 var_block_close(config2context(config), CloseParallel);
3238 IfSuffix -> Newlines ${ $0 = new(cond_statement); }$
3239 | Newlines else OpenScope Block ${
3240 $0 = new(cond_statement);
3242 var_block_close(config2context(config), CloseElse);
3244 | else OpenScope Block ${
3245 $0 = new(cond_statement);
3247 var_block_close(config2context(config), CloseElse);
3249 | Newlines else OpenScope CondStatement ${
3250 $0 = new(cond_statement);
3252 var_block_close(config2context(config), CloseElse);
3254 | else OpenScope CondStatement ${
3255 $0 = new(cond_statement);
3257 var_block_close(config2context(config), CloseElse);
3262 // These scopes are closed in CondSuffix
3263 ForPart -> for OpenScope SimpleStatements ${
3264 $0 = reorder_bilist($<3);
3266 | for OpenScope Block ${
3270 ThenPart -> then OpenScope SimpleStatements ${
3271 $0 = reorder_bilist($<3);
3272 var_block_close(config2context(config), CloseSequential);
3274 | then OpenScope Block ${
3276 var_block_close(config2context(config), CloseSequential);
3279 ThenPartNL -> ThenPart OptNL ${
3283 // This scope is closed in CondSuffix
3284 WhileHead -> while OpenScope Block ${
3289 ForThen -> ForPart OptNL ThenPartNL ${
3297 // This scope is closed in CondSuffix
3298 WhilePart -> while OpenScope Expression Block ${
3299 $0.type = Xcond_statement;
3303 | WhileHead OptNL do Block ${
3304 $0.type = Xcond_statement;
3309 IfPart -> if OpenScope Expression OpenScope Block ${
3310 $0.type = Xcond_statement;
3313 var_block_close(config2context(config), CloseParallel);
3315 | if OpenScope Block OptNL then OpenScope Block ${
3316 $0.type = Xcond_statement;
3319 var_block_close(config2context(config), CloseParallel);
3323 // This scope is closed in CondSuffix
3324 SwitchPart -> switch OpenScope Expression ${
3327 | switch OpenScope Block ${
3331 ###### print exec cases
3333 case Xcond_statement:
3335 struct cond_statement *cs = cast(cond_statement, e);
3336 struct casepart *cp;
3338 do_indent(indent, "for");
3339 if (bracket) printf(" {\n"); else printf(":\n");
3340 print_exec(cs->forpart, indent+1, bracket);
3343 do_indent(indent, "} then {\n");
3345 do_indent(indent, "then:\n");
3346 print_exec(cs->thenpart, indent+1, bracket);
3348 if (bracket) do_indent(indent, "}\n");
3352 if (cs->condpart && cs->condpart->type == Xbinode &&
3353 cast(binode, cs->condpart)->op == Block) {
3355 do_indent(indent, "while {\n");
3357 do_indent(indent, "while:\n");
3358 print_exec(cs->condpart, indent+1, bracket);
3360 do_indent(indent, "} do {\n");
3362 do_indent(indent, "do:\n");
3363 print_exec(cs->dopart, indent+1, bracket);
3365 do_indent(indent, "}\n");
3367 do_indent(indent, "while ");
3368 print_exec(cs->condpart, 0, bracket);
3373 print_exec(cs->dopart, indent+1, bracket);
3375 do_indent(indent, "}\n");
3380 do_indent(indent, "switch");
3382 do_indent(indent, "if");
3383 if (cs->condpart && cs->condpart->type == Xbinode &&
3384 cast(binode, cs->condpart)->op == Block) {
3389 print_exec(cs->condpart, indent+1, bracket);
3391 do_indent(indent, "}\n");
3393 do_indent(indent, "then:\n");
3394 print_exec(cs->thenpart, indent+1, bracket);
3398 print_exec(cs->condpart, 0, bracket);
3404 print_exec(cs->thenpart, indent+1, bracket);
3406 do_indent(indent, "}\n");
3411 for (cp = cs->casepart; cp; cp = cp->next) {
3412 do_indent(indent, "case ");
3413 print_exec(cp->value, -1, 0);
3418 print_exec(cp->action, indent+1, bracket);
3420 do_indent(indent, "}\n");
3423 do_indent(indent, "else");
3428 print_exec(cs->elsepart, indent+1, bracket);
3430 do_indent(indent, "}\n");
3435 ###### propagate exec cases
3436 case Xcond_statement:
3438 // forpart and dopart must return Tnone
3439 // thenpart must return Tnone if there is a dopart,
3440 // otherwise it is like elsepart.
3442 // be bool if there is no casepart
3443 // match casepart->values if there is a switchpart
3444 // either be bool or match casepart->value if there
3446 // elsepart and casepart->action must match the return type
3447 // expected of this statement.
3448 struct cond_statement *cs = cast(cond_statement, prog);
3449 struct casepart *cp;
3451 t = propagate_types(cs->forpart, c, ok, Tnone, 0);
3452 if (!type_compat(Tnone, t, 0))
3454 t = propagate_types(cs->dopart, c, ok, Tnone, 0);
3455 if (!type_compat(Tnone, t, 0))
3458 t = propagate_types(cs->thenpart, c, ok, Tnone, 0);
3459 if (!type_compat(Tnone, t, 0))
3462 if (cs->casepart == NULL)
3463 propagate_types(cs->condpart, c, ok, Tbool, 0);
3465 /* Condpart must match case values, with bool permitted */
3467 for (cp = cs->casepart;
3468 cp && !t; cp = cp->next)
3469 t = propagate_types(cp->value, c, ok, NULL, 0);
3470 if (!t && cs->condpart)
3471 t = propagate_types(cs->condpart, c, ok, NULL, Rboolok);
3472 // Now we have a type (I hope) push it down
3474 for (cp = cs->casepart; cp; cp = cp->next)
3475 propagate_types(cp->value, c, ok, t, 0);
3476 propagate_types(cs->condpart, c, ok, t, Rboolok);
3479 // (if)then, else, and case parts must return expected type.
3480 if (!cs->dopart && !type)
3481 type = propagate_types(cs->thenpart, c, ok, NULL, rules);
3483 type = propagate_types(cs->elsepart, c, ok, NULL, rules);
3484 for (cp = cs->casepart;
3487 type = propagate_types(cp->action, c, ok, NULL, rules);
3490 propagate_types(cs->thenpart, c, ok, type, rules);
3491 propagate_types(cs->elsepart, c, ok, type, rules);
3492 for (cp = cs->casepart; cp ; cp = cp->next)
3493 propagate_types(cp->action, c, ok, type, rules);
3499 ###### interp exec cases
3500 case Xcond_statement:
3502 struct value v, cnd;
3503 struct casepart *cp;
3504 struct cond_statement *c = cast(cond_statement, e);
3507 interp_exec(c->forpart);
3510 cnd = interp_exec(c->condpart);
3513 if (!(cnd.type == Tnone ||
3514 (cnd.type == Tbool && cnd.bool != 0)))
3516 // cnd is Tnone or Tbool, doesn't need to be freed
3518 interp_exec(c->dopart);
3521 rv = interp_exec(c->thenpart);
3522 if (rv.type != Tnone || !c->dopart)
3526 } while (c->dopart);
3528 for (cp = c->casepart; cp; cp = cp->next) {
3529 v = interp_exec(cp->value);
3530 if (value_cmp(v, cnd) == 0) {
3533 rv = interp_exec(cp->action);
3540 rv = interp_exec(c->elsepart);
3547 ### Finally the whole program.
3549 Somewhat reminiscent of Pascal, a (current) Ocean program starts with
3550 the keyword "program" and a list of variable names which are assigned
3551 values from command line arguments. Following this is a `block` which
3552 is the code to execute.
3554 As this is the top level, several things are handled a bit
3556 The whole program is not interpreted by `interp_exec` as that isn't
3557 passed the argument list which the program requires. Similarly type
3558 analysis is a bit more interesting at this level.
3563 ###### Parser: grammar
3566 Program -> program OpenScope Varlist Block OptNL ${
3569 $0->left = reorder_bilist($<3);
3571 var_block_close(config2context(config), CloseSequential);
3572 if (config2context(config)->scope_stack) abort();
3575 tok_err(config2context(config),
3576 "error: unhandled parse error", &$1);
3579 Varlist -> Varlist ArgDecl ${
3588 ArgDecl -> IDENTIFIER ${ {
3589 struct variable *v = var_decl(config2context(config), $1.txt);
3596 ###### print binode cases
3598 do_indent(indent, "program");
3599 for (b2 = cast(binode, b->left); b2; b2 = cast(binode, b2->right)) {
3601 print_exec(b2->left, 0, 0);
3607 print_exec(b->right, indent+1, bracket);
3609 do_indent(indent, "}\n");
3612 ###### propagate binode cases
3613 case Program: abort(); // NOTEST
3615 ###### core functions
3617 static int analyse_prog(struct exec *prog, struct parse_context *c)
3619 struct binode *b = cast(binode, prog);
3626 propagate_types(b->right, c, &ok, Tnone, 0);
3631 for (b = cast(binode, b->left); b; b = cast(binode, b->right)) {
3632 struct var *v = cast(var, b->left);
3633 if (!v->var->val.type) {
3634 v->var->where_set = b;
3635 v->var->val = val_prepare(Tstr);
3638 b = cast(binode, prog);
3641 propagate_types(b->right, c, &ok, Tnone, 0);
3646 /* Make sure everything is still consistent */
3647 propagate_types(b->right, c, &ok, Tnone, 0);
3651 static void interp_prog(struct exec *prog, char **argv)
3653 struct binode *p = cast(binode, prog);
3659 al = cast(binode, p->left);
3661 struct var *v = cast(var, al->left);
3662 struct value *vl = &v->var->val;
3664 if (argv[0] == NULL) {
3665 printf("Not enough args\n");
3668 al = cast(binode, al->right);
3670 *vl = parse_value(vl->type, argv[0]);
3671 if (vl->type == NULL)
3675 v = interp_exec(p->right);
3679 ###### interp binode cases
3680 case Program: abort(); // NOTEST
3682 ## And now to test it out.
3684 Having a language requires having a "hello world" program. I'll
3685 provide a little more than that: a program that prints "Hello world",
3686 finds the GCD of two numbers, prints the first few elements of
3687 Fibonacci, and performs a binary search for a number.
3689 ###### File: oceani.mk
3692 @echo "===== TEST ====="
3693 ./oceani --section "test: hello" oceani.mdc 55 33
3698 print "Hello World, what lovely oceans you have!"
3699 /* When a variable is defined in both branches of an 'if',
3700 * and used afterwards, the variables are merged.
3706 print "Is", A, "bigger than", B,"? ", bigger
3707 /* If a variable is not used after the 'if', no
3708 * merge happens, so types can be different
3711 double:string = "yes"
3712 print A, "is more than twice", B, "?", double
3715 print "double", B, "is", double
3720 if a > 0 and then b > 0:
3726 print "GCD of", A, "and", B,"is", a
3728 print a, "is not positive, cannot calculate GCD"
3730 print b, "is not positive, cannot calculate GCD"
3735 print "Fibonacci:", f1,f2,
3736 then togo = togo - 1
3744 /* Binary search... */
3749 mid := (lo + hi) / 2
3761 print "Yay, I found", target
3763 print "Closest I found was", mid
3768 for i:=1; then i = i + 1; while i < size:
3769 n := list[i-1] * list[i-1]
3770 list[i] = (n / 100) % 10000
3772 print "Before sort:"
3773 for i:=0; then i = i + 1; while i < size:
3774 print "list[",i,"]=",list[i]
3776 for i := 1; then i=i+1; while i < size:
3777 for j:=i-1; then j=j-1; while j >= 0:
3778 if list[j] > list[j+1]:
3783 for i:=0; then i = i + 1; while i < size:
3784 print "list[",i,"]=",list[i]