1 # Ocean Interpreter - Stoney Creek version
3 Ocean is intended to be a compiled language, so this interpreter is
4 not targeted at being the final product. It is, rather, an intermediate
5 stage and fills that role in two distinct ways.
7 Firstly, it exists as a platform to experiment with the early language
8 design. An interpreter is easy to write and easy to get working, so
9 the barrier for entry is lower if I aim to start with an interpreter.
11 Secondly, the plan for the Ocean compiler is to write it in the
12 [Ocean language](http://ocean-lang.org). To achieve this we naturally
13 need some sort of boot-strap process and this interpreter - written in
14 portable C - will fill that role. It will be used to bootstrap the
17 Two features that are not needed to fill either of these roles are
18 performance and completeness. The interpreter only needs to be fast
19 enough to run small test programs and occasionally to run the compiler
20 on itself. It only needs to be complete enough to test aspects of the
21 design which are developed before the compiler is working, and to run
22 the compiler on itself. Any features not used by the compiler when
23 compiling itself are superfluous. They may be included anyway, but
26 Nonetheless, the interpreter should end up being reasonably complete,
27 and any performance bottlenecks which appear and are easily fixed, will
32 This second version of the interpreter exists to test out the
33 structured statement providing conditions and iteration, and simple
34 variable scoping. Clearly we need some minimal other functionality so
35 that values can be tested and instructions iterated over. All that
36 functionality is clearly not normative at this stage (not that
37 anything is **really** normative yet) and will change, so early test
38 code will certainly break in later versions.
40 The under-test parts of the language are:
42 - conditional/looping structured statements
43 - the `use` statement which is needed for that
44 - Variable binding using ":=" and "::=", and assignment using "=".
46 Elements which are present to make a usable language are:
48 - "blocks" of multiple statements.
49 - `pass`: a statement which does nothing.
50 - expressions: `+`, `-`, `*`, `/`, `%` can apply to numbers and `++` can
51 catenate strings. `and`, `or`, `not` manipulate Booleans, and
52 normal comparison operators can work on all three types.
53 - `print`: will print the values in a list of expressions.
54 - `program`: is given a list of identifiers to initialize from
59 Versions of the interpreter which obviously do not support a complete
60 language will be named after creeks and streams. This one is Stoney
63 Once we have something reasonably resembling a complete language, the
64 names of rivers will be used.
65 Early versions of the compiler will be named after seas. Major
66 releases of the compiler will be named after oceans. Hopefully I will
67 be finished once I get to the Pacific Ocean release.
71 As well as parsing and executing a program, the interpreter can print
72 out the program from the parsed internal structure. This is useful
73 for validating the parsing.
74 So the main requirements of the interpreter are:
76 - Parse the program, possibly with tracing,
77 - Analyse the parsed program to ensure consistency,
79 - Execute the program.
81 This is all performed by a single C program extracted with
84 There will be two formats for printing the program: a default and one
85 that uses bracketing. So a `--brackets` command line option is needed
86 for that. Normally the first code section found is used, however an
87 alternate section can be requested so that a file (such as this one)
88 can contain multiple programs. This is effected with the `--section`
91 This code must be compiled with `-fplan9-extensions` so that anonymous
92 structures can be used.
94 ###### File: oceani.mk
96 myCFLAGS := -Wall -g -fplan9-extensions
97 CFLAGS := $(filter-out $(myCFLAGS),$(CFLAGS)) $(myCFLAGS)
98 myLDLIBS:= libparser.o libscanner.o libmdcode.o -licuuc
99 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
101 all :: $(LDLIBS) oceani
102 oceani.c oceani.h : oceani.mdc parsergen
103 ./parsergen -o oceani --LALR --tag Parser oceani.mdc
104 oceani.mk: oceani.mdc md2c
107 oceani: oceani.o $(LDLIBS)
108 $(CC) $(CFLAGS) -o oceani oceani.o $(LDLIBS)
110 ###### Parser: header
113 struct parse_context {
114 struct token_config config;
123 #define container_of(ptr, type, member) ({ \
124 const typeof( ((type *)0)->member ) *__mptr = (ptr); \
125 (type *)( (char *)__mptr - offsetof(type,member) );})
127 #define config2context(_conf) container_of(_conf, struct parse_context, \
136 #include <sys/mman.h>
155 static char Usage[] = "Usage: oceani --trace --print --noexec --brackets " /* usage text for bad options; trailing space keeps "--section" separate after literal concatenation */
156 "--section=SectionName prog.ocn\n";
157 static const struct option long_options[] = {
158 {"trace", 0, NULL, 't'},
159 {"print", 0, NULL, 'p'},
160 {"noexec", 0, NULL, 'n'},
161 {"brackets", 0, NULL, 'b'},
162 {"section", 1, NULL, 's'},
165 const char *options = "tpnbs:"; /* short-option string for getopt_long(); the ':' after 's' marks -s as taking an argument, matching {"section", 1, ...} */
166 int main(int argc, char *argv[])
172 char *section = NULL;
173 struct parse_context context = {
175 .ignored = (1 << TK_line_comment)
176 | (1 << TK_block_comment),
177 .number_chars = ".,_+-",
182 int doprint=0, dotrace=0, doexec=1, brackets=0;
184 while ((opt = getopt_long(argc, argv, options, long_options, NULL))
187 case 't': dotrace=1; break;
188 case 'p': doprint=1; break;
189 case 'n': doexec=0; break;
190 case 'b': brackets=1; break;
191 case 's': section = optarg; break;
192 default: fprintf(stderr, Usage);
196 if (optind >= argc) {
197 fprintf(stderr, "oceani: no input file given\n");
200 fd = open(argv[optind], O_RDONLY);
202 fprintf(stderr, "oceani: cannot open %s\n", argv[optind]);
205 context.file_name = argv[optind];
206 len = lseek(fd, 0, 2);
207 file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
208 s = code_extract(file, file+len, NULL);
210 fprintf(stderr, "oceani: could not find any code in %s\n",
215 ## context initialization
219 for (ss = s; ss; ss = ss->next) {
220 struct text sec = ss->section;
221 if (sec.len == strlen(section) &&
222 strncmp(sec.txt, section, sec.len) == 0)
226 parse_oceani(ss->code, &context.config,
227 dotrace ? stderr : NULL);
229 fprintf(stderr, "oceani: cannot find section %s\n",
234 parse_oceani(s->code, &context.config,
235 dotrace ? stderr : NULL);
237 fprintf(stderr, "oceani: no program found.\n");
238 context.parse_error = 1;
240 if (context.prog && doprint)
241 print_exec(context.prog, 0, brackets);
242 if (context.prog && doexec && !context.parse_error) {
243 if (!analyse_prog(context.prog, &context)) {
244 fprintf(stderr, "oceani: type error in program - not running.\n");
247 interp_prog(context.prog, argv+optind+1);
250 free_exec(context.prog);
253 struct section *t = s->next;
259 ## free context types
260 exit(context.parse_error ? 1 : 0);
265 The four requirements of parse, analyse, print, interpret apply to
266 each language element individually so that is how most of the code
269 Three of the four are fairly self explanatory. The one that requires
270 a little explanation is the analysis step.
272 The current language design does not require the types of variables to
273 be declared, but they must still have a single type. Different
274 operations impose different requirements on the variables, for example
275 addition requires both arguments to be numeric, and assignment
276 requires the variable on the left to have the same type as the
277 expression on the right.
279 Analysis involves propagating these type requirements around and
280 consequently setting the type of each variable. If any requirements
281 are violated (e.g. a string is compared with a number) or if a
282 variable needs to have two different types, then an error is raised
283 and the program will not run.
285 If the same variable is declared in both branches of an 'if/else', or
286 in all cases of a 'switch' then the multiple instances may be merged
287 into just one variable if the variable is referenced after the
288 conditional statement. When this happens, the types must naturally be
289 consistent across all the branches. When the variable is not used
290 outside the if, the variables in the different branches are distinct
291 and can be of different types.
293 Determining the types of all variables early is important for
294 processing command line arguments. These can be assigned to any type
295 of variable, but we must first know the correct type so any required
296 conversion can happen. If a variable is associated with a command
297 line argument but no type can be interpreted (e.g. the variable is
298 only ever used in a `print` statement), then the type is set to
301 Undeclared names may only appear in "use" statements and "case" expressions.
302 These names are given a type of "label" and a unique value.
303 This allows them to fill the role of a name in an enumerated type, which
304 is useful for testing the `switch` statement.
306 As we will see, the condition part of a `while` statement can return
307 either a Boolean or some other type. This requires that the expected
308 type that gets passed around comprises a type (`enum vtype`) and a
309 flag to indicate that `Vbool` is also permitted.
311 As there are, as yet, no distinct types that are compatible, there
312 isn't much subtlety in the analysis. When we have distinct number
313 types, this will become more interesting.
317 When analysis discovers an inconsistency it needs to report an error;
318 just refusing to run the code ensures that the error doesn't cascade,
319 but by itself it isn't very useful. A clear understanding of the sort of
320 error messages that are useful will help guide the process of analysis.
322 At a simplistic level, the only sort of error that type analysis can
323 report is that the type of some construct doesn't match a contextual
324 requirement. For example, in `4 + "hello"` the addition provides a
325 contextual requirement for numbers, but `"hello"` is not a number. In
326 this particular example no further information is needed as the types
327 are obvious from local information. When a variable is involved that
328 isn't the case. It may be helpful to explain why the variable has a
329 particular type, by indicating the location where the type was set,
330 whether by declaration or usage.
332 Using a recursive-descent analysis we can easily detect a problem at
333 multiple locations. In "`hello:= "there"; 4 + hello`" the addition
334 will detect that one argument is not a number and the usage of `hello`
335 will detect that a number was wanted, but not provided. In this
336 (early) version of the language, we will generate error reports at
337 multiple locations, so the use of `hello` will report an error and
338 explain where the value was set, and the addition will report an error
339 and say why numbers are needed. To be able to report locations for
340 errors, each language element will need to record a file location
341 (line and column) and each variable will need to record the language
342 element where its type was set. For now we will assume that each line
343 of an error message indicates one location in the file, and up to 2
344 types. So we provide a `printf`-like function which takes a format, a
345 language element (a `struct exec` which has not yet been introduced), and 2
346 types. "`%1`" reports the first type, "`%2`" reports the second. We
347 will need a function to print the location, once we know how that is
348 stored. As will be explained later, there are sometimes extra rules for
349 type matching and they might affect error messages, we need to pass those
352 As well as type errors, we sometimes need to report problems with
353 tokens, which might be unexpected or might name a type that has not
354 been defined. For these we have `tok_err()` which reports an error
355 with a given token. Each of the error functions sets the flag in the
356 context to indicate that parsing failed.
360 static void fput_loc(struct exec *loc, FILE *f);
362 ###### core functions
364 static void type_err(struct parse_context *c,
365 char *fmt, struct exec *loc,
366 struct type *t1, int rules, struct type *t2)
368 fprintf(stderr, "%s:", c->file_name);
369 fput_loc(loc, stderr);
370 for (; *fmt ; fmt++) {
377 case '%': fputc(*fmt, stderr); break; // NOTEST
378 default: fputc('?', stderr); break; // NOTEST
380 type_print(t1, stderr);
383 type_print(t2, stderr);
392 static void tok_err(struct parse_context *c, char *fmt, struct token *t)
394 fprintf(stderr, "%s:%d:%d: %s: %.*s\n", c->file_name, t->line, t->col, fmt,
395 t->txt.len, t->txt.txt);
401 One last introductory step before detailing the language elements and
402 providing their four requirements is to establish the data structures
403 to store these elements.
405 There are two key objects that we need to work with: executable
406 elements which comprise the program, and values which the program
407 works with. Between these are the variables in their various scopes
408 which hold the values, and types which classify the values stored and
409 manipulated by executables.
413 Values come in a wide range of types, with more likely to be added.
414 Each type needs to be able to parse and print its own values (for
415 convenience at least) as well as to compare two values, at least for
416 equality and possibly for order. For now, values might need to be
417 duplicated and freed, though eventually such manipulations will be
418 better integrated into the language.
420 Rather than requiring every numeric type to support all numeric
421 operations (add, multiply, etc), we allow types to be able to present
422 as one of a few standard types: integer, float, and fraction. The
423 existence of these conversion functions enables types to determine if
424 they are compatible with other types.
426 Named types are stored in a simple linked list. Objects of each type are "values"
427 which are often passed around by value.
434 ## value union fields
441 struct value (*init)(struct type *type);
442 struct value (*prepare)(struct type *type);
443 struct value (*parse)(struct type *type, char *str);
444 void (*print)(struct value val);
445 void (*print_type)(struct type *type, FILE *f);
446 int (*cmp_order)(struct value v1, struct value v2);
447 int (*cmp_eq)(struct value v1, struct value v2);
448 struct value (*dup)(struct value val);
449 void (*free)(struct value val);
450 int (*compat)(struct type *this, struct type *other);
451 long long (*to_int)(struct value *v);
452 double (*to_float)(struct value *v);
453 int (*to_mpq)(mpq_t *q, struct value *v);
461 struct type *typelist;
465 static struct type *find_type(struct parse_context *c, struct text s)
467 struct type *l = c->typelist;
470 text_cmp(l->name, s) != 0)
475 static struct type *add_type(struct parse_context *c, struct text s,
480 n = calloc(1, sizeof(*n));
483 n->next = c->typelist;
488 static void free_type(struct type *t)
490 /* The type is always a reference to something in the
491 * context, so we don't need to free anything.
495 static void free_value(struct value v)
501 static int type_compat(struct type *require, struct type *have, int rules)
503 if ((rules & Rboolok) && have == Tbool)
505 if ((rules & Rnolabel) && have == Tlabel)
507 if (!require || !have)
511 return require->compat(require, have);
513 return require == have;
516 static void type_print(struct type *type, FILE *f)
519 fputs("*unknown*type*", f);
520 else if (type->name.len)
521 fprintf(f, "%.*s", type->name.len, type->name.txt);
522 else if (type->print_type)
523 type->print_type(type, f);
525 fputs("*invalid*type*", f); // NOTEST
528 static struct value val_prepare(struct type *type)
533 return type->prepare(type);
538 static struct value val_init(struct type *type)
543 return type->init(type);
548 static struct value dup_value(struct value v)
551 return v.type->dup(v);
555 static int value_cmp(struct value left, struct value right)
557 if (left.type && left.type->cmp_order)
558 return left.type->cmp_order(left, right);
559 if (left.type && left.type->cmp_eq)
560 return left.type->cmp_eq(left, right);
564 static void print_value(struct value v)
566 if (v.type && v.type->print)
569 printf("*Unknown*"); // NOTEST
572 static struct value parse_value(struct type *type, char *arg)
576 if (type && type->parse)
577 return type->parse(type, arg);
578 rv.type = NULL; // NOTEST
584 static void free_value(struct value v);
585 static int type_compat(struct type *require, struct type *have, int rules);
586 static void type_print(struct type *type, FILE *f);
587 static struct value val_init(struct type *type);
588 static struct value dup_value(struct value v);
589 static int value_cmp(struct value left, struct value right);
590 static void print_value(struct value v);
591 static struct value parse_value(struct type *type, char *arg);
593 ###### free context types
595 while (context.typelist) {
596 struct type *t = context.typelist;
598 context.typelist = t->next;
604 Values of the base types can be numbers, which we represent as
605 multi-precision fractions, strings, Booleans and labels. When
606 analysing the program we also need to allow for places where no value
607 is meaningful (type `Tnone`) and where we don't know what type to
608 expect yet (type is `NULL`).
610 Values are never shared, they are always copied when used, and freed
611 when no longer needed.
613 When propagating type information around the program, we need to
614 determine if two types are compatible, where type `NULL` is compatible
615 with anything. There are two special cases with type compatibility,
616 both related to the Conditional Statement which will be described
617 later. In some cases a Boolean can be accepted as well as some other
618 primary type, and in others any type is acceptable except a label (`Vlabel`).
619 A separate function encoding these cases will simplify some code later.
621 When assigning command line arguments to variables, we need to be able
622 to parse each type from a string.
630 myLDLIBS := libnumber.o libstring.o -lgmp
631 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
633 ###### type union fields
634 enum vtype {Vnone, Vstr, Vnum, Vbool, Vlabel} vtype;
636 ###### value union fields
643 static void _free_value(struct value v)
645 switch (v.type->vtype) {
647 case Vstr: free(v.str.txt); break;
648 case Vnum: mpq_clear(v.num); break;
654 ###### value functions
656 static struct value _val_prepare(struct type *type)
661 switch(type->vtype) {
665 memset(&rv.num, 0, sizeof(rv.num));
681 static struct value _val_init(struct type *type)
686 switch(type->vtype) {
687 case Vnone: // NOTEST
690 mpq_init(rv.num); break;
692 rv.str.txt = malloc(1);
698 case Vlabel: // NOTEST
699 rv.label = NULL; // NOTEST
705 static struct value _dup_value(struct value v)
709 switch (rv.type->vtype) {
710 case Vnone: // NOTEST
720 mpq_set(rv.num, v.num);
723 rv.str.len = v.str.len;
724 rv.str.txt = malloc(rv.str.len);
725 memcpy(rv.str.txt, v.str.txt, v.str.len);
731 static int _value_cmp(struct value left, struct value right)
734 if (left.type != right.type)
735 return left.type - right.type; // NOTEST
736 switch (left.type->vtype) {
737 case Vlabel: cmp = left.label == right.label ? 0 : 1; break;
738 case Vnum: cmp = mpq_cmp(left.num, right.num); break;
739 case Vstr: cmp = text_cmp(left.str, right.str); break;
740 case Vbool: cmp = left.bool - right.bool; break;
741 case Vnone: cmp = 0; // NOTEST
746 static void _print_value(struct value v)
748 switch (v.type->vtype) {
749 case Vnone: // NOTEST
750 printf("*no-value*"); break; // NOTEST
751 case Vlabel: // NOTEST
752 printf("*label-%p*", v.label); break; // NOTEST
754 printf("%.*s", v.str.len, v.str.txt); break;
756 printf("%s", v.bool ? "True":"False"); break;
761 mpf_set_q(fl, v.num);
762 gmp_printf("%Fg", fl);
769 static struct value _parse_value(struct type *type, char *arg)
777 switch(type->vtype) {
778 case Vlabel: // NOTEST
779 case Vnone: // NOTEST
780 val.type = NULL; // NOTEST
783 val.str.len = strlen(arg);
784 val.str.txt = malloc(val.str.len);
785 memcpy(val.str.txt, arg, val.str.len);
792 tx.txt = arg; tx.len = strlen(tx.txt);
793 if (number_parse(val.num, tail, tx) == 0)
796 mpq_neg(val.num, val.num);
798 printf("Unsupported suffix: %s\n", arg);
803 if (strcasecmp(arg, "true") == 0 ||
804 strcmp(arg, "1") == 0)
806 else if (strcasecmp(arg, "false") == 0 ||
807 strcmp(arg, "0") == 0)
810 printf("Bad bool: %s\n", arg);
818 static void _free_value(struct value v);
820 static struct type base_prototype = {
822 .prepare = _val_prepare,
823 .parse = _parse_value,
824 .print = _print_value,
825 .cmp_order = _value_cmp,
826 .cmp_eq = _value_cmp,
831 static struct type *Tbool, *Tstr, *Tnum, *Tnone, *Tlabel;
834 static struct type *add_base_type(struct parse_context *c, char *n, enum vtype vt)
836 struct text txt = { n, strlen(n) };
839 t = add_type(c, txt, &base_prototype);
844 ###### context initialization
846 Tbool = add_base_type(&context, "Boolean", Vbool);
847 Tstr = add_base_type(&context, "string", Vstr);
848 Tnum = add_base_type(&context, "number", Vnum);
849 Tnone = add_base_type(&context, "none", Vnone);
850 Tlabel = add_base_type(&context, "label", Vlabel);
854 Variables are scoped named values. We store the names in a linked
855 list of "bindings" sorted lexically, and use sequential search and
862 struct binding *next; // in lexical order
866 This linked list is stored in the parse context so that "reduce"
867 functions can find or add variables, and so the analysis phase can
868 ensure that every variable gets a type.
872 struct binding *varlist; // In lexical order
876 static struct binding *find_binding(struct parse_context *c, struct text s)
878 struct binding **l = &c->varlist;
883 (cmp = text_cmp((*l)->name, s)) < 0)
887 n = calloc(1, sizeof(*n));
894 Each name can be linked to multiple variables defined in different
895 scopes. Each scope starts where the name is declared and continues
896 until the end of the containing code block. Scopes of a given name
897 cannot nest, so a declaration while a name is in-scope is an error.
899 ###### binding fields
900 struct variable *var;
904 struct variable *previous;
906 struct binding *name;
907 struct exec *where_decl;// where name was declared
908 struct exec *where_set; // where type was set
912 While the naming seems strange, we include local constants in the
913 definition of variables. A name declared `var := value` can
914 subsequently be changed, but a name declared `var ::= value` cannot -
917 ###### variable fields
920 Scopes in parallel branches can be partially merged. More
921 specifically, if a given name is declared in both branches of an
922 if/else then its scope is a candidate for merging. Similarly if
923 every branch of an exhaustive switch (e.g. has an "else" clause)
924 declares a given name, then the scopes from the branches are
925 candidates for merging.
927 Note that names declared inside a loop (which is only parallel to
928 itself) are never visible after the loop. Similarly names defined in
929 scopes which are not parallel, such as those started by `for` and
930 `switch`, are never visible after the scope. Only variables defined in
931 both `then` and `else` (including the implicit then after an `if`, and
932 excluding `then` used with `for`) and in all `case`s and `else` of a
933 `switch` or `while` can be visible beyond the `if`/`switch`/`while`.
935 Labels, which are a bit like variables, follow different rules.
936 Labels are not explicitly declared, but if an undeclared name appears
937 in a context where a label is legal, that effectively declares the
938 name as a label. The declaration remains in force (or in scope) at
939 least to the end of the immediately containing block and conditionally
940 in any larger containing block which does not declare the name in some
941 other way. Importantly, the conditional scope extension happens even
942 if the label is only used in one parallel branch of a conditional --
943 when used in one branch it is treated as having been declared in all
946 Merge candidates are tentatively visible beyond the end of the
947 branching statement which creates them. If the name is used, the
948 merge is affirmed and they become a single variable visible at the
949 outer layer. If not - if it is redeclared first - the merge lapses.
951 To track scopes we have an extra stack, implemented as a linked list,
952 which roughly parallels the parse stack and which is used exclusively
953 for scoping. When a new scope is opened, a new frame is pushed and
954 the child-count of the parent frame is incremented. This child-count
955 is used to distinguish between the first of a set of parallel scopes,
956 in which declared variables must not be in scope, and subsequent
957 branches, where they must already be conditionally scoped.
959 To push a new frame *before* any code in the frame is parsed, we need a
960 grammar reduction. This is most easily achieved with a grammar
961 element which derives the empty string, and creates the new scope when
962 it is recognized. This can be placed, for example, between a keyword
963 like "if" and the code following it.
967 struct scope *parent;
973 struct scope *scope_stack;
976 static void scope_pop(struct parse_context *c)
978 struct scope *s = c->scope_stack;
980 c->scope_stack = s->parent;
985 static void scope_push(struct parse_context *c)
987 struct scope *s = calloc(1, sizeof(*s));
989 c->scope_stack->child_count += 1;
990 s->parent = c->scope_stack;
998 OpenScope -> ${ scope_push(config2context(config)); }$
1001 Each variable records a scope depth and is in one of four states:
1003 - "in scope". This is the case between the declaration of the
1004 variable and the end of the containing block, and also between
1005 the usage which affirms a merge and the end of that block.
1007 The scope depth is not greater than the current parse context scope
1008 nest depth. When the block of that depth closes, the state will
1009 change. To achieve this, all "in scope" variables are linked
1010 together as a stack in nesting order.
1012 - "pending". The "in scope" block has closed, but other parallel
1013 scopes are still being processed. So far, every parallel block at
1014 the same level that has closed has declared the name.
1016 The scope depth is the depth of the last parallel block that
1017 enclosed the declaration, and that has closed.
1019 - "conditionally in scope". The "in scope" block and all parallel
1020 scopes have closed, and no further mention of the name has been
1021 seen. This state includes a secondary nest depth which records the
1022 outermost scope seen since the variable became conditionally in
1023 scope. If a use of the name is found, the variable becomes "in
1024 scope" and that secondary depth becomes the recorded scope depth.
1025 If the name is declared as a new variable, the old variable becomes
1026 "out of scope" and the recorded scope depth stays unchanged.
1028 - "out of scope". The variable is neither in scope nor conditionally
1029 in scope. It is permanently out of scope now and can be removed from
1030 the "in scope" stack.
1033 ###### variable fields
1034 int depth, min_depth;
1035 enum { OutScope, PendingScope, CondScope, InScope } scope;
1036 struct variable *in_scope;
1038 ###### parse context
1040 struct variable *in_scope;
1042 All variables with the same name are linked together using the
1043 'previous' link. Those variables that have
1044 been affirmatively merged all have a 'merged' pointer that points to
1045 one primary variable - the most recently declared instance. When
1046 merging variables, we need to also adjust the 'merged' pointer on any
1047 other variables that had previously been merged with the one that will
1048 no longer be primary.
1050 ###### variable fields
1051 struct variable *merged;
1053 ###### ast functions
1055 static void variable_merge(struct variable *primary, struct variable *secondary)
1059 if (primary->merged)
1061 primary = primary->merged;
1063 for (v = primary->previous; v; v=v->previous)
1064 if (v == secondary || v == secondary->merged ||
1065 v->merged == secondary ||
1066 (v->merged && v->merged == secondary->merged)) {
1067 v->scope = OutScope;
1068 v->merged = primary;
1072 ###### free context vars
1074 while (context.varlist) {
1075 struct binding *b = context.varlist;
1076 struct variable *v = b->var;
1077 context.varlist = b->next;
1080 struct variable *t = v;
1084 if (t->min_depth == 0)
1085 free_exec(t->where_decl);
1090 #### Manipulating Bindings
1092 When a name is conditionally visible, a new declaration discards the
1093 old binding - the condition lapses. Conversely a usage of the name
1094 affirms the visibility and extends it to the end of the containing
1095 block - i.e. the block that contains both the original declaration and
1096 the latest usage. This is determined from `min_depth`. When a
1097 conditionally visible variable gets affirmed like this, it is also
1098 merged with other conditionally visible variables with the same name.
1100 When we parse a variable declaration we either signal an error if the
1101 name is currently bound, or create a new variable at the current nest
1102 depth if the name is unbound or bound to a conditionally scoped or
1103 pending-scope variable. If the previous variable was conditionally
1104 scoped, it and its homonyms become out-of-scope.
1106 When we parse a variable reference (including non-declarative
1107 assignment) we signal an error if the name is not bound or is bound to
1108 a pending-scope variable; update the scope if the name is bound to a
1109 conditionally scoped variable; or just proceed normally if the named
1110 variable is in scope.
1112 When we exit a scope, any variables bound at this level are either
1113 marked out of scope or pending-scoped, depending on whether the
1114 scope was sequential or parallel.
1116 When exiting a parallel scope we check if there are any variables that
1117 were previously pending and are still visible. If there are, then
1118 they weren't redeclared in the most recent scope, so they cannot be
1119 merged and must become out-of-scope. If it is not the first of
1120 parallel scopes (based on `child_count`), we check that there was a
1121 previous binding that is still pending-scope. If there isn't, the new
1122 variable must now be out-of-scope.
1124 When exiting a sequential scope that immediately enclosed parallel
1125 scopes, we need to resolve any pending-scope variables. If there was
1126 no `else` clause, and we cannot determine that the `switch` was exhaustive,
1127 we need to mark all pending-scope variables as out-of-scope. Otherwise
1128 all pending-scope variables become conditionally scoped.
1131 enum closetype { CloseSequential, CloseParallel, CloseElse };
1133 ###### ast functions
1135 static struct variable *var_decl(struct parse_context *c, struct text s)
1137 struct binding *b = find_binding(c, s);
1138 struct variable *v = b->var;
1140 switch (v ? v->scope : OutScope) {
1142 /* Caller will report the error */
1146 v && v->scope == CondScope;
1148 v->scope = OutScope;
1152 v = calloc(1, sizeof(*v));
1153 v->previous = b->var;
1156 v->min_depth = v->depth = c->scope_depth;
1158 v->in_scope = c->in_scope;
1160 v->val = val_prepare(NULL);
1164 static struct variable *var_ref(struct parse_context *c, struct text s)
1166 struct binding *b = find_binding(c, s);
1167 struct variable *v = b->var;
1168 struct variable *v2;
1170 switch (v ? v->scope : OutScope) {
1173 /* Signal an error - once that is possible */
1176 /* All CondScope variables of this name need to be merged
1177 * and become InScope
1179 v->depth = v->min_depth;
1181 for (v2 = v->previous;
1182 v2 && v2->scope == CondScope;
1184 variable_merge(v, v2);
1192 static void var_block_close(struct parse_context *c, enum closetype ct)
1194 /* close of all variables that are in_scope */
1195 struct variable *v, **vp, *v2;
1198 for (vp = &c->in_scope;
1199 v = *vp, v && v->depth > c->scope_depth && v->min_depth > c->scope_depth;
1203 case CloseParallel: /* handle PendingScope */
1207 if (c->scope_stack->child_count == 1)
1208 v->scope = PendingScope;
1209 else if (v->previous &&
1210 v->previous->scope == PendingScope)
1211 v->scope = PendingScope;
1212 else if (v->val.type == Tlabel)
1213 v->scope = PendingScope;
1214 else if (v->name->var == v)
1215 v->scope = OutScope;
1216 if (ct == CloseElse) {
1217 /* All Pending variables with this name
1218 * are now Conditional */
1220 v2 && v2->scope == PendingScope;
1222 v2->scope = CondScope;
1227 v2 && v2->scope == PendingScope;
1229 if (v2->val.type != Tlabel)
1230 v2->scope = OutScope;
1232 case OutScope: break;
1235 case CloseSequential:
1236 if (v->val.type == Tlabel)
1237 v->scope = PendingScope;
1240 v->scope = OutScope;
1243 /* There was no 'else', so we can only become
1244 * conditional if we know the cases were exhaustive,
1245 * and that doesn't mean anything yet.
1246 * So only labels become conditional..
1249 v2 && v2->scope == PendingScope;
1251 if (v2->val.type == Tlabel) {
1252 v2->scope = CondScope;
1253 v2->min_depth = c->scope_depth;
1255 v2->scope = OutScope;
1258 case OutScope: break;
1262 if (v->scope == OutScope)
1271 Executables can be lots of different things. In many cases an
1272 executable is just an operation combined with one or two other
1273 executables. This allows for expressions and lists etc. Other times
1274 an executable is something quite specific like a constant or variable
1275 name. So we define a `struct exec` to be a general executable with a
1276 type, and a `struct binode` which is a subclass of `exec`, forms a
1277 node in a binary tree, and holds an operation. There will be other
1278 subclasses, and to access these we need to be able to `cast` the
1279 `exec` into the various other types.
1282 #define cast(structname, pointer) ({ \
1283 const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \
1284 if (__mptr && *__mptr != X##structname) abort(); \
1285 (struct structname *)( (char *)__mptr);})
1287 #define new(structname) ({ \
1288 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1289 __ptr->type = X##structname; \
1290 __ptr->line = -1; __ptr->column = -1; \
1293 #define new_pos(structname, token) ({ \
1294 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1295 __ptr->type = X##structname; \
1296 __ptr->line = token.line; __ptr->column = token.col; \
1305 enum exec_types type;
1313 struct exec *left, *right;
1316 ###### ast functions
1318 static int __fput_loc(struct exec *loc, FILE *f)
1322 if (loc->line >= 0) {
1323 fprintf(f, "%d:%d: ", loc->line, loc->column);
1326 if (loc->type == Xbinode)
1327 return __fput_loc(cast(binode,loc)->left, f) ||
1328 __fput_loc(cast(binode,loc)->right, f);
1331 static void fput_loc(struct exec *loc, FILE *f)
1333 if (!__fput_loc(loc, f))
1334 fprintf(f, "??:??: "); // NOTEST
1337 Each different type of `exec` node needs a number of functions
1338 defined, a bit like methods. We must be able to free it,
1339 print it, analyse it and execute it. Once we have specific `exec`
1340 types we will need to parse them too. Let's take this a bit more
1345 The parser generator requires a `free_foo` function for each struct
1346 that stores attributes and they will be `exec`s and subtypes there-of.
1347 So we need `free_exec` which can handle all the subtypes, and we need
1350 ###### ast functions
1352 static void free_binode(struct binode *b)
1357 free_exec(b->right);
1361 ###### core functions
1362 static void free_exec(struct exec *e)
1371 ###### forward decls
1373 static void free_exec(struct exec *e);
1375 ###### free exec cases
1376 case Xbinode: free_binode(cast(binode, e)); break;
1380 Printing an `exec` requires that we know the current indent level for
1381 printing line-oriented components. As will become clear later, we
1382 also want to know what sort of bracketing to use.
1384 ###### ast functions
1386 static void do_indent(int i, char *str)
1393 ###### core functions
1394 static void print_binode(struct binode *b, int indent, int bracket)
1398 ## print binode cases
1402 static void print_exec(struct exec *e, int indent, int bracket)
1408 print_binode(cast(binode, e), indent, bracket); break;
1413 ###### forward decls
1415 static void print_exec(struct exec *e, int indent, int bracket);
1419 As discussed, analysis involves propagating type requirements around
1420 the program and looking for errors.
1422 So `propagate_types` is passed an expected type (being a `struct type`
1423 pointer together with some `val_rules` flags) that the `exec` is
1424 expected to return, and returns the type that it does return, either
1425 of which can be `NULL` signifying "unknown". An `ok` flag is passed
1426 by reference. It is set to `0` when an error is found, and `2` when
1427 any change is made. If it remains unchanged at `1`, then no more
1428 propagation is needed.
1432 enum val_rules {Rnolabel = 1<<0, Rboolok = 1<<1, Rnoconstant = 2<<1}; /* bit flags tested against the 'rules' argument of propagate_types(); note 2<<1 == 4, i.e. the same bit as 1<<2 */
1436 if (rules & Rnolabel)
1437 fputs(" (labels not permitted)", stderr);
1440 ###### core functions
1442 static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1443 struct type *type, int rules)
1450 switch (prog->type) {
1453 struct binode *b = cast(binode, prog);
1455 ## propagate binode cases
1459 ## propagate exec cases
1466 Interpreting an `exec` doesn't require anything but the `exec`. State
1467 is stored in variables and each variable will be directly linked from
1468 within the `exec` tree. The exception to this is the whole `program`
1469 which needs to look at command line arguments. The `program` will be
1470 interpreted separately.
1472 Each `exec` can return a value, which may be `Tnone` but must be non-NULL.
1474 ###### core functions
1477 struct value val, *lval;
1480 static struct lrval _interp_exec(struct exec *e);
1482 static struct value interp_exec(struct exec *e)
1484 struct lrval ret = _interp_exec(e);
1487 return dup_value(*ret.lval);
1492 static struct value *linterp_exec(struct exec *e)
1494 struct lrval ret = _interp_exec(e);
1499 static struct lrval _interp_exec(struct exec *e)
1502 struct value rv, *lrv = NULL;
1513 struct binode *b = cast(binode, e);
1514 struct value left, right, *lleft;
1515 left.type = right.type = Tnone;
1517 ## interp binode cases
1519 free_value(left); free_value(right);
1522 ## interp exec cases
1531 Now that we have the shape of the interpreter in place we can add some
1532 complex types and connect them in to the data structures and the
1533 different phases of parse, analyse, print, interpret.
1535 For now, just arrays.
1539 Arrays can be declared by giving a size and a type, as `[size]type` so
1540 `freq:[26]number` declares `freq` to be an array of 26 numbers. The
1541 size can be an arbitrary expression which is evaluated when the name
1544 Arrays cannot be assigned. When pointers are introduced we will also
1545 introduce array slices which can refer to part or all of an array -
1546 the assignment syntax will create a slice. For now, an array can only
1547 ever be referenced by the name it is declared with. It is likely that
1548 a "`copy`" primitive will eventually be defined which can be used to
1549 make a copy of an array with controllable depth.
1551 ###### type union fields
1555 struct variable *vsize;
1556 struct type *member;
1559 ###### value union fields
1561 struct value *elmnts;
1564 ###### value functions
1566 static struct value array_prepare(struct type *type)
1571 ret.array.elmnts = NULL;
1575 static struct value array_init(struct type *type)
1581 if (type->array.vsize) {
1584 mpz_tdiv_q(q, mpq_numref(type->array.vsize->val.num),
1585 mpq_denref(type->array.vsize->val.num));
1586 type->array.size = mpz_get_si(q);
1589 ret.array.elmnts = calloc(type->array.size,
1590 sizeof(ret.array.elmnts[0]));
1591 for (i = 0; ret.array.elmnts && i < type->array.size; i++)
1592 ret.array.elmnts[i] = val_init(type->array.member);
1596 static void array_free(struct value val)
1600 if (val.array.elmnts)
1601 for (i = 0; i < val.type->array.size; i++)
1602 free_value(val.array.elmnts[i]);
1603 free(val.array.elmnts);
1606 static int array_compat(struct type *require, struct type *have)
1608 if (have->compat != require->compat)
1610 /* Both are arrays, so we can look at details */
1611 if (!type_compat(require->array.member, have->array.member, 0))
1613 if (require->array.vsize == NULL && have->array.vsize == NULL)
1614 return require->array.size == have->array.size;
1616 return require->array.vsize == have->array.vsize;
1619 static void array_print_type(struct type *type, FILE *f)
1622 if (type->array.vsize) {
1623 struct binding *b = type->array.vsize->name;
1624 fprintf(f, "%.*s]", b->name.len, b->name.txt);
1626 fprintf(f, "%d]", type->array.size);
1627 type_print(type->array.member, f);
1630 static struct type array_prototype = {
1631 .prepare = array_prepare,
1633 .print_type = array_print_type,
1634 .compat = array_compat,
1640 | [ NUMBER ] Type ${
1641 $0 = calloc(1, sizeof(struct type));
1642 *($0) = array_prototype;
1643 $0->array.member = $<4;
1644 $0->array.vsize = NULL;
1646 struct parse_context *c = config2context(config);
1649 if (number_parse(num, tail, $2.txt) == 0)
1650 tok_err(c, "error: unrecognised number", &$2);
1652 tok_err(c, "error: unsupported number suffix", &$2);
1654 $0->array.size = mpz_get_ui(mpq_numref(num));
1655 if (mpz_cmp_ui(mpq_denref(num), 1) != 0) {
1656 tok_err(c, "error: array size must be an integer",
1658 } else if (mpz_cmp_ui(mpq_numref(num), 1UL << 30) >= 0)
1659 tok_err(c, "error: array size is too large",
1663 $0->next= c->anon_typelist;
1664 c->anon_typelist = $0;
1668 | [ IDENTIFIER ] Type ${ {
1669 struct parse_context *c = config2context(config);
1670 struct variable *v = var_ref(c, $2.txt);
1673 tok_err(config2context(config), "error: name undeclared", &$2);
1674 else if (!v->constant)
1675 tok_err(config2context(config), "error: array size must be a constant", &$2);
1677 $0 = calloc(1, sizeof(struct type));
1678 *($0) = array_prototype;
1679 $0->array.member = $<4;
1681 $0->array.vsize = v;
1682 $0->next= c->anon_typelist;
1683 c->anon_typelist = $0;
1686 ###### parse context
1688 struct type *anon_typelist;
1690 ###### free context types
1692 while (context.anon_typelist) {
1693 struct type *t = context.anon_typelist;
1695 context.anon_typelist = t->next;
1702 ###### variable grammar
1704 | Variable [ Expression ] ${ {
1705 struct binode *b = new(binode);
1712 ###### print binode cases
1714 print_exec(b->left, -1, 0);
1716 print_exec(b->right, -1, 0);
1720 ###### propagate binode cases
1722 /* left must be an array, right must be a number,
1723 * result is the member type of the array
1725 propagate_types(b->right, c, ok, Tnum, 0);
1726 t = propagate_types(b->left, c, ok, NULL, rules & Rnoconstant);
1727 if (!t || t->compat != array_compat) {
1728 type_err(c, "error: %1 cannot be indexed", prog, t, 0, NULL);
1732 if (!type_compat(type, t->array.member, rules)) {
1733 type_err(c, "error: have %1 but need %2", prog,
1734 t->array.member, rules, type);
1737 return t->array.member;
1741 ###### interp binode cases
1746 lleft = linterp_exec(b->left);
1747 right = interp_exec(b->right);
1749 mpz_tdiv_q(q, mpq_numref(right.num), mpq_denref(right.num));
1753 if (i >= 0 && i < lleft->type->array.size)
1754 lrv = &lleft->array.elmnts[i];
1756 rv = val_init(lleft->type->array.member);
1760 ## Language elements
1762 Each language element needs to be parsed, printed, analysed,
1763 interpreted, and freed. There are several, so let's just start with
1764 the easy ones and work our way up.
1768 We have already met values as separate objects. When manifest
1769 constants appear in the program text, that must result in an executable
1770 which has a constant value. So the `val` structure embeds a value in
1786 $0 = new_pos(val, $1);
1787 $0->val.type = Tbool;
1791 $0 = new_pos(val, $1);
1792 $0->val.type = Tbool;
1796 $0 = new_pos(val, $1);
1797 $0->val.type = Tnum;
1800 if (number_parse($0->val.num, tail, $1.txt) == 0)
1801 mpq_init($0->val.num);
1803 tok_err(config2context(config), "error: unsupported number suffix",
1808 $0 = new_pos(val, $1);
1809 $0->val.type = Tstr;
1812 string_parse(&$1, '\\', &$0->val.str, tail);
1814 tok_err(config2context(config), "error: unsupported string suffix",
1819 $0 = new_pos(val, $1);
1820 $0->val.type = Tstr;
1823 string_parse(&$1, '\\', &$0->val.str, tail);
1825 tok_err(config2context(config), "error: unsupported string suffix",
1830 ###### print exec cases
1833 struct val *v = cast(val, e);
1834 if (v->val.type == Tstr)
1836 print_value(v->val);
1837 if (v->val.type == Tstr)
1842 ###### propagate exec cases
1845 struct val *val = cast(val, prog);
1846 if (!type_compat(type, val->val.type, rules)) {
1847 type_err(c, "error: expected %1%r found %2",
1848 prog, type, rules, val->val.type);
1851 return val->val.type;
1854 ###### interp exec cases
1856 rv = dup_value(cast(val, e)->val);
1859 ###### ast functions
1860 static void free_val(struct val *v)
1868 ###### free exec cases
1869 case Xval: free_val(cast(val, e)); break;
1871 ###### ast functions
1872 // Move all nodes from 'b' to 'rv', reversing the order.
1873 // In 'b' 'left' is a list, and 'right' is the last node.
1874 // In 'rv', 'left' is the first node and 'right' is a list.
1875 static struct binode *reorder_bilist(struct binode *b)
1877 struct binode *rv = NULL;
1880 struct exec *t = b->right;
1884 b = cast(binode, b->left);
1894 Just as we used a `val` to wrap a value into an `exec`, we similarly
1895 need a `var` to wrap a `variable` into an exec. While each `val`
1896 contained a copy of the value, each `var` holds a link to the variable
1897 because it really is the same variable no matter where it appears.
1898 When a variable is used, we need to remember to follow the `->merged`
1899 link to find the primary instance.
1907 struct variable *var;
1913 VariableDecl -> IDENTIFIER : ${ {
1914 struct variable *v = var_decl(config2context(config), $1.txt);
1915 $0 = new_pos(var, $1);
1920 v = var_ref(config2context(config), $1.txt);
1922 type_err(config2context(config), "error: variable '%v' redeclared",
1923 $0, Tnone, 0, Tnone);
1924 type_err(config2context(config), "info: this is where '%v' was first declared",
1925 v->where_decl, Tnone, 0, Tnone);
1928 | IDENTIFIER :: ${ {
1929 struct variable *v = var_decl(config2context(config), $1.txt);
1930 $0 = new_pos(var, $1);
1936 v = var_ref(config2context(config), $1.txt);
1938 type_err(config2context(config), "error: variable '%v' redeclared",
1939 $0, Tnone, 0, Tnone);
1940 type_err(config2context(config), "info: this is where '%v' was first declared",
1941 v->where_decl, Tnone, 0, Tnone);
1944 | IDENTIFIER : Type ${ {
1945 struct variable *v = var_decl(config2context(config), $1.txt);
1946 $0 = new_pos(var, $1);
1951 v->val = val_prepare($<3);
1953 v = var_ref(config2context(config), $1.txt);
1955 type_err(config2context(config), "error: variable '%v' redeclared",
1956 $0, Tnone, 0, Tnone);
1957 type_err(config2context(config), "info: this is where '%v' was first declared",
1958 v->where_decl, Tnone, 0, Tnone);
1961 | IDENTIFIER :: Type ${ {
1962 struct variable *v = var_decl(config2context(config), $1.txt);
1963 $0 = new_pos(var, $1);
1968 v->val = val_prepare($<3);
1971 v = var_ref(config2context(config), $1.txt);
1973 type_err(config2context(config), "error: variable '%v' redeclared",
1974 $0, Tnone, 0, Tnone);
1975 type_err(config2context(config), "info: this is where '%v' was first declared",
1976 v->where_decl, Tnone, 0, Tnone);
1981 Variable -> IDENTIFIER ${ {
1982 struct variable *v = var_ref(config2context(config), $1.txt);
1983 $0 = new_pos(var, $1);
1985 /* This might be a label - allocate a var just in case */
1986 v = var_decl(config2context(config), $1.txt);
1988 v->val = val_prepare(Tlabel);
1989 v->val.label = &v->val;
1993 cast(var, $0)->var = v;
1998 Type -> IDENTIFIER ${
1999 $0 = find_type(config2context(config), $1.txt);
2001 tok_err(config2context(config),
2002 "error: undefined type", &$1);
2009 ###### print exec cases
2012 struct var *v = cast(var, e);
2014 struct binding *b = v->var->name;
2015 printf("%.*s", b->name.len, b->name.txt);
2022 if (loc->type == Xvar) {
2023 struct var *v = cast(var, loc);
2025 struct binding *b = v->var->name;
2026 fprintf(stderr, "%.*s", b->name.len, b->name.txt);
2028 fputs("???", stderr); // NOTEST
2030 fputs("NOTVAR", stderr); // NOTEST
2033 ###### propagate exec cases
2037 struct var *var = cast(var, prog);
2038 struct variable *v = var->var;
2040 type_err(c, "%d:BUG: no variable!!", prog, Tnone, 0, Tnone); // NOTEST
2042 return Tnone; // NOTEST
2046 if (v->constant && (rules & Rnoconstant)) {
2047 type_err(c, "error: Cannot assign to a constant: %v",
2048 prog, NULL, 0, NULL);
2049 type_err(c, "info: name was defined as a constant here",
2050 v->where_decl, NULL, 0, NULL);
2054 if (v->val.type == NULL) {
2055 if (type && *ok != 0) {
2056 v->val = val_prepare(type);
2057 v->where_set = prog;
2062 if (!type_compat(type, v->val.type, rules)) {
2063 type_err(c, "error: expected %1%r but variable '%v' is %2", prog,
2064 type, rules, v->val.type);
2065 type_err(c, "info: this is where '%v' was set to %1", v->where_set,
2066 v->val.type, rules, Tnone);
2074 ###### interp exec cases
2077 struct var *var = cast(var, e);
2078 struct variable *v = var->var;
2086 ###### ast functions
2088 static void free_var(struct var *v)
2093 ###### free exec cases
2094 case Xvar: free_var(cast(var, e)); break;
2096 ### Expressions: Conditional
2098 Our first user of the `binode` will be conditional expressions, which
2099 is a bit odd as they actually have three components. That will be
2100 handled by having 2 binodes for each expression. The conditional
2101 expression is the lowest precedence operator, so it gets to define
2102 what an "Expression" is. The next level up is "BoolExpr", which
2105 Conditional expressions are of the form "value `if` condition `else`
2106 other_value". There is no associativity with this operator: the
2107 values and conditions can only be other conditional expressions if
2108 they are enclosed in parentheses. Allowing nesting without
2109 parentheses would be too confusing.
2117 Expression -> BoolExpr if BoolExpr else BoolExpr ${ {
2118 struct binode *b1 = new(binode);
2119 struct binode *b2 = new(binode);
2128 | BoolExpr ${ $0 = $<1; }$
2130 ###### print binode cases
2133 b2 = cast(binode, b->right);
2134 print_exec(b2->left, -1, 0);
2136 print_exec(b->left, -1, 0);
2138 print_exec(b2->right, -1, 0);
2141 ###### propagate binode cases
2144 /* cond must be Tbool, others must match */
2145 struct binode *b2 = cast(binode, b->right);
2148 propagate_types(b->left, c, ok, Tbool, 0);
2149 t = propagate_types(b2->left, c, ok, type, Rnolabel);
2150 t2 = propagate_types(b2->right, c, ok, type ?: t, Rnolabel);
2154 ###### interp binode cases
2157 struct binode *b2 = cast(binode, b->right);
2158 left = interp_exec(b->left);
2160 rv = interp_exec(b2->left);
2162 rv = interp_exec(b2->right);
2166 ### Expressions: Boolean
2168 The next class of expressions to use the `binode` will be Boolean
2169 expressions. As I haven't implemented precedence in the parser
2170 generator yet, we need different names for each precedence level used
2171 by expressions. The outermost, or lowest-precedence, level after
2172 conditional expressions is the Boolean operators, which form a `BoolExpr`
2173 out of `BTerm`s and `BFact`s. As well as `or`, `and`, and `not` we
2174 have `and then` and `or else` which only evaluate the second operand
2175 if the result would make a difference.
2187 BoolExpr -> BoolExpr or BTerm ${ {
2188 struct binode *b = new(binode);
2194 | BoolExpr or else BTerm ${ {
2195 struct binode *b = new(binode);
2201 | BTerm ${ $0 = $<1; }$
2203 BTerm -> BTerm and BFact ${ {
2204 struct binode *b = new(binode);
2210 | BTerm and then BFact ${ {
2211 struct binode *b = new(binode);
2217 | BFact ${ $0 = $<1; }$
2219 BFact -> not BFact ${ {
2220 struct binode *b = new(binode);
2227 ###### print binode cases
2229 print_exec(b->left, -1, 0);
2231 print_exec(b->right, -1, 0);
2234 print_exec(b->left, -1, 0);
2235 printf(" and then ");
2236 print_exec(b->right, -1, 0);
2239 print_exec(b->left, -1, 0);
2241 print_exec(b->right, -1, 0);
2244 print_exec(b->left, -1, 0);
2245 printf(" or else ");
2246 print_exec(b->right, -1, 0);
2250 print_exec(b->right, -1, 0);
2253 ###### propagate binode cases
2259 /* both must be Tbool, result is Tbool */
2260 propagate_types(b->left, c, ok, Tbool, 0);
2261 propagate_types(b->right, c, ok, Tbool, 0);
2262 if (type && type != Tbool) {
2263 type_err(c, "error: %1 operation found where %2 expected", prog,
2269 ###### interp binode cases
2271 rv = interp_exec(b->left);
2272 right = interp_exec(b->right);
2273 rv.bool = rv.bool && right.bool;
2276 rv = interp_exec(b->left);
2278 rv = interp_exec(b->right);
2281 rv = interp_exec(b->left);
2282 right = interp_exec(b->right);
2283 rv.bool = rv.bool || right.bool;
2286 rv = interp_exec(b->left);
2288 rv = interp_exec(b->right);
2291 rv = interp_exec(b->right);
2295 ### Expressions: Comparison
2297 Of slightly higher precedence than Boolean expressions are
2299 A comparison takes arguments of any type, but the two types must be
2302 To simplify the parsing we introduce an `eop` which can record an
2303 expression operator.
2310 ###### ast functions
2311 static void free_eop(struct eop *e)
2326 | Expr CMPop Expr ${ {
2327 struct binode *b = new(binode);
2333 | Expr ${ $0 = $<1; }$
2338 CMPop -> < ${ $0.op = Less; }$
2339 | > ${ $0.op = Gtr; }$
2340 | <= ${ $0.op = LessEq; }$
2341 | >= ${ $0.op = GtrEq; }$
2342 | == ${ $0.op = Eql; }$
2343 | != ${ $0.op = NEql; }$
2345 ###### print binode cases
2353 print_exec(b->left, -1, 0);
2355 case Less: printf(" < "); break;
2356 case LessEq: printf(" <= "); break;
2357 case Gtr: printf(" > "); break;
2358 case GtrEq: printf(" >= "); break;
2359 case Eql: printf(" == "); break;
2360 case NEql: printf(" != "); break;
2361 default: abort(); // NOTEST
2363 print_exec(b->right, -1, 0);
2366 ###### propagate binode cases
2373 /* Both must match but not be labels, result is Tbool */
2374 t = propagate_types(b->left, c, ok, NULL, Rnolabel);
2376 propagate_types(b->right, c, ok, t, 0);
2378 t = propagate_types(b->right, c, ok, NULL, Rnolabel);
2380 t = propagate_types(b->left, c, ok, t, 0);
2382 if (!type_compat(type, Tbool, 0)) {
2383 type_err(c, "error: Comparison returns %1 but %2 expected", prog,
2384 Tbool, rules, type);
2389 ###### interp binode cases
2398 left = interp_exec(b->left);
2399 right = interp_exec(b->right);
2400 cmp = value_cmp(left, right);
2403 case Less: rv.bool = cmp < 0; break;
2404 case LessEq: rv.bool = cmp <= 0; break;
2405 case Gtr: rv.bool = cmp > 0; break;
2406 case GtrEq: rv.bool = cmp >= 0; break;
2407 case Eql: rv.bool = cmp == 0; break;
2408 case NEql: rv.bool = cmp != 0; break;
2409 default: rv.bool = 0; break; // NOTEST
2414 ### Expressions: The rest
2416 The remaining expressions with the highest precedence are arithmetic
2417 and string concatenation. They are `Expr`, `Term`, and `Factor`.
2418 The `Factor` is where the `Value` and `Variable` that we already have
2421 `+` and `-` are both infix and prefix operations (where they are
2422 absolute value and negation). These have different operator names.
2424 We also have a 'Bracket' operator which records where parentheses were
2425 found. This makes it easy to reproduce these when printing. Once
2426 precedence is handled better I might be able to discard this.
2438 Expr -> Expr Eop Term ${ {
2439 struct binode *b = new(binode);
2445 | Term ${ $0 = $<1; }$
2447 Term -> Term Top Factor ${ {
2448 struct binode *b = new(binode);
2454 | Factor ${ $0 = $<1; }$
2456 Factor -> ( Expression ) ${ {
2457 struct binode *b = new_pos(binode, $1);
2463 struct binode *b = new(binode);
2468 | Value ${ $0 = $<1; }$
2469 | Variable ${ $0 = $<1; }$
2472 Eop -> + ${ $0.op = Plus; }$
2473 | - ${ $0.op = Minus; }$
2475 Uop -> + ${ $0.op = Absolute; }$
2476 | - ${ $0.op = Negate; }$
2478 Top -> * ${ $0.op = Times; }$
2479 | / ${ $0.op = Divide; }$
2480 | % ${ $0.op = Rem; }$
2481 | ++ ${ $0.op = Concat; }$
2483 ###### print binode cases
2490 print_exec(b->left, indent, 0);
2492 case Plus: fputs(" + ", stdout); break;
2493 case Minus: fputs(" - ", stdout); break;
2494 case Times: fputs(" * ", stdout); break;
2495 case Divide: fputs(" / ", stdout); break;
2496 case Rem: fputs(" % ", stdout); break;
2497 case Concat: fputs(" ++ ", stdout); break;
2498 default: abort(); // NOTEST
2500 print_exec(b->right, indent, 0);
2504 print_exec(b->right, indent, 0);
2508 print_exec(b->right, indent, 0);
2512 print_exec(b->right, indent, 0);
2516 ###### propagate binode cases
2522 /* both must be numbers, result is Tnum */
2525 /* as propagate_types ignores a NULL,
2526 * unary ops fit here too */
2527 propagate_types(b->left, c, ok, Tnum, 0);
2528 propagate_types(b->right, c, ok, Tnum, 0);
2529 if (!type_compat(type, Tnum, 0)) {
2530 type_err(c, "error: Arithmetic returns %1 but %2 expected", prog,
2537 /* both must be Tstr, result is Tstr */
2538 propagate_types(b->left, c, ok, Tstr, 0);
2539 propagate_types(b->right, c, ok, Tstr, 0);
2540 if (!type_compat(type, Tstr, 0)) {
2541 type_err(c, "error: Concat returns %1 but %2 expected", prog,
2548 return propagate_types(b->right, c, ok, type, 0);
2550 ###### interp binode cases
2553 rv = interp_exec(b->left);
2554 right = interp_exec(b->right);
2555 mpq_add(rv.num, rv.num, right.num);
2558 rv = interp_exec(b->left);
2559 right = interp_exec(b->right);
2560 mpq_sub(rv.num, rv.num, right.num);
2563 rv = interp_exec(b->left);
2564 right = interp_exec(b->right);
2565 mpq_mul(rv.num, rv.num, right.num);
2568 rv = interp_exec(b->left);
2569 right = interp_exec(b->right);
2570 mpq_div(rv.num, rv.num, right.num);
2575 left = interp_exec(b->left);
2576 right = interp_exec(b->right);
2577 mpz_init(l); mpz_init(r); mpz_init(rem);
2578 mpz_tdiv_q(l, mpq_numref(left.num), mpq_denref(left.num));
2579 mpz_tdiv_q(r, mpq_numref(right.num), mpq_denref(right.num));
2580 mpz_tdiv_r(rem, l, r);
2581 rv = val_init(Tnum);
2582 mpq_set_z(rv.num, rem);
2583 mpz_clear(r); mpz_clear(l); mpz_clear(rem);
2587 rv = interp_exec(b->right);
2588 mpq_neg(rv.num, rv.num);
2591 rv = interp_exec(b->right);
2592 mpq_abs(rv.num, rv.num);
2595 rv = interp_exec(b->right);
2598 left = interp_exec(b->left);
2599 right = interp_exec(b->right);
2601 rv.str = text_join(left.str, right.str);
2605 ###### value functions
2607 static struct text text_join(struct text a, struct text b)
2610 rv.len = a.len + b.len;
2611 rv.txt = malloc(rv.len);
2612 memcpy(rv.txt, a.txt, a.len);
2613 memcpy(rv.txt+a.len, b.txt, b.len);
2618 ### Blocks, Statements, and Statement lists.
2620 Now that we have expressions out of the way we need to turn to
2621 statements. There are simple statements and more complex statements.
2622 Simple statements do not contain newlines, complex statements do.
2624 Statements often come in sequences and we have corresponding simple
2625 statement lists and complex statement lists.
2626 The former comprise only simple statements separated by semicolons.
2627 The latter comprise complex statements and simple statement lists. They are
2628 separated by newlines. Thus the semicolon is only used to separate
2629 simple statements on the one line. This may be overly restrictive,
2630 but I'm not sure I ever want a complex statement to share a line with
2633 Note that a simple statement list can still use multiple lines if
2634 subsequent lines are indented, so
2636 ###### Example: wrapped simple statement list
2641 is a single simple statement list. This might allow room for
2642 confusion, so I'm not set on it yet.
2644 A simple statement list needs no extra syntax. A complex statement
2645 list has two syntactic forms. It can be enclosed in braces (much like
2646 C blocks), or it can be introduced by a colon and continue until an
2647 unindented newline (much like Python blocks). With this extra syntax
2648 it is referred to as a block.
2650 Note that a block does not have to include any newlines if it only
2651 contains simple statements. So both of:
2653 if condition: a=b; d=f
2655 if condition { a=b; print f }
2659 In either case the list is constructed from a `binode` list with
2660 `Block` as the operator. When parsing the list it is most convenient
2661 to append to the end, so a list is a list and a statement. When using
2662 the list it is more convenient to consider a list to be a statement
2663 and a list. So we need a function to re-order a list.
2664 `reorder_bilist` serves this purpose.
2666 The only stand-alone statement we introduce at this stage is `pass`
2667 which does nothing and is represented as a `NULL` pointer in a `Block`
2668 list. Other stand-alone statements will follow once the infrastructure
2688 Block -> Open Statementlist Close ${ $0 = $<2; }$
2689 | Open Newlines Statementlist Close ${ $0 = $<3; }$
2690 | Open SimpleStatements } ${ $0 = reorder_bilist($<2); }$
2691 | Open Newlines SimpleStatements } ${ $0 = reorder_bilist($<3); }$
2692 | : Statementlist ${ $0 = $<2; }$
2693 | : SimpleStatements ${ $0 = reorder_bilist($<2); }$
2695 Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<1); }$
2697 ComplexStatements -> ComplexStatements ComplexStatement ${
2703 | ComplexStatements NEWLINE ${ $0 = $<1; }$
2704 | ComplexStatement ${
2712 ComplexStatement -> SimpleStatements NEWLINE ${
2713 $0 = reorder_bilist($<1);
2715 ## ComplexStatement Grammar
2718 SimpleStatements -> SimpleStatements ; SimpleStatement ${
2724 | SimpleStatement ${
2730 | SimpleStatements ; ${ $0 = $<1; }$
2732 SimpleStatement -> pass ${ $0 = NULL; }$
2733 ## SimpleStatement Grammar
2735 ###### print binode cases
2739 if (b->left == NULL)
2742 print_exec(b->left, indent, 0);
2745 print_exec(b->right, indent, 0);
2748 // block, one per line
2749 if (b->left == NULL)
2750 do_indent(indent, "pass\n");
2752 print_exec(b->left, indent, bracket);
2754 print_exec(b->right, indent, bracket);
2758 ###### propagate binode cases
2761 /* If any statement returns something other than Tnone
2762 * or Tbool then all such must return same type.
2763 * As each statement may be Tnone or something else,
2764 * we must always pass NULL (unknown) down, otherwise an incorrect
2765 * error might occur. We never return Tnone unless it is
2770 for (e = b; e; e = cast(binode, e->right)) {
2771 t = propagate_types(e->left, c, ok, NULL, rules);
2772 if ((rules & Rboolok) && t == Tbool)
2774 if (t && t != Tnone && t != Tbool) {
2777 else if (t != type) {
2778 type_err(c, "error: expected %1%r, found %2",
2779 e->left, type, rules, t);
2787 ###### interp binode cases
2789 while (rv.type == Tnone &&
2792 rv = interp_exec(b->left);
2793 b = cast(binode, b->right);
2797 ### The Print statement
2799 `print` is a simple statement that takes a comma-separated list of
2800 expressions and prints the values separated by spaces and terminated
2801 by a newline. No control of formatting is possible.
2803 `print` faces the same list-ordering issue as blocks, and uses the
2809 ###### SimpleStatement Grammar
2811 | print ExpressionList ${
2812 $0 = reorder_bilist($<2);
2814 | print ExpressionList , ${
2819 $0 = reorder_bilist($0);
2830 ExpressionList -> ExpressionList , Expression ${
2843 ###### print binode cases
2846 do_indent(indent, "print");
2850 print_exec(b->left, -1, 0);
2854 b = cast(binode, b->right);
2860 ###### propagate binode cases
2863 /* don't care but all must be consistent */
2864 propagate_types(b->left, c, ok, NULL, Rnolabel);
2865 propagate_types(b->right, c, ok, NULL, Rnolabel);
2868 ###### interp binode cases
2874 for ( ; b; b = cast(binode, b->right))
2878 left = interp_exec(b->left);
2891 ###### Assignment statement
2893 An assignment will assign a value to a variable, providing it hasn't
2894 been declared as a constant. The analysis phase ensures that the type
2895 will be correct so the interpreter just needs to perform the
2896 calculation. There is a form of assignment which declares a new
2897 variable as well as assigning a value. If a name is assigned before
2898 it is declared, an error will be raised as the name is created as
2899 `Tlabel` and it is illegal to assign to such names.
2905 ###### SimpleStatement Grammar
2906 | Variable = Expression ${
2912 | VariableDecl = Expression ${
2920 if ($1->var->where_set == NULL) {
2921 type_err(config2context(config), "Variable declared with no type or value: %v",
2931 ###### print binode cases
2934 do_indent(indent, "");
2935 print_exec(b->left, indent, 0);
2937 print_exec(b->right, indent, 0);
2944 struct variable *v = cast(var, b->left)->var;
2945 do_indent(indent, "");
2946 print_exec(b->left, indent, 0);
2947 if (cast(var, b->left)->var->constant) {
2948 if (v->where_decl == v->where_set) {
2950 type_print(v->val.type, stdout);
2955 if (v->where_decl == v->where_set) {
2957 type_print(v->val.type, stdout);
2964 print_exec(b->right, indent, 0);
2971 ###### propagate binode cases
2975 /* Both must match and not be labels,
2976 * Type must support 'dup',
2977 * For Assign, left must not be constant.
2980 t = propagate_types(b->left, c, ok, NULL,
2981 Rnolabel | (b->op == Assign ? Rnoconstant : 0));
2986 if (propagate_types(b->right, c, ok, t, 0) != t)
2987 if (b->left->type == Xvar)
2988 type_err(c, "info: variable '%v' was set as %1 here.",
2989 cast(var, b->left)->var->where_set, t, rules, Tnone);
2991 t = propagate_types(b->right, c, ok, NULL, Rnolabel);
2993 propagate_types(b->left, c, ok, t,
2994 (b->op == Assign ? Rnoconstant : 0));
2996 if (t && t->dup == NULL) {
2997 type_err(c, "error: cannot assign value of type %1", b, t, 0, NULL);
3004 ###### interp binode cases
3007 lleft = linterp_exec(b->left);
3008 right = interp_exec(b->right);
3013 free_value(right); // NOTEST
3019 struct variable *v = cast(var, b->left)->var;
3023 right = interp_exec(b->right);
3025 right = val_init(v->val.type);
3032 ### The `use` statement
3034 The `use` statement is the last "simple" statement. It is needed when
3035 the condition in a conditional statement is a block. `use` works much
3036 like `return` in C, but only completes the `condition`, not the whole
3042 ###### SimpleStatement Grammar
3044 $0 = new_pos(binode, $1);
3049 ###### print binode cases
3052 do_indent(indent, "use ");
3053 print_exec(b->right, -1, 0);
3058 ###### propagate binode cases
3061 /* result matches value */
3062 return propagate_types(b->right, c, ok, type, 0);
3064 ###### interp binode cases
3067 rv = interp_exec(b->right);
3070 ### The Conditional Statement
3072 This is the biggy and currently the only complex statement. This
3073 subsumes `if`, `while`, `do/while`, `switch`, and some parts of `for`.
3074 It is comprised of a number of parts, all of which are optional though
3075 set combinations apply. Each part is (usually) a key word (`then` is
3076 sometimes optional) followed by either an expression or a code block,
3077 except the `casepart` which is a "key word and an expression" followed
3078 by a code block. The code-block option is valid for all parts and,
3079 where an expression is also allowed, the code block can use the `use`
3080 statement to report a value. If the code block does not report a value
3081 the effect is similar to reporting `True`.
3083 The `else` and `case` parts, as well as `then` when combined with
3084 `if`, can contain a `use` statement which will apply to some
3085 containing conditional statement. `for` parts, `do` parts and `then`
3086 parts used with `for` can never contain a `use`, except in some
3087 subordinate conditional statement.
3089 If there is a `forpart`, it is executed first, only once.
3090 If there is a `dopart`, then it is executed repeatedly providing
3091 always that the `condpart` or `cond`, if present, does not return a non-True
3092 value. `condpart` can fail to return any value if it simply executes
3093 to completion. This is treated the same as returning `True`.
3095 If there is a `thenpart` it will be executed whenever the `condpart`
3096 or `cond` returns True (or does not return any value), but this will happen
3097 *after* `dopart` (when present).
3099 If `elsepart` is present it will be executed at most once when the
3100 condition returns `False` or some value that isn't `True` and isn't
3101 matched by any `casepart`. If there are any `casepart`s, they will be
3102 executed when the condition returns a matching value.
3104 The particular sorts of values allowed in case parts have not yet been
3105 determined in the language design, so nothing is prohibited.
3107 The various blocks in this complex statement potentially provide scope
3108 for variables as described earlier. Each such block must include the
3109 "OpenScope" nonterminal before parsing the block, and must call
3110 `var_block_close()` when closing the block.
3112 The code following "`if`", "`switch`" and "`for`" does not get its own
3113 scope, but is in a scope covering the whole statement, so names
3114 declared there cannot be redeclared elsewhere. Similarly the
3115 condition following "`while`" is in a scope that covers the body
3116 ("`do`" part) of the loop, and which does not allow conditional scope
3117 extension. Code following "`then`" (both looping and non-looping),
3118 "`else`" and "`case`" each get their own local scope.
3120 The type requirements on the code block in a `whilepart` are quite
3121 unusual. It is allowed to return a value of some identifiable type, in
3122 which case the loop aborts and an appropriate `casepart` is run, or it
3123 can return a Boolean, in which case the loop either continues to the
3124 `dopart` (on `True`) or aborts and runs the `elsepart` (on `False`).
3125 This is different both from the `ifpart` code block which is expected to
3126 return a Boolean, and from the `switchpart` code block which is expected to
3127 return the same type as the casepart values. The correct analysis of
3128 the type of the `whilepart` code block is the reason for the
3129 `Rboolok` flag which is passed to `propagate_types()`.
3131 The `cond_statement` cannot fit into a `binode` so a new `exec` is
3140 struct exec *action;
3141 struct casepart *next;
3143 struct cond_statement {
3145 struct exec *forpart, *condpart, *dopart, *thenpart, *elsepart;
3146 struct casepart *casepart;
3149 ###### ast functions
3151 static void free_casepart(struct casepart *cp)
3155 free_exec(cp->value);
3156 free_exec(cp->action);
3163 static void free_cond_statement(struct cond_statement *s)
3167 free_exec(s->forpart);
3168 free_exec(s->condpart);
3169 free_exec(s->dopart);
3170 free_exec(s->thenpart);
3171 free_exec(s->elsepart);
3172 free_casepart(s->casepart);
3176 ###### free exec cases
3177 case Xcond_statement: free_cond_statement(cast(cond_statement, e)); break;
3179 ###### ComplexStatement Grammar
3180 | CondStatement ${ $0 = $<1; }$
3185 // both ForThen and Whilepart open scopes, and CondSuffix only
3186 // closes one - so in the first branch here we have another to close.
3187 CondStatement -> ForThen WhilePart CondSuffix ${
3189 $0->forpart = $1.forpart; $1.forpart = NULL;
3190 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
3191 $0->condpart = $2.condpart; $2.condpart = NULL;
3192 $0->dopart = $2.dopart; $2.dopart = NULL;
3193 var_block_close(config2context(config), CloseSequential);
3195 | WhilePart CondSuffix ${
3197 $0->condpart = $1.condpart; $1.condpart = NULL;
3198 $0->dopart = $1.dopart; $1.dopart = NULL;
3200 | SwitchPart CondSuffix ${
3204 | IfPart IfSuffix ${
3206 $0->condpart = $1.condpart; $1.condpart = NULL;
3207 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
3208 // This is where we close an "if" statement
3209 var_block_close(config2context(config), CloseSequential);
3212 CondSuffix -> IfSuffix ${
3214 // This is where we close scope of the whole
3215 // "for" or "while" statement
3216 var_block_close(config2context(config), CloseSequential);
3218 | CasePart CondSuffix ${
3220 $1->next = $0->casepart;
3225 CasePart -> Newlines case Expression OpenScope Block ${
3226 $0 = calloc(1,sizeof(struct casepart));
3229 var_block_close(config2context(config), CloseParallel);
3231 | case Expression OpenScope Block ${
3232 $0 = calloc(1,sizeof(struct casepart));
3235 var_block_close(config2context(config), CloseParallel);
3239 IfSuffix -> Newlines ${ $0 = new(cond_statement); }$
3240 | Newlines else OpenScope Block ${
3241 $0 = new(cond_statement);
3243 var_block_close(config2context(config), CloseElse);
3245 | else OpenScope Block ${
3246 $0 = new(cond_statement);
3248 var_block_close(config2context(config), CloseElse);
3250 | Newlines else OpenScope CondStatement ${
3251 $0 = new(cond_statement);
3253 var_block_close(config2context(config), CloseElse);
3255 | else OpenScope CondStatement ${
3256 $0 = new(cond_statement);
3258 var_block_close(config2context(config), CloseElse);
3263 // These scopes are closed in CondSuffix
3264 ForPart -> for OpenScope SimpleStatements ${
3265 $0 = reorder_bilist($<3);
3267 | for OpenScope Block ${
3271 ThenPart -> then OpenScope SimpleStatements ${
3272 $0 = reorder_bilist($<3);
3273 var_block_close(config2context(config), CloseSequential);
3275 | then OpenScope Block ${
3277 var_block_close(config2context(config), CloseSequential);
3280 ThenPartNL -> ThenPart OptNL ${
3284 // This scope is closed in CondSuffix
3285 WhileHead -> while OpenScope Block ${
3290 ForThen -> ForPart OptNL ThenPartNL ${
3298 // This scope is closed in CondSuffix
3299 WhilePart -> while OpenScope Expression Block ${
3300 $0.type = Xcond_statement;
3304 | WhileHead OptNL do Block ${
3305 $0.type = Xcond_statement;
3310 IfPart -> if OpenScope Expression OpenScope Block ${
3311 $0.type = Xcond_statement;
3314 var_block_close(config2context(config), CloseParallel);
3316 | if OpenScope Block OptNL then OpenScope Block ${
3317 $0.type = Xcond_statement;
3320 var_block_close(config2context(config), CloseParallel);
3324 // This scope is closed in CondSuffix
3325 SwitchPart -> switch OpenScope Expression ${
3328 | switch OpenScope Block ${
3332 ###### print exec cases
3334 case Xcond_statement:
3336 struct cond_statement *cs = cast(cond_statement, e);
3337 struct casepart *cp;
3339 do_indent(indent, "for");
3340 if (bracket) printf(" {\n"); else printf(":\n");
3341 print_exec(cs->forpart, indent+1, bracket);
3344 do_indent(indent, "} then {\n");
3346 do_indent(indent, "then:\n");
3347 print_exec(cs->thenpart, indent+1, bracket);
3349 if (bracket) do_indent(indent, "}\n");
3353 if (cs->condpart && cs->condpart->type == Xbinode &&
3354 cast(binode, cs->condpart)->op == Block) {
3356 do_indent(indent, "while {\n");
3358 do_indent(indent, "while:\n");
3359 print_exec(cs->condpart, indent+1, bracket);
3361 do_indent(indent, "} do {\n");
3363 do_indent(indent, "do:\n");
3364 print_exec(cs->dopart, indent+1, bracket);
3366 do_indent(indent, "}\n");
3368 do_indent(indent, "while ");
3369 print_exec(cs->condpart, 0, bracket);
3374 print_exec(cs->dopart, indent+1, bracket);
3376 do_indent(indent, "}\n");
3381 do_indent(indent, "switch");
3383 do_indent(indent, "if");
3384 if (cs->condpart && cs->condpart->type == Xbinode &&
3385 cast(binode, cs->condpart)->op == Block) {
3390 print_exec(cs->condpart, indent+1, bracket);
3392 do_indent(indent, "}\n");
3394 do_indent(indent, "then:\n");
3395 print_exec(cs->thenpart, indent+1, bracket);
3399 print_exec(cs->condpart, 0, bracket);
3405 print_exec(cs->thenpart, indent+1, bracket);
3407 do_indent(indent, "}\n");
3412 for (cp = cs->casepart; cp; cp = cp->next) {
3413 do_indent(indent, "case ");
3414 print_exec(cp->value, -1, 0);
3419 print_exec(cp->action, indent+1, bracket);
3421 do_indent(indent, "}\n");
3424 do_indent(indent, "else");
3429 print_exec(cs->elsepart, indent+1, bracket);
3431 do_indent(indent, "}\n");
3436 ###### propagate exec cases
3437 case Xcond_statement:
3439 // forpart and dopart must return Tnone
3440 // thenpart must return Tnone if there is a dopart,
3441 // otherwise it is like elsepart.
3443 // be bool if there is no casepart
3444 // match casepart->values if there is a switchpart
3445 // either be bool or match casepart->value if there
3447 // elsepart and casepart->action must match the return type
3448 // expected of this statement.
3449 struct cond_statement *cs = cast(cond_statement, prog);
3450 struct casepart *cp;
3452 t = propagate_types(cs->forpart, c, ok, Tnone, 0);
3453 if (!type_compat(Tnone, t, 0))
3455 t = propagate_types(cs->dopart, c, ok, Tnone, 0);
3456 if (!type_compat(Tnone, t, 0))
3459 t = propagate_types(cs->thenpart, c, ok, Tnone, 0);
3460 if (!type_compat(Tnone, t, 0))
3463 if (cs->casepart == NULL)
3464 propagate_types(cs->condpart, c, ok, Tbool, 0);
3466 /* Condpart must match case values, with bool permitted */
3468 for (cp = cs->casepart;
3469 cp && !t; cp = cp->next)
3470 t = propagate_types(cp->value, c, ok, NULL, 0);
3471 if (!t && cs->condpart)
3472 t = propagate_types(cs->condpart, c, ok, NULL, Rboolok);
3473 // Now we have a type (I hope) push it down
3475 for (cp = cs->casepart; cp; cp = cp->next)
3476 propagate_types(cp->value, c, ok, t, 0);
3477 propagate_types(cs->condpart, c, ok, t, Rboolok);
3480 // (if)then, else, and case parts must return expected type.
3481 if (!cs->dopart && !type)
3482 type = propagate_types(cs->thenpart, c, ok, NULL, rules);
3484 type = propagate_types(cs->elsepart, c, ok, NULL, rules);
3485 for (cp = cs->casepart;
3488 type = propagate_types(cp->action, c, ok, NULL, rules);
3491 propagate_types(cs->thenpart, c, ok, type, rules);
3492 propagate_types(cs->elsepart, c, ok, type, rules);
3493 for (cp = cs->casepart; cp ; cp = cp->next)
3494 propagate_types(cp->action, c, ok, type, rules);
3500 ###### interp exec cases
3501 case Xcond_statement:
3503 struct value v, cnd;
3504 struct casepart *cp;
3505 struct cond_statement *c = cast(cond_statement, e);
3508 interp_exec(c->forpart);
3511 cnd = interp_exec(c->condpart);
3514 if (!(cnd.type == Tnone ||
3515 (cnd.type == Tbool && cnd.bool != 0)))
3517 // cnd is Tnone or Tbool, doesn't need to be freed
3519 interp_exec(c->dopart);
3522 rv = interp_exec(c->thenpart);
3523 if (rv.type != Tnone || !c->dopart)
3527 } while (c->dopart);
3529 for (cp = c->casepart; cp; cp = cp->next) {
3530 v = interp_exec(cp->value);
3531 if (value_cmp(v, cnd) == 0) {
3534 rv = interp_exec(cp->action);
3541 rv = interp_exec(c->elsepart);
3548 ### Top level structure
3550 All the language elements so far can be used in various places. Now
3551 it is time to clarify what those places are.
3553 At the top level of a file there will be a number of declarations.
3554 Many of the things that can be declared haven't been described yet,
3555 such as functions, procedures, imports, named types, and probably
3557 For now there are two sorts of things that can appear at the top
3558 level. They are predefined constants and the main program. While the
3559 syntax will allow the main program to appear multiple times, that will
3560 trigger an error if it is actually attempted.
3562 The various declarations do not return anything. They store the
3563 various declarations in the parse context.
3565 ###### Parser: grammar
3568 Ocean -> DeclarationList
3570 DeclarationList -> Declaration
3571 | DeclarationList Declaration
3573 Declaration -> DeclareConstant
3577 ## top level grammar
3579 ### The `const` section
3581 As well as being defined along with the code that uses them, constants
3582 can be declared at the top level. These have full-file scope, so they
3583 are always `InScope`. The value of a top level constant can be given
3584 as an expression, and this is evaluated immediately rather than in the
3585 later interpretation stage. Once we add functions to the language, we
3586 will need rules concerning which, if any, can be used to define a top
3589 Constants are defined in a section that starts with the reserved word
3590 `const` and then has a block with a list of assignment statements.
3591 For syntactic consistency, these must use the double-colon syntax to
3592 make it clear that they are constants. Type can also be given: if
3593 not, the type will be determined during analysis, as with other
3596 ###### top level grammar
3598 DeclareConstant -> const Open ConstList Close
3599 | const Open Newlines ConstList Close
3600 | const Open SimpleConstList }
3601 | const Open Newlines SimpleConstList }
3603 | const SimpleConstList
3605 ConstList -> ComplexConsts
3606 ComplexConsts -> ComplexConst ComplexConsts
3608 ComplexConst -> SimpleConstList NEWLINE
3609 SimpleConstList -> Const ; SimpleConstList
3611 | Const ; SimpleConstList ;
3614 CType -> Type ${ $0 = $<1; }$
3617 Const -> IDENTIFIER :: CType = Expression ${ {
3621 v = var_decl(config2context(config), $1.txt);
3623 struct var *var = new_pos(var, $1);
3624 v->where_decl = var;
3629 v = var_ref(config2context(config), $1.txt);
3630 tok_err(config2context(config), "error: name already declared", &$1);
3631 type_err(config2context(config), "info: this is where '%v' was first declared",
3632 v->where_decl, NULL, 0, NULL);
3636 propagate_types($5, config2context(config), &ok, $3, 0);
3639 config2context(config)->parse_error = 1;
3641 v->val = interp_exec($5);
3646 ### Finally the whole program.
3648 Somewhat reminiscent of Pascal, a (current) Ocean program starts with
3649 the keyword "program" and a list of variable names which are assigned
3650 values from command line arguments. Following this is a `block` which
3651 is the code to execute. Unlike Pascal, constants and other
3652 declarations come *before* the program.
3654 As this is the top level, several things are handled a bit
3656 The whole program is not interpreted by `interp_exec` as that isn't
3657 passed the argument list which the program requires. Similarly type
3658 analysis is a bit more interesting at this level.
3663 ###### top level grammar
3665 DeclareProgram -> Program ${ {
3666 struct parse_context *c = config2context(config);
3668 type_err(c, "Program defined a second time",
3676 Program -> program OpenScope Varlist Block OptNL ${
3679 $0->left = reorder_bilist($<3);
3681 var_block_close(config2context(config), CloseSequential);
3682 if (config2context(config)->scope_stack) abort();
3685 tok_err(config2context(config),
3686 "error: unhandled parse error", &$1);
3689 Varlist -> Varlist ArgDecl ${
3698 ArgDecl -> IDENTIFIER ${ {
3699 struct variable *v = var_decl(config2context(config), $1.txt);
3706 ###### print binode cases
3708 do_indent(indent, "program");
3709 for (b2 = cast(binode, b->left); b2; b2 = cast(binode, b2->right)) {
3711 print_exec(b2->left, 0, 0);
3717 print_exec(b->right, indent+1, bracket);
3719 do_indent(indent, "}\n");
3722 ###### propagate binode cases
3723 case Program: abort(); // NOTEST
3725 ###### core functions
3727 static int analyse_prog(struct exec *prog, struct parse_context *c)
3729 struct binode *b = cast(binode, prog);
3736 propagate_types(b->right, c, &ok, Tnone, 0);
3741 for (b = cast(binode, b->left); b; b = cast(binode, b->right)) {
3742 struct var *v = cast(var, b->left);
3743 if (!v->var->val.type) {
3744 v->var->where_set = b;
3745 v->var->val = val_prepare(Tstr);
3748 b = cast(binode, prog);
3751 propagate_types(b->right, c, &ok, Tnone, 0);
3756 /* Make sure everything is still consistent */
3757 propagate_types(b->right, c, &ok, Tnone, 0);
3761 static void interp_prog(struct exec *prog, char **argv)
3763 struct binode *p = cast(binode, prog);
3769 al = cast(binode, p->left);
3771 struct var *v = cast(var, al->left);
3772 struct value *vl = &v->var->val;
3774 if (argv[0] == NULL) {
3775 printf("Not enough args\n");
3778 al = cast(binode, al->right);
3780 *vl = parse_value(vl->type, argv[0]);
3781 if (vl->type == NULL)
3785 v = interp_exec(p->right);
3789 ###### interp binode cases
3790 case Program: abort(); // NOTEST
3792 ## And now to test it out.
3794 Having a language requires having a "hello world" program. I'll
3795 provide a little more than that: a program that prints "Hello world",
3796 finds the GCD of two numbers, prints the first few elements of
3797 Fibonacci, and performs a binary search for a number.
3799 ###### File: oceani.mk
3802 @echo "===== TEST ====="
3803 ./oceani --section "test: hello" oceani.mdc 55 33
3809 four ::= 2 + 2 ; five ::= 10/2
3810 const pie ::= "I like Pie";
3811 cake ::= "The cake is"
3815 print "Hello World, what lovely oceans you have!"
3816 print "are there", five, "?"
3817 print pi, pie, "but", cake
3819 /* When a variable is defined in both branches of an 'if',
3820 * and used afterwards, the variables are merged.
3826 print "Is", A, "bigger than", B,"? ", bigger
3827 /* If a variable is not used after the 'if', no
3828 * merge happens, so types can be different
3831 double:string = "yes"
3832 print A, "is more than twice", B, "?", double
3835 print "double", B, "is", double
3840 if a > 0 and then b > 0:
3846 print "GCD of", A, "and", B,"is", a
3848 print a, "is not positive, cannot calculate GCD"
3850 print b, "is not positive, cannot calculate GCD"
3855 print "Fibonacci:", f1,f2,
3856 then togo = togo - 1
3864 /* Binary search... */
3869 mid := (lo + hi) / 2
3881 print "Yay, I found", target
3883 print "Closest I found was", mid
3888 for i:=1; then i = i + 1; while i < size:
3889 n := list[i-1] * list[i-1]
3890 list[i] = (n / 100) % 10000
3892 print "Before sort:"
3893 for i:=0; then i = i + 1; while i < size:
3894 print "list[",i,"]=",list[i]
3896 for i := 1; then i=i+1; while i < size:
3897 for j:=i-1; then j=j-1; while j >= 0:
3898 if list[j] > list[j+1]:
3903 for i:=0; then i = i + 1; while i < size:
3904 print "list[",i,"]=",list[i]