1 # Ocean Interpreter - Stoney Creek version
3 Ocean is intended to be a compiled language, so this interpreter is
4 not targeted at being the final product. It is, rather, an intermediate
5 stage and fills that role in two distinct ways.
7 Firstly, it exists as a platform to experiment with the early language
8 design. An interpreter is easy to write and easy to get working, so
9 the barrier for entry is lower if I aim to start with an interpreter.
11 Secondly, the plan for the Ocean compiler is to write it in the
12 [Ocean language](http://ocean-lang.org). To achieve this we naturally
13 need some sort of boot-strap process and this interpreter - written in
14 portable C - will fill that role. It will be used to bootstrap the
17 Two features that are not needed to fill either of these roles are
18 performance and completeness. The interpreter only needs to be fast
19 enough to run small test programs and occasionally to run the compiler
20 on itself. It only needs to be complete enough to test aspects of the
21 design which are developed before the compiler is working, and to run
22 the compiler on itself. Any features not used by the compiler when
23 compiling itself are superfluous. They may be included anyway, but
26 Nonetheless, the interpreter should end up being reasonably complete,
27 and any performance bottlenecks which appear and are easily fixed, will
32 This second version of the interpreter exists to test out the
33 structured statement providing conditions and iteration, and simple
34 variable scoping. Clearly we need some minimal other functionality so
35 that values can be tested and instructions iterated over. All that
36 functionality is clearly not normative at this stage (not that
37 anything is **really** normative yet) and will change, so early test
38 code will certainly break in later versions.
40 The under-test parts of the language are:
42 - conditional/looping structured statements
43 - the `use` statement which is needed for that
44 - Variable binding using ":=" and "::=", and assignment using "=".
46 Elements which are present to make a usable language are:
48 - "blocks" of multiple statements.
49 - `pass`: a statement which does nothing.
50 - expressions: `+`, `-`, `*`, `/` can apply to numbers and `++` can
51 catenate strings. `and`, `or`, `not` manipulate Booleans, and
52 normal comparison operators can work on all three types.
53 - `print`: will print the values in a list of expressions.
54 - `program`: is given a list of identifiers to initialize from
59 Versions of the interpreter which obviously do not support a complete
60 language will be named after creeks and streams. This one is Stoney
63 Once we have something reasonably resembling a complete language, the
64 names of rivers will be used.
65 Early versions of the compiler will be named after seas. Major
66 releases of the compiler will be named after oceans. Hopefully I will
67 be finished once I get to the Pacific Ocean release.
71 As well as parsing and executing a program, the interpreter can print
72 out the program from the parsed internal structure. This is useful
73 for validating the parsing.
74 So the main requirements of the interpreter are:
76 - Parse the program, possibly with tracing,
77 - Analyse the parsed program to ensure consistency,
79 - Execute the program.
81 This is all performed by a single C program extracted with
84 There will be two formats for printing the program: a default and one
85 that uses bracketing. So a `--bracket` command line option is needed
86 for that. Normally the first code section found is used, however an
87 alternate section can be requested so that a file (such as this one)
88 can contain multiple programs. This is effected with the `--section`
91 This code must be compiled with `-fplan9-extensions` so that anonymous
92 structures can be used.
94 ###### File: oceani.mk
96 myCFLAGS := -Wall -g -fplan9-extensions
97 CFLAGS := $(filter-out $(myCFLAGS),$(CFLAGS)) $(myCFLAGS)
98 myLDLIBS:= libparser.o libscanner.o libmdcode.o -licuuc
99 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
101 all :: $(LDLIBS) oceani
102 oceani.c oceani.h : oceani.mdc parsergen
103 ./parsergen -o oceani --LALR --tag Parser oceani.mdc
104 oceani.mk: oceani.mdc md2c
107 oceani: oceani.o $(LDLIBS)
108 $(CC) $(CFLAGS) -o oceani oceani.o $(LDLIBS)
110 ###### Parser: header
113 struct parse_context {
114 struct token_config config;
122 #define container_of(ptr, type, member) ({ \
123 const typeof( ((type *)0)->member ) *__mptr = (ptr); \
124 (type *)( (char *)__mptr - offsetof(type,member) );})
126 #define config2context(_conf) container_of(_conf, struct parse_context, \
135 #include <sys/mman.h>
/* Usage summary printed to stderr when an unknown option is given.
 * Adjacent string literals are concatenated verbatim by the compiler,
 * so the first piece must end with a space to keep "--brackets" and
 * "--section=..." from running together.
 */
static char Usage[] = "Usage: oceani --trace --print --noexec --brackets "
		      "--section=SectionName prog.ocn\n";
156 static const struct option long_options[] = {
157 {"trace", 0, NULL, 't'},
158 {"print", 0, NULL, 'p'},
159 {"noexec", 0, NULL, 'n'},
160 {"brackets", 0, NULL, 'b'},
161 {"section", 1, NULL, 's'},
/* Short-option string for getopt_long().  't', 'p', 'n' and 'b' are
 * plain flags; 's' (section) requires an argument, which getopt only
 * supplies in optarg when the letter is followed by ':'.
 */
const char *options = "tpnbs:";
165 int main(int argc, char *argv[])
171 char *section = NULL;
172 struct parse_context context = {
174 .ignored = (1 << TK_line_comment)
175 | (1 << TK_block_comment),
176 .number_chars = ".,_+-",
181 int doprint=0, dotrace=0, doexec=1, brackets=0;
184 while ((opt = getopt_long(argc, argv, options, long_options, NULL))
187 case 't': dotrace=1; break;
188 case 'p': doprint=1; break;
189 case 'n': doexec=0; break;
190 case 'b': brackets=1; break;
191 case 's': section = optarg; break;
192 default: fprintf(stderr, Usage);
196 if (optind >= argc) {
197 fprintf(stderr, "oceani: no input file given\n");
200 fd = open(argv[optind], O_RDONLY);
202 fprintf(stderr, "oceani: cannot open %s\n", argv[optind]);
205 context.file_name = argv[optind];
206 len = lseek(fd, 0, 2);
207 file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
208 s = code_extract(file, file+len, NULL);
210 fprintf(stderr, "oceani: could not find any code in %s\n",
215 ## context initialization
219 for (ss = s; ss; ss = ss->next) {
220 struct text sec = ss->section;
221 if (sec.len == strlen(section) &&
222 strncmp(sec.txt, section, sec.len) == 0)
226 prog = parse_oceani(ss->code, &context.config,
227 dotrace ? stderr : NULL);
229 fprintf(stderr, "oceani: cannot find section %s\n",
234 prog = parse_oceani(s->code, &context.config,
235 dotrace ? stderr : NULL);
237 fprintf(stderr, "oceani: fatal parser error.\n");
238 context.parse_error = 1;
241 print_exec(*prog, 0, brackets);
242 if (prog && doexec && !context.parse_error) {
243 if (!analyse_prog(*prog, &context)) {
244 fprintf(stderr, "oceani: type error in program - not running.\n");
247 interp_prog(*prog, argv+optind+1);
254 struct section *t = s->next;
260 ## free context types
261 exit(context.parse_error ? 1 : 0);
266 The four requirements of parse, analyse, print, interpret apply to
267 each language element individually so that is how most of the code
270 Three of the four are fairly self explanatory. The one that requires
271 a little explanation is the analysis step.
273 The current language design does not require the types of variables to
274 be declared, but they must still have a single type. Different
275 operations impose different requirements on the variables, for example
276 addition requires both arguments to be numeric, and assignment
277 requires the variable on the left to have the same type as the
278 expression on the right.
280 Analysis involves propagating these type requirements around and
281 consequently setting the type of each variable. If any requirements
282 are violated (e.g. a string is compared with a number) or if a
283 variable needs to have two different types, then an error is raised
284 and the program will not run.
286 If the same variable is declared in both branches of an 'if/else', or
287 in all cases of a 'switch' then the multiple instances may be merged
288 into just one variable if the variable is referenced after the
289 conditional statement. When this happens, the types must naturally be
290 consistent across all the branches. When the variable is not used
291 outside the if, the variables in the different branches are distinct
292 and can be of different types.
294 Determining the types of all variables early is important for
295 processing command line arguments. These can be assigned to any type
296 of variable, but we must first know the correct type so any required
297 conversion can happen. If a variable is associated with a command
298 line argument but no type can be interpreted (e.g. the variable is
299 only ever used in a `print` statement), then the type is set to
302 Undeclared names may only appear in "use" statements and "case" expressions.
303 These names are given a type of "label" and a unique value.
304 This allows them to fill the role of a name in an enumerated type, which
305 is useful for testing the `switch` statement.
307 As we will see, the condition part of a `while` statement can return
308 either a Boolean or some other type. This requires that the expected
309 type that gets passed around comprises a type (`enum vtype`) and a
310 flag to indicate that `Vbool` is also permitted.
312 As there are, as yet, no distinct types that are compatible, there
313 isn't much subtlety in the analysis. When we have distinct number
314 types, this will become more interesting.
318 When analysis discovers an inconsistency it needs to report an error;
319 just refusing to run the code ensures that the error doesn't cascade,
320 but by itself it isn't very useful. A clear understanding of the sort of
321 error messages that are useful will help guide the process of analysis.
323 At a simplistic level, the only sort of error that type analysis can
324 report is that the type of some construct doesn't match a contextual
325 requirement. For example, in `4 + "hello"` the addition provides a
326 contextual requirement for numbers, but `"hello"` is not a number. In
327 this particular example no further information is needed as the types
328 are obvious from local information. When a variable is involved that
329 isn't the case. It may be helpful to explain why the variable has a
330 particular type, by indicating the location where the type was set,
331 whether by declaration or usage.
333 Using a recursive-descent analysis we can easily detect a problem at
334 multiple locations. In "`hello:= "there"; 4 + hello`" the addition
335 will detect that one argument is not a number and the usage of `hello`
336 will detect that a number was wanted, but not provided. In this
337 (early) version of the language, we will generate error reports at
338 multiple locations, so the use of `hello` will report an error and
339 explain where the value was set, and the addition will report an error
340 and say why numbers are needed. To be able to report locations for
341 errors, each language element will need to record a file location
342 (line and column) and each variable will need to record the language
343 element where its type was set. For now we will assume that each line
344 of an error message indicates one location in the file, and up to 2
345 types. So we provide a `printf`-like function which takes a format, a
346 language element (a `struct exec` which has not yet been introduced), and 2
347 types. "`%1`" reports the first type, "`%2`" reports the second. We
348 will need a function to print the location, once we know how that is
349 stored. As will be explained later, there are sometimes extra rules for
350 type matching and they might affect error messages, so we need to pass those
353 As well as type errors, we sometimes need to report problems with
354 tokens, which might be unexpected or might name a type that has not
355 been defined. For these we have `tok_err()` which reports an error
356 with a given token. Each of the error functions sets the flag in the
357 context to indicate that parsing failed.
361 static void fput_loc(struct exec *loc, FILE *f);
363 ###### core functions
365 static void type_err(struct parse_context *c,
366 char *fmt, struct exec *loc,
367 struct type *t1, int rules, struct type *t2)
369 fprintf(stderr, "%s:", c->file_name);
370 fput_loc(loc, stderr);
371 for (; *fmt ; fmt++) {
378 case '%': fputc(*fmt, stderr); break;
379 default: fputc('?', stderr); break;
382 fprintf(stderr, "%.*s", t1->name.len, t1->name.txt);
384 fputs("*unknown*", stderr);
388 fprintf(stderr, "%.*s", t2->name.len, t2->name.txt);
390 fputs("*unknown*", stderr);
400 static void tok_err(struct parse_context *c, char *fmt, struct token *t)
402 fprintf(stderr, "%s:%d:%d: %s: %.*s\n", c->file_name, t->line, t->col, fmt,
403 t->txt.len, t->txt.txt);
409 One last introductory step before detailing the language elements and
410 providing their four requirements is to establish the data structures
411 to store these elements.
413 There are two key objects that we need to work with: executable
414 elements which comprise the program, and values which the program
415 works with. Between these are the variables in their various scopes
416 which hold the values, and types which classify the values stored and
417 manipulated by executables.
421 Values come in a wide range of types, with more likely to be added.
422 Each type needs to be able to parse and print its own values (for
423 convenience at least) as well as to compare two values, at least for
424 equality and possibly for order. For now, values might need to be
425 duplicated and freed, though eventually such manipulations will be
426 better integrated into the language.
428 Rather than requiring every numeric type to support all numeric
429 operations (add, multiply, etc), we allow types to be able to present
430 as one of a few standard types: integer, float, and fraction. The
431 existence of these conversion functions enables types to determine if
432 they are compatible with other types.
434 Named types are stored in a simple linked list. Objects of each type are "values"
435 which are often passed around by value.
442 ## value union fields
449 struct value (*init)(struct type *type);
450 struct value (*parse)(struct type *type, char *str);
451 void (*print)(struct value val);
452 int (*cmp_order)(struct value v1, struct value v2);
453 int (*cmp_eq)(struct value v1, struct value v2);
454 struct value (*dup)(struct value val);
455 void (*free)(struct value val);
456 struct type *(*compat)(struct type *this, struct type *other);
457 long long (*to_int)(struct value *v);
458 double (*to_float)(struct value *v);
459 int (*to_mpq)(mpq_t *q, struct value *v);
467 struct type *typelist;
471 static struct type *find_type(struct parse_context *c, struct text s)
473 struct type *l = c->typelist;
476 text_cmp(l->name, s) != 0)
481 static struct type *add_type(struct parse_context *c, struct text s,
486 n = calloc(1, sizeof(*n));
489 n->next = c->typelist;
494 static void free_type(struct type *t)
496 /* The type is always a reference to something in the
497 * context, so we don't need to free anything.
501 static void free_value(struct value v)
507 static struct value val_init(struct type *type)
512 return type->init(type);
517 static struct value dup_value(struct value v)
520 return v.type->dup(v);
524 static int value_cmp(struct value left, struct value right)
526 if (left.type && left.type->cmp_order)
527 return left.type->cmp_order(left, right);
528 if (left.type && left.type->cmp_eq)
529 return left.type->cmp_eq(left, right);
533 static void print_value(struct value v)
535 if (v.type && v.type->print)
541 static struct value parse_value(struct type *type, char *arg)
545 if (type && type->parse)
546 return type->parse(type, arg);
551 ###### free context types
553 while (context.typelist) {
554 struct type *t = context.typelist;
556 context.typelist = t->next;
562 Values of the base types can be numbers, which we represent as
563 multi-precision fractions, strings, Booleans and labels. When
564 analysing the program we also need to allow for places where no value
565 is meaningful (type `Tnone`) and where we don't know what type to
566 expect yet (type is `NULL`).
568 Values are never shared, they are always copied when used, and freed
569 when no longer needed.
571 When propagating type information around the program, we need to
572 determine if two types are compatible, where type `NULL` is compatible
573 with anything. There are two special cases with type compatibility,
574 both related to the Conditional Statement which will be described
575 later. In some cases a Boolean can be accepted as well as some other
576 primary type, and in others any type is acceptable except a label (`Vlabel`).
577 A separate function encoding these cases will simplify some code later.
579 When assigning command line arguments to variables, we need to be able
580 to parse each type from a string.
588 myLDLIBS := libnumber.o libstring.o -lgmp
589 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
591 ###### type union fields
592 enum vtype {Vnone, Vstr, Vnum, Vbool, Vlabel} vtype;
594 ###### value union fields
601 static void _free_value(struct value v)
603 switch (v.type->vtype) {
605 case Vstr: free(v.str.txt); break;
606 case Vnum: mpq_clear(v.num); break;
612 static int vtype_compat(struct type *require, struct type *have, int rules)
614 if ((rules & Rboolok) && have == Tbool)
616 if ((rules & Rnolabel) && have == Tlabel)
618 if (!require || !have)
621 return require == have;
624 ###### value functions
626 static struct value _val_init(struct type *type)
631 switch(type->vtype) {
635 mpq_init(rv.num); break;
637 rv.str.txt = malloc(1);
650 static struct value _dup_value(struct value v)
654 switch (rv.type->vtype) {
665 mpq_set(rv.num, v.num);
668 rv.str.len = v.str.len;
669 rv.str.txt = malloc(rv.str.len);
670 memcpy(rv.str.txt, v.str.txt, v.str.len);
676 static int _value_cmp(struct value left, struct value right)
679 if (left.type != right.type)
680 return left.type - right.type;
681 switch (left.type->vtype) {
682 case Vlabel: cmp = left.label == right.label ? 0 : 1; break;
683 case Vnum: cmp = mpq_cmp(left.num, right.num); break;
684 case Vstr: cmp = text_cmp(left.str, right.str); break;
685 case Vbool: cmp = left.bool - right.bool; break;
691 static void _print_value(struct value v)
693 switch (v.type->vtype) {
695 printf("*no-value*"); break;
697 printf("*label-%p*", v.label); break;
699 printf("%.*s", v.str.len, v.str.txt); break;
701 printf("%s", v.bool ? "True":"False"); break;
706 mpf_set_q(fl, v.num);
707 gmp_printf("%Fg", fl);
714 static struct value _parse_value(struct type *type, char *arg)
722 switch(type->vtype) {
728 val.str.len = strlen(arg);
729 val.str.txt = malloc(val.str.len);
730 memcpy(val.str.txt, arg, val.str.len);
737 tx.txt = arg; tx.len = strlen(tx.txt);
738 if (number_parse(val.num, tail, tx) == 0)
741 mpq_neg(val.num, val.num);
743 printf("Unsupported suffix: %s\n", arg);
748 if (strcasecmp(arg, "true") == 0 ||
749 strcmp(arg, "1") == 0)
751 else if (strcasecmp(arg, "false") == 0 ||
752 strcmp(arg, "0") == 0)
755 printf("Bad bool: %s\n", arg);
763 static void _free_value(struct value v);
765 static struct type base_prototype = {
767 .parse = _parse_value,
768 .print = _print_value,
769 .cmp_order = _value_cmp,
770 .cmp_eq = _value_cmp,
775 static struct type *Tbool, *Tstr, *Tnum, *Tnone, *Tlabel;
778 static struct type *add_base_type(struct parse_context *c, char *n, enum vtype vt)
780 struct text txt = { n, strlen(n) };
783 t = add_type(c, txt, &base_prototype);
788 ###### context initialization
790 Tbool = add_base_type(&context, "Boolean", Vbool);
791 Tstr = add_base_type(&context, "string", Vstr);
792 Tnum = add_base_type(&context, "number", Vnum);
793 Tnone = add_base_type(&context, "none", Vnone);
794 Tlabel = add_base_type(&context, "label", Vlabel);
798 Variables are scoped named values. We store the names in a linked
799 list of "bindings" sorted lexically, and use sequential search and
806 struct binding *next; // in lexical order
810 This linked list is stored in the parse context so that "reduce"
811 functions can find or add variables, and so the analysis phase can
812 ensure that every variable gets a type.
816 struct binding *varlist; // In lexical order
820 static struct binding *find_binding(struct parse_context *c, struct text s)
822 struct binding **l = &c->varlist;
827 (cmp = text_cmp((*l)->name, s)) < 0)
831 n = calloc(1, sizeof(*n));
838 Each name can be linked to multiple variables defined in different
839 scopes. Each scope starts where the name is declared and continues
840 until the end of the containing code block. Scopes of a given name
841 cannot nest, so a declaration while a name is in-scope is an error.
843 ###### binding fields
844 struct variable *var;
848 struct variable *previous;
850 struct binding *name;
851 struct exec *where_decl;// where name was declared
852 struct exec *where_set; // where type was set
856 While the naming seems strange, we include local constants in the
857 definition of variables. A name declared `var := value` can
858 subsequently be changed, but a name declared `var ::= value` cannot -
861 ###### variable fields
864 Scopes in parallel branches can be partially merged. More
865 specifically, if a given name is declared in both branches of an
866 if/else then its scope is a candidate for merging. Similarly if
867 every branch of an exhaustive switch (e.g. has an "else" clause)
868 declares a given name, then the scopes from the branches are
869 candidates for merging.
871 Note that names declared inside a loop (which is only parallel to
872 itself) are never visible after the loop. Similarly names defined in
873 scopes which are not parallel, such as those started by `for` and
874 `switch`, are never visible after the scope. Only variables defined in
875 both `then` and `else` (including the implicit then after an `if`, and
876 excluding `then` used with `for`) and in all `case`s and `else` of a
877 `switch` or `while` can be visible beyond the `if`/`switch`/`while`.
879 Labels, which are a bit like variables, follow different rules.
880 Labels are not explicitly declared, but if an undeclared name appears
881 in a context where a label is legal, that effectively declares the
882 name as a label. The declaration remains in force (or in scope) at
883 least to the end of the immediately containing block and conditionally
884 in any larger containing block which does not declare the name in some
885 other way. Importantly, the conditional scope extension happens even
886 if the label is only used in one parallel branch of a conditional --
887 when used in one branch it is treated as having been declared in all
890 Merge candidates are tentatively visible beyond the end of the
891 branching statement which creates them. If the name is used, the
892 merge is affirmed and they become a single variable visible at the
893 outer layer. If not - if it is redeclared first - the merge lapses.
895 To track scopes we have an extra stack, implemented as a linked list,
896 which roughly parallels the parse stack and which is used exclusively
897 for scoping. When a new scope is opened, a new frame is pushed and
898 the child-count of the parent frame is incremented. This child-count
899 is used to distinguish between the first of a set of parallel scopes,
900 in which declared variables must not be in scope, and subsequent
901 branches, where they must already be conditionally scoped.
903 To push a new frame *before* any code in the frame is parsed, we need a
904 grammar reduction. This is most easily achieved with a grammar
905 element which derives the empty string, and creates the new scope when
906 it is recognized. This can be placed, for example, between a keyword
907 like "if" and the code following it.
911 struct scope *parent;
917 struct scope *scope_stack;
920 static void scope_pop(struct parse_context *c)
922 struct scope *s = c->scope_stack;
924 c->scope_stack = s->parent;
929 static void scope_push(struct parse_context *c)
931 struct scope *s = calloc(1, sizeof(*s));
933 c->scope_stack->child_count += 1;
934 s->parent = c->scope_stack;
942 OpenScope -> ${ scope_push(config2context(config)); }$
945 Each variable records a scope depth and is in one of four states:
947 - "in scope". This is the case between the declaration of the
948 variable and the end of the containing block, and also between
949 the usage which affirms a merge and the end of that block.
951 The scope depth is not greater than the current parse context scope
952 nest depth. When the block of that depth closes, the state will
953 change. To achieve this, all "in scope" variables are linked
954 together as a stack in nesting order.
956 - "pending". The "in scope" block has closed, but other parallel
957 scopes are still being processed. So far, every parallel block at
958 the same level that has closed has declared the name.
960 The scope depth is the depth of the last parallel block that
961 enclosed the declaration, and that has closed.
963 - "conditionally in scope". The "in scope" block and all parallel
964 scopes have closed, and no further mention of the name has been
965 seen. This state includes a secondary nest depth which records the
966 outermost scope seen since the variable became conditionally in
967 scope. If a use of the name is found, the variable becomes "in
968 scope" and that secondary depth becomes the recorded scope depth.
969 If the name is declared as a new variable, the old variable becomes
970 "out of scope" and the recorded scope depth stays unchanged.
972 - "out of scope". The variable is neither in scope nor conditionally
973 in scope. It is permanently out of scope now and can be removed from
974 the "in scope" stack.
977 ###### variable fields
978 int depth, min_depth;
979 enum { OutScope, PendingScope, CondScope, InScope } scope;
980 struct variable *in_scope;
984 struct variable *in_scope;
986 All variables with the same name are linked together using the
987 'previous' link. Those variables that have
988 been affirmatively merged all have a 'merged' pointer that points to
989 one primary variable - the most recently declared instance. When
990 merging variables, we need to also adjust the 'merged' pointer on any
991 other variables that had previously been merged with the one that will
992 no longer be primary.
994 ###### variable fields
995 struct variable *merged;
999 static void variable_merge(struct variable *primary, struct variable *secondary)
1003 if (primary->merged)
1005 primary = primary->merged;
1007 for (v = primary->previous; v; v=v->previous)
1008 if (v == secondary || v == secondary->merged ||
1009 v->merged == secondary ||
1010 (v->merged && v->merged == secondary->merged)) {
1011 v->scope = OutScope;
1012 v->merged = primary;
1016 ###### free context vars
1018 while (context.varlist) {
1019 struct binding *b = context.varlist;
1020 struct variable *v = b->var;
1021 context.varlist = b->next;
1024 struct variable *t = v;
1032 #### Manipulating Bindings
1034 When a name is conditionally visible, a new declaration discards the
1035 old binding - the condition lapses. Conversely a usage of the name
1036 affirms the visibility and extends it to the end of the containing
1037 block - i.e. the block that contains both the original declaration and
1038 the latest usage. This is determined from `min_depth`. When a
1039 conditionally visible variable gets affirmed like this, it is also
1040 merged with other conditionally visible variables with the same name.
1042 When we parse a variable declaration we either signal an error if the
1043 name is currently bound, or create a new variable at the current nest
1044 depth if the name is unbound or bound to a conditionally scoped or
1045 pending-scope variable. If the previous variable was conditionally
1046 scoped, it and its homonyms become out-of-scope.
1048 When we parse a variable reference (including non-declarative
1049 assignment) we signal an error if the name is not bound or is bound to
1050 a pending-scope variable; update the scope if the name is bound to a
1051 conditionally scoped variable; or just proceed normally if the named
1052 variable is in scope.
1054 When we exit a scope, any variables bound at this level are either
1055 marked out of scope or pending-scoped, depending on whether the
1056 scope was sequential or parallel.
1058 When exiting a parallel scope we check if there are any variables that
1059 were previously pending and are still visible. If there are, then
1060 they weren't redeclared in the most recent scope, so they cannot be
1061 merged and must become out-of-scope. If it is not the first of
1062 parallel scopes (based on `child_count`), we check that there was a
1063 previous binding that is still pending-scope. If there isn't, the new
1064 variable must now be out-of-scope.
1066 When exiting a sequential scope that immediately enclosed parallel
1067 scopes, we need to resolve any pending-scope variables. If there was
1068 no `else` clause, and we cannot determine that the `switch` was exhaustive,
1069 we need to mark all pending-scope variables as out-of-scope. Otherwise
1070 all pending-scope variables become conditionally scoped.
1073 enum closetype { CloseSequential, CloseParallel, CloseElse };
1075 ###### ast functions
1077 static struct variable *var_decl(struct parse_context *c, struct text s)
1079 struct binding *b = find_binding(c, s);
1080 struct variable *v = b->var;
1082 switch (v ? v->scope : OutScope) {
1084 /* Caller will report the error */
1088 v && v->scope == CondScope;
1090 v->scope = OutScope;
1094 v = calloc(1, sizeof(*v));
1095 v->previous = b->var;
1098 v->min_depth = v->depth = c->scope_depth;
1100 v->in_scope = c->in_scope;
1102 v->val = val_init(NULL);
1106 static struct variable *var_ref(struct parse_context *c, struct text s)
1108 struct binding *b = find_binding(c, s);
1109 struct variable *v = b->var;
1110 struct variable *v2;
1112 switch (v ? v->scope : OutScope) {
1115 /* Signal an error - once that is possible */
1118 /* All CondScope variables of this name need to be merged
1119 * and become InScope
1121 v->depth = v->min_depth;
1123 for (v2 = v->previous;
1124 v2 && v2->scope == CondScope;
1126 variable_merge(v, v2);
1134 static void var_block_close(struct parse_context *c, enum closetype ct)
1136 /* close of all variables that are in_scope */
1137 struct variable *v, **vp, *v2;
1140 for (vp = &c->in_scope;
1141 v = *vp, v && v->depth > c->scope_depth && v->min_depth > c->scope_depth;
1145 case CloseParallel: /* handle PendingScope */
1149 if (c->scope_stack->child_count == 1)
1150 v->scope = PendingScope;
1151 else if (v->previous &&
1152 v->previous->scope == PendingScope)
1153 v->scope = PendingScope;
1154 else if (v->val.type == Tlabel)
1155 v->scope = PendingScope;
1156 else if (v->name->var == v)
1157 v->scope = OutScope;
1158 if (ct == CloseElse) {
1159 /* All Pending variables with this name
1160 * are now Conditional */
1162 v2 && v2->scope == PendingScope;
1164 v2->scope = CondScope;
1169 v2 && v2->scope == PendingScope;
1171 if (v2->val.type != Tlabel)
1172 v2->scope = OutScope;
1174 case OutScope: break;
1177 case CloseSequential:
1178 if (v->val.type == Tlabel)
1179 v->scope = PendingScope;
1182 v->scope = OutScope;
1185 /* There was no 'else', so we can only become
1186 * conditional if we know the cases were exhaustive,
1187 * and that doesn't mean anything yet.
1188 * So only labels become conditional..
1191 v2 && v2->scope == PendingScope;
1193 if (v2->val.type == Tlabel) {
1194 v2->scope = CondScope;
1195 v2->min_depth = c->scope_depth;
1197 v2->scope = OutScope;
1200 case OutScope: break;
1204 if (v->scope == OutScope)
1213 Executables can be lots of different things. In many cases an
1214 executable is just an operation combined with one or two other
1215 executables. This allows for expressions and lists etc. Other times
1216 an executable is something quite specific like a constant or variable
1217 name. So we define a `struct exec` to be a general executable with a
1218 type, and a `struct binode` which is a subclass of `exec`, forms a
1219 node in a binary tree, and holds an operation. There will be other
1220 subclasses, and to access these we need to be able to `cast` the
1221 `exec` into the various other types.
1224 #define cast(structname, pointer) ({ /* checked downcast: abort() if the stored type tag is not X##structname */ \
1225 const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; /* address of the type tag inside *pointer */ \
1226 if (__mptr && *__mptr != X##structname) abort(); /* a NULL __mptr skips the tag check */ \
1227 (struct structname *)( (char *)__mptr);}) /* result is the tag's address retyped; presumably `type` is the first member - TODO confirm */
1229 #define new(structname) ({ \
1230 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1231 __ptr->type = X##structname; \
1232 __ptr->line = -1; __ptr->column = -1; \
1235 #define new_pos(structname, token) ({ \
1236 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1237 __ptr->type = X##structname; \
1238 __ptr->line = token.line; __ptr->column = token.col; \
1247 enum exec_types type;
1255 struct exec *left, *right;
1258 ###### ast functions
1260 static int __fput_loc(struct exec *loc, FILE *f)
1262 if (loc->line >= 0) {
1263 fprintf(f, "%d:%d: ", loc->line, loc->column);
1266 if (loc->type == Xbinode)
1267 return __fput_loc(cast(binode,loc)->left, f) ||
1268 __fput_loc(cast(binode,loc)->right, f);
1271 static void fput_loc(struct exec *loc, FILE *f)
1273 if (!__fput_loc(loc, f))
1274 fprintf(f, "??:??: ");
1277 Each different type of `exec` node needs a number of functions
1278 defined, a bit like methods. We must be able to free it,
1279 print it, analyse it and execute it. Once we have specific `exec`
1280 types we will need to parse them too. Let's take this a bit more
1285 The parser generator requires a `free_foo` function for each struct
1286 that stores attributes and they will be `exec`s and subtypes there-of.
1287 So we need `free_exec` which can handle all the subtypes, and we need
1290 ###### ast functions
1292 static void free_binode(struct binode *b)
1297 free_exec(b->right);
1301 ###### core functions
1302 static void free_exec(struct exec *e)
1311 ###### forward decls
1313 static void free_exec(struct exec *e);
1315 ###### free exec cases
1316 case Xbinode: free_binode(cast(binode, e)); break;
1320 Printing an `exec` requires that we know the current indent level for
1321 printing line-oriented components. As will become clear later, we
1322 also want to know what sort of bracketing to use.
1324 ###### ast functions
1326 static void do_indent(int i, char *str)
1333 ###### core functions
1334 static void print_binode(struct binode *b, int indent, int bracket)
1338 ## print binode cases
1342 static void print_exec(struct exec *e, int indent, int bracket)
1348 print_binode(cast(binode, e), indent, bracket); break;
1353 ###### forward decls
1355 static void print_exec(struct exec *e, int indent, int bracket);
1359 As discussed, analysis involves propagating type requirements around
1360 the program and looking for errors.
1362 So `propagate_types` is passed an expected type (being a `struct type`
1363 pointer together with some `val_rules` flags) that the `exec` is
1364 expected to return, and returns the type that it does return, either
1365 of which can be `NULL` signifying "unknown". An `ok` flag is passed
1366 by reference. It is set to `0` when an error is found, and `2` when
1367 any change is made. If it remains unchanged at `1`, then no more
1368 propagation is needed.
1372 enum val_rules {Rnolabel = 1<<0, Rboolok = 1<<1};
1376 if (rules & Rnolabel)
1377 fputs(" (labels not permitted)", stderr);
1380 ###### core functions
1382 static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1383 struct type *type, int rules)
1390 switch (prog->type) {
1393 struct binode *b = cast(binode, prog);
1395 ## propagate binode cases
1399 ## propagate exec cases
1406 Interpreting an `exec` doesn't require anything but the `exec`. State
1407 is stored in variables and each variable will be directly linked from
1408 within the `exec` tree. The exception to this is the whole `program`
1409 which needs to look at command line arguments. The `program` will be
1410 interpreted separately.
1412 Each `exec` can return a value, which may be `Tnone` but must be non-NULL;
1414 ###### core functions
1416 static struct value interp_exec(struct exec *e)
1426 struct binode *b = cast(binode, e);
1427 struct value left, right;
1428 left.type = right.type = Tnone;
1430 ## interp binode cases
1432 free_value(left); free_value(right);
1435 ## interp exec cases
1440 ## Language elements
1442 Each language element needs to be parsed, printed, analysed,
1443 interpreted, and freed. There are several, so let's just start with
1444 the easy ones and work our way up.
1448 We have already met values as separate objects. When manifest
1449 constants appear in the program text, that must result in an executable
1450 which has a constant value. So the `val` structure embeds a value in
1466 $0 = new_pos(val, $1);
1467 $0->val.type = Tbool;
1471 $0 = new_pos(val, $1);
1472 $0->val.type = Tbool;
1476 $0 = new_pos(val, $1);
1477 $0->val.type = Tnum;
1480 if (number_parse($0->val.num, tail, $1.txt) == 0)
1481 mpq_init($0->val.num);
1483 tok_err(config2context(config), "error: unsupported number suffix",
1488 $0 = new_pos(val, $1);
1489 $0->val.type = Tstr;
1492 string_parse(&$1, '\\', &$0->val.str, tail);
1494 tok_err(config2context(config), "error: unsupported string suffix",
1499 $0 = new_pos(val, $1);
1500 $0->val.type = Tstr;
1503 string_parse(&$1, '\\', &$0->val.str, tail);
1505 tok_err(config2context(config), "error: unsupported string suffix",
1510 ###### print exec cases
1513 struct val *v = cast(val, e);
1514 if (v->val.type == Tstr)
1516 print_value(v->val);
1517 if (v->val.type == Tstr)
1522 ###### propagate exec cases
1525 struct val *val = cast(val, prog);
1526 if (!vtype_compat(type, val->val.type, rules)) {
1527 type_err(c, "error: expected %1%r found %2",
1528 prog, type, rules, val->val.type);
1531 return val->val.type;
1534 ###### interp exec cases
1536 return dup_value(cast(val, e)->val);
1538 ###### ast functions
1539 static void free_val(struct val *v)
1547 ###### free exec cases
1548 case Xval: free_val(cast(val, e)); break;
1550 ###### ast functions
1551 // Move all nodes from 'b' to 'rv', reversing the order.
1552 // In 'b' 'left' is a list, and 'right' is the last node.
1553 // In 'rv', 'left' is the first node and 'right' is a list.
1554 static struct binode *reorder_bilist(struct binode *b)
1556 struct binode *rv = NULL;
1559 struct exec *t = b->right;
1563 b = cast(binode, b->left);
1573 Just as we used a `val` to wrap a value into an `exec`, we similarly
1574 need a `var` to wrap a `variable` into an exec. While each `val`
1575 contained a copy of the value, each `var` holds a link to the variable
1576 because it really is the same variable no matter where it appears.
1577 When a variable is used, we need to remember to follow the `->merged`
1578 link to find the primary instance.
1586 struct variable *var;
1592 VariableDecl -> IDENTIFIER : ${ {
1593 struct variable *v = var_decl(config2context(config), $1.txt);
1594 $0 = new_pos(var, $1);
1599 v = var_ref(config2context(config), $1.txt);
1601 type_err(config2context(config), "error: variable '%v' redeclared",
1602 $0, Tnone, 0, Tnone);
1603 type_err(config2context(config), "info: this is where '%v' was first declared",
1604 v->where_decl, Tnone, 0, Tnone);
1607 | IDENTIFIER :: ${ {
1608 struct variable *v = var_decl(config2context(config), $1.txt);
1609 $0 = new_pos(var, $1);
1615 v = var_ref(config2context(config), $1.txt);
1617 type_err(config2context(config), "error: variable '%v' redeclared",
1618 $0, Tnone, 0, Tnone);
1619 type_err(config2context(config), "info: this is where '%v' was first declared",
1620 v->where_decl, Tnone, 0, Tnone);
1623 | IDENTIFIER : Type ${ {
1624 struct variable *v = var_decl(config2context(config), $1.txt);
1625 $0 = new_pos(var, $1);
1630 v->val = val_init($<3);
1632 v = var_ref(config2context(config), $1.txt);
1634 type_err(config2context(config), "error: variable '%v' redeclared",
1635 $0, Tnone, 0, Tnone);
1636 type_err(config2context(config), "info: this is where '%v' was first declared",
1637 v->where_decl, Tnone, 0, Tnone);
1640 | IDENTIFIER :: Type ${ {
1641 struct variable *v = var_decl(config2context(config), $1.txt);
1642 $0 = new_pos(var, $1);
1647 v->val = val_init($<3);
1650 v = var_ref(config2context(config), $1.txt);
1652 type_err(config2context(config), "error: variable '%v' redeclared",
1653 $0, Tnone, 0, Tnone);
1654 type_err(config2context(config), "info: this is where '%v' was first declared",
1655 v->where_decl, Tnone, 0, Tnone);
1659 Variable -> IDENTIFIER ${ {
1660 struct variable *v = var_ref(config2context(config), $1.txt);
1661 $0 = new_pos(var, $1);
1663 /* This might be a label - allocate a var just in case */
1664 v = var_decl(config2context(config), $1.txt);
1666 v->val = val_init(Tlabel);
1667 v->val.label = &v->val;
1675 Type -> IDENTIFIER ${
1676 $0 = find_type(config2context(config), $1.txt);
1678 tok_err(config2context(config),
1679 "error: undefined type", &$1);
1685 ###### print exec cases
1688 struct var *v = cast(var, e);
1690 struct binding *b = v->var->name;
1691 printf("%.*s", b->name.len, b->name.txt);
1698 if (loc->type == Xvar) {
1699 struct var *v = cast(var, loc);
1701 struct binding *b = v->var->name;
1702 fprintf(stderr, "%.*s", b->name.len, b->name.txt);
1704 fputs("???", stderr);
1706 fputs("NOTVAR", stderr);
1709 ###### propagate exec cases
1713 struct var *var = cast(var, prog);
1714 struct variable *v = var->var;
1716 type_err(c, "%d:BUG: no variable!!", prog, Tnone, 0, Tnone);
1722 if (v->val.type == NULL) {
1723 if (type && *ok != 0) {
1724 v->val = val_init(type);
1725 v->where_set = prog;
1730 if (!vtype_compat(type, v->val.type, rules)) {
1731 type_err(c, "error: expected %1%r but variable '%v' is %2", prog,
1732 type, rules, v->val.type);
1733 type_err(c, "info: this is where '%v' was set to %1", v->where_set,
1734 v->val.type, rules, Tnone);
1742 ###### interp exec cases
1745 struct var *var = cast(var, e);
1746 struct variable *v = var->var;
1750 return dup_value(v->val);
1753 ###### ast functions
1755 static void free_var(struct var *v)
1760 ###### free exec cases
1761 case Xvar: free_var(cast(var, e)); break;
1763 ### Expressions: Boolean
1765 Our first user of the `binode` will be expressions, and particularly
1766 Boolean expressions. As I haven't implemented precedence in the
1767 parser generator yet, we need different names for each precedence
1768 level used by expressions. The outermost or lowest level precedence
1769 are Boolean `or`, `and`, and `not`, which form an `Expression` out of `BTerm`s
1780 Expression -> Expression or BTerm ${ {
1781 struct binode *b = new(binode);
1787 | BTerm ${ $0 = $<1; }$
1789 BTerm -> BTerm and BFact ${ {
1790 struct binode *b = new(binode);
1796 | BFact ${ $0 = $<1; }$
1798 BFact -> not BFact ${ {
1799 struct binode *b = new(binode);
1806 ###### print binode cases
1808 print_exec(b->left, -1, 0);
1810 print_exec(b->right, -1, 0);
1813 print_exec(b->left, -1, 0);
1815 print_exec(b->right, -1, 0);
1819 print_exec(b->right, -1, 0);
1822 ###### propagate binode cases
1826 /* both must be Tbool, result is Tbool */
1827 propagate_types(b->left, c, ok, Tbool, 0);
1828 propagate_types(b->right, c, ok, Tbool, 0);
1829 if (type && type != Tbool) {
1830 type_err(c, "error: %1 operation found where %2 expected", prog,
1836 ###### interp binode cases
1838 rv = interp_exec(b->left);
1839 right = interp_exec(b->right);
1840 rv.bool = rv.bool && right.bool;
1843 rv = interp_exec(b->left);
1844 right = interp_exec(b->right);
1845 rv.bool = rv.bool || right.bool;
1848 rv = interp_exec(b->right);
1852 ### Expressions: Comparison
1854 Of slightly higher precedence than Boolean expressions are
1856 A comparison takes arguments of any type, but the two types must be
1859 To simplify the parsing we introduce an `eop` which can record an
1860 expression operator.
1867 ###### ast functions
1868 static void free_eop(struct eop *e)
1883 | Expr CMPop Expr ${ {
1884 struct binode *b = new(binode);
1890 | Expr ${ $0 = $<1; }$
1895 CMPop -> < ${ $0.op = Less; }$
1896 | > ${ $0.op = Gtr; }$
1897 | <= ${ $0.op = LessEq; }$
1898 | >= ${ $0.op = GtrEq; }$
1899 | == ${ $0.op = Eql; }$
1900 | != ${ $0.op = NEql; }$
1902 ###### print binode cases
1910 print_exec(b->left, -1, 0);
1912 case Less: printf(" < "); break;
1913 case LessEq: printf(" <= "); break;
1914 case Gtr: printf(" > "); break;
1915 case GtrEq: printf(" >= "); break;
1916 case Eql: printf(" == "); break;
1917 case NEql: printf(" != "); break;
1920 print_exec(b->right, -1, 0);
1923 ###### propagate binode cases
1930 /* Both must match but not be labels, result is Tbool */
1931 t = propagate_types(b->left, c, ok, NULL, Rnolabel);
1933 propagate_types(b->right, c, ok, t, 0);
1935 t = propagate_types(b->right, c, ok, NULL, Rnolabel);
1937 t = propagate_types(b->left, c, ok, t, 0);
1939 if (!vtype_compat(type, Tbool, 0)) {
1940 type_err(c, "error: Comparison returns %1 but %2 expected", prog,
1941 Tbool, rules, type);
1946 ###### interp binode cases
1955 left = interp_exec(b->left);
1956 right = interp_exec(b->right);
1957 cmp = value_cmp(left, right);
1960 case Less: rv.bool = cmp < 0; break;
1961 case LessEq: rv.bool = cmp <= 0; break;
1962 case Gtr: rv.bool = cmp > 0; break;
1963 case GtrEq: rv.bool = cmp >= 0; break;
1964 case Eql: rv.bool = cmp == 0; break;
1965 case NEql: rv.bool = cmp != 0; break;
1966 default: rv.bool = 0; break;
1971 ### Expressions: The rest
1973 The remaining expressions with the highest precedence are arithmetic
1974 and string concatenation. They are `Expr`, `Term`, and `Factor`.
1975 The `Factor` is where the `Value` and `Variable` that we already have
1978 `+` and `-` are both infix and prefix operations (where they are
1979 absolute value and negation). These have different operator names.
1981 We also have a 'Bracket' operator which records where parentheses were
1982 found. This makes it easy to reproduce these when printing. Once
1983 precedence is handled better I might be able to discard this.
1995 Expr -> Expr Eop Term ${ {
1996 struct binode *b = new(binode);
2002 | Term ${ $0 = $<1; }$
2004 Term -> Term Top Factor ${ {
2005 struct binode *b = new(binode);
2011 | Factor ${ $0 = $<1; }$
2013 Factor -> ( Expression ) ${ {
2014 struct binode *b = new_pos(binode, $1);
2020 struct binode *b = new(binode);
2025 | Value ${ $0 = $<1; }$
2026 | Variable ${ $0 = $<1; }$
2029 Eop -> + ${ $0.op = Plus; }$
2030 | - ${ $0.op = Minus; }$
2032 Uop -> + ${ $0.op = Absolute; }$
2033 | - ${ $0.op = Negate; }$
2035 Top -> * ${ $0.op = Times; }$
2036 | / ${ $0.op = Divide; }$
2037 | ++ ${ $0.op = Concat; }$
2039 ###### print binode cases
2045 print_exec(b->left, indent, 0);
2047 case Plus: printf(" + "); break;
2048 case Minus: printf(" - "); break;
2049 case Times: printf(" * "); break;
2050 case Divide: printf(" / "); break;
2051 case Concat: printf(" ++ "); break;
2054 print_exec(b->right, indent, 0);
2058 print_exec(b->right, indent, 0);
2062 print_exec(b->right, indent, 0);
2066 print_exec(b->right, indent, 0);
2070 ###### propagate binode cases
2075 /* both must be numbers, result is Tnum */
2078 /* as propagate_types ignores a NULL,
2079 * unary ops fit here too */
2080 propagate_types(b->left, c, ok, Tnum, 0);
2081 propagate_types(b->right, c, ok, Tnum, 0);
2082 if (!vtype_compat(type, Tnum, 0)) {
2083 type_err(c, "error: Arithmetic returns %1 but %2 expected", prog,
2090 /* both must be Tstr, result is Tstr */
2091 propagate_types(b->left, c, ok, Tstr, 0);
2092 propagate_types(b->right, c, ok, Tstr, 0);
2093 if (!vtype_compat(type, Tstr, 0)) {
2094 type_err(c, "error: Concat returns %1 but %2 expected", prog,
2101 return propagate_types(b->right, c, ok, type, 0);
2103 ###### interp binode cases
2106 rv = interp_exec(b->left);
2107 right = interp_exec(b->right);
2108 mpq_add(rv.num, rv.num, right.num);
2111 rv = interp_exec(b->left);
2112 right = interp_exec(b->right);
2113 mpq_sub(rv.num, rv.num, right.num);
2116 rv = interp_exec(b->left);
2117 right = interp_exec(b->right);
2118 mpq_mul(rv.num, rv.num, right.num);
2121 rv = interp_exec(b->left);
2122 right = interp_exec(b->right);
2123 mpq_div(rv.num, rv.num, right.num);
2126 rv = interp_exec(b->right);
2127 mpq_neg(rv.num, rv.num);
2130 rv = interp_exec(b->right);
2131 mpq_abs(rv.num, rv.num);
2134 rv = interp_exec(b->right);
2137 left = interp_exec(b->left);
2138 right = interp_exec(b->right);
2140 rv.str = text_join(left.str, right.str);
2144 ###### value functions
2146 static struct text text_join(struct text a, struct text b)
2149 rv.len = a.len + b.len;
2150 rv.txt = malloc(rv.len);
2151 memcpy(rv.txt, a.txt, a.len);
2152 memcpy(rv.txt+a.len, b.txt, b.len);
2157 ### Blocks, Statements, and Statement lists.
2159 Now that we have expressions out of the way we need to turn to
2160 statements. There are simple statements and more complex statements.
2161 Simple statements do not contain newlines, complex statements do.
2163 Statements often come in sequences and we have corresponding simple
2164 statement lists and complex statement lists.
2165 The former comprise only simple statements separated by semicolons.
2166 The latter comprise complex statements and simple statement lists. They are
2167 separated by newlines. Thus the semicolon is only used to separate
2168 simple statements on the one line. This may be overly restrictive,
2169 but I'm not sure I ever want a complex statement to share a line with
2172 Note that a simple statement list can still use multiple lines if
2173 subsequent lines are indented, so
2175 ###### Example: wrapped simple statement list
2180 is a single simple statement list. This might allow room for
2181 confusion, so I'm not set on it yet.
2183 A simple statement list needs no extra syntax. A complex statement
2184 list has two syntactic forms. It can be enclosed in braces (much like
2185 C blocks), or it can be introduced by a colon and continue until an
2186 unindented newline (much like Python blocks). With this extra syntax
2187 it is referred to as a block.
2189 Note that a block does not have to include any newlines if it only
2190 contains simple statements. So both of:
2192 if condition: a=b; d=f
2194 if condition { a=b; print f }
2198 In either case the list is constructed from a `binode` list with
2199 `Block` as the operator. When parsing the list it is most convenient
2200 to append to the end, so a list is a list and a statement. When using
2201 the list it is more convenient to consider a list to be a statement
2202 and a list. So we need a function to re-order a list.
2203 `reorder_bilist` serves this purpose.
2205 The only stand-alone statement we introduce at this stage is `pass`
2206 which does nothing and is represented as a `NULL` pointer in a `Block`
2207 list. Other stand-alone statements will follow once the infrastructure
2227 Block -> Open Statementlist Close ${ $0 = $<2; }$
2228 | Open Newlines Statementlist Close ${ $0 = $<3; }$
2229 | Open SimpleStatements } ${ $0 = reorder_bilist($<2); }$
2230 | Open Newlines SimpleStatements } ${ $0 = reorder_bilist($<3); }$
2231 | : Statementlist ${ $0 = $<2; }$
2232 | : SimpleStatements ${ $0 = reorder_bilist($<2); }$
2234 Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<1); }$
2236 ComplexStatements -> ComplexStatements ComplexStatement ${
2242 | ComplexStatements NEWLINE ${ $0 = $<1; }$
2243 | ComplexStatement ${
2251 ComplexStatement -> SimpleStatements NEWLINE ${
2252 $0 = reorder_bilist($<1);
2254 ## ComplexStatement Grammar
2257 SimpleStatements -> SimpleStatements ; SimpleStatement ${
2263 | SimpleStatement ${
2269 | SimpleStatements ; ${ $0 = $<1; }$
2271 SimpleStatement -> pass ${ $0 = NULL; }$
2272 ## SimpleStatement Grammar
2274 ###### print binode cases
2278 if (b->left == NULL)
2281 print_exec(b->left, indent, 0);
2284 print_exec(b->right, indent, 0);
2287 // block, one per line
2288 if (b->left == NULL)
2289 do_indent(indent, "pass\n");
2291 print_exec(b->left, indent, bracket);
2293 print_exec(b->right, indent, bracket);
2297 ###### propagate binode cases
2300 /* If any statement returns something other than Tnone
2301 * or Tbool then all such must return same type.
2302 * As each statement may be Tnone or something else,
2303 * we must always pass NULL (unknown) down, otherwise an incorrect
2304 * error might occur. We never return Tnone unless it is
2309 for (e = b; e; e = cast(binode, e->right)) {
2310 t = propagate_types(e->left, c, ok, NULL, rules);
2311 if ((rules & Rboolok) && t == Tbool)
2313 if (t && t != Tnone && t != Tbool) {
2316 else if (t != type) {
2317 type_err(c, "error: expected %1%r, found %2",
2318 e->left, type, rules, t);
2326 ###### interp binode cases
2328 while (rv.type == Tnone &&
2331 rv = interp_exec(b->left);
2332 b = cast(binode, b->right);
2336 ### The Print statement
2338 `print` is a simple statement that takes a comma-separated list of
2339 expressions and prints the values separated by spaces and terminated
2340 by a newline. No control of formatting is possible.
2342 `print` faces the same list-ordering issue as blocks, and uses the
2348 ###### SimpleStatement Grammar
2350 | print ExpressionList ${
2351 $0 = reorder_bilist($<2);
2353 | print ExpressionList , ${
2358 $0 = reorder_bilist($0);
2369 ExpressionList -> ExpressionList , Expression ${
2382 ###### print binode cases
2385 do_indent(indent, "print");
2389 print_exec(b->left, -1, 0);
2393 b = cast(binode, b->right);
2399 ###### propagate binode cases
2402 /* don't care but all must be consistent */
2403 propagate_types(b->left, c, ok, NULL, Rnolabel);
2404 propagate_types(b->right, c, ok, NULL, Rnolabel);
2407 ###### interp binode cases
2413 for ( ; b; b = cast(binode, b->right))
2417 left = interp_exec(b->left);
2430 ###### Assignment statement
2432 An assignment will assign a value to a variable, providing it hasn't
2433 been declared as a constant. The analysis phase ensures that the type
2434 will be correct so the interpreter just needs to perform the
2435 calculation. There is a form of assignment which declares a new
2436 variable as well as assigning a value. If a name is assigned before
2437 it is declared, an error will be raised as the name is created as
2438 `Tlabel` and it is illegal to assign to such names.
2444 ###### SimpleStatement Grammar
2445 | Variable = Expression ${ {
2446 struct var *v = cast(var, $1);
2452 if (v->var && v->var->constant) {
2453 type_err(config2context(config), "Cannot assign to a constant: %v",
2454 $0->left, NULL, 0, NULL);
2455 type_err(config2context(config), "name was defined as a constant here",
2456 v->var->where_decl, NULL, 0, NULL);
2459 | VariableDecl = Expression ${
2467 if ($1->var->where_set == NULL) {
2468 type_err(config2context(config), "Variable declared with no type or value: %v",
2478 ###### print binode cases
2481 do_indent(indent, "");
2482 print_exec(b->left, indent, 0);
2484 print_exec(b->right, indent, 0);
2491 struct variable *v = cast(var, b->left)->var;
2492 do_indent(indent, "");
2493 print_exec(b->left, indent, 0);
2494 if (cast(var, b->left)->var->constant) {
2495 if (v->where_decl == v->where_set)
2496 printf("::%.*s ", v->val.type->name.len,
2497 v->val.type->name.txt);
2501 if (v->where_decl == v->where_set)
2502 printf(":%.*s ", v->val.type->name.len,
2503 v->val.type->name.txt);
2509 print_exec(b->right, indent, 0);
2516 ###### propagate binode cases
2520 /* Both must match and not be labels, result is Tnone */
2521 t = propagate_types(b->left, c, ok, NULL, Rnolabel);
2526 if (propagate_types(b->right, c, ok, t, 0) != t)
2527 if (b->left->type == Xvar)
2528 type_err(c, "info: variable '%v' was set as %1 here.",
2529 cast(var, b->left)->var->where_set, t, rules, Tnone);
2531 t = propagate_types(b->right, c, ok, NULL, Rnolabel);
2533 propagate_types(b->left, c, ok, t, 0);
2539 ###### interp binode cases
2543 struct variable *v = cast(var, b->left)->var;
2546 right = interp_exec(b->right);
2555 struct variable *v = cast(var, b->left)->var;
2559 right = interp_exec(b->right);
2561 right = val_init(v->val.type);
2568 ### The `use` statement
2570 The `use` statement is the last "simple" statement. It is needed when
2571 the condition in a conditional statement is a block. `use` works much
2572 like `return` in C, but only completes the `condition`, not the whole
2578 ###### SimpleStatement Grammar
2580 $0 = new_pos(binode, $1);
2585 ###### print binode cases
2588 do_indent(indent, "use ");
2589 print_exec(b->right, -1, 0);
2594 ###### propagate binode cases
2597 /* result matches value */
2598 return propagate_types(b->right, c, ok, type, 0);
2600 ###### interp binode cases
2603 rv = interp_exec(b->right);
2606 ### The Conditional Statement
2608 This is the biggy and currently the only complex statement. This
2609 subsumes `if`, `while`, `do/while`, `switch`, and some parts of `for`.
2610 It is comprised of a number of parts, all of which are optional though
2611 set combinations apply. Each part is (usually) a key word (`then` is
2612 sometimes optional) followed by either an expression or a code block,
2613 except the `casepart` which is a "key word and an expression" followed
2614 by a code block. The code-block option is valid for all parts and,
2615 where an expression is also allowed, the code block can use the `use`
2616 statement to report a value. If the code block does not report a value
2617 the effect is similar to reporting `True`.
2619 The `else` and `case` parts, as well as `then` when combined with
2620 `if`, can contain a `use` statement which will apply to some
2621 containing conditional statement. `for` parts, `do` parts and `then`
2622 parts used with `for` can never contain a `use`, except in some
2623 subordinate conditional statement.
2625 If there is a `forpart`, it is executed first, only once.
2626 If there is a `dopart`, then it is executed repeatedly providing
2627 always that the `condpart` or `cond`, if present, does not return a non-True
2628 value. `condpart` can fail to return any value if it simply executes
2629 to completion. This is treated the same as returning `True`.
2631 If there is a `thenpart` it will be executed whenever the `condpart`
2632 or `cond` returns True (or does not return any value), but this will happen
2633 *after* `dopart` (when present).
2635 If `elsepart` is present it will be executed at most once when the
2636 condition returns `False` or some value that isn't `True` and isn't
2637 matched by any `casepart`. If there are any `casepart`s, they will be
2638 executed when the condition returns a matching value.
2640 The particular sorts of values allowed in case parts has not yet been
2641 determined in the language design, so nothing is prohibited.
2643 The various blocks in this complex statement potentially provide scope
2644 for variables as described earlier. Each such block must include the
2645 "OpenScope" nonterminal before parsing the block, and must call
2646 `var_block_close()` when closing the block.
2648 The code following "`if`", "`switch`" and "`for`" does not get its own
2649 scope, but is in a scope covering the whole statement, so names
2650 declared there cannot be redeclared elsewhere. Similarly the
2651 condition following "`while`" is in a scope that covers the body
2652 ("`do`" part) of the loop, and which does not allow conditional scope
2653 extension. Code following "`then`" (both looping and non-looping),
2654 "`else`" and "`case`" each get their own local scope.
2656 The type requirements on the code block in a `whilepart` are quite
2657 unusual. It is allowed to return a value of some identifiable type, in
2658 which case the loop aborts and an appropriate `casepart` is run, or it
2659 can return a Boolean, in which case the loop either continues to the
2660 `dopart` (on `True`) or aborts and runs the `elsepart` (on `False`).
2661 This is different both from the `ifpart` code block which is expected to
2662 return a Boolean, and from the `switchpart` code block which is expected to
2663 return the same type as the casepart values. The correct analysis of
2664 the type of the `whilepart` code block is the reason for the
2665 `Rboolok` flag which is passed to `propagate_types()`.
2667 The `cond_statement` cannot fit into a `binode` so a new `exec` is
2676 struct exec *action;
2677 struct casepart *next;
2679 struct cond_statement {
2681 struct exec *forpart, *condpart, *dopart, *thenpart, *elsepart;
2682 struct casepart *casepart;
2685 ###### ast functions
2687 static void free_casepart(struct casepart *cp)
2691 free_exec(cp->value);
2692 free_exec(cp->action);
2699 static void free_cond_statement(struct cond_statement *s)
2703 free_exec(s->forpart);
2704 free_exec(s->condpart);
2705 free_exec(s->dopart);
2706 free_exec(s->thenpart);
2707 free_exec(s->elsepart);
2708 free_casepart(s->casepart);
2712 ###### free exec cases
2713 case Xcond_statement: free_cond_statement(cast(cond_statement, e)); break;
2715 ###### ComplexStatement Grammar
2716 | CondStatement ${ $0 = $<1; }$
2721 // both ForThen and Whilepart open scopes, and CondSuffix only
2722 // closes one - so in the first branch here we have another to close.
2723 CondStatement -> ForThen WhilePart CondSuffix ${
2725 $0->forpart = $1.forpart; $1.forpart = NULL;
2726 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
2727 $0->condpart = $2.condpart; $2.condpart = NULL;
2728 $0->dopart = $2.dopart; $2.dopart = NULL;
2729 var_block_close(config2context(config), CloseSequential);
2731 | WhilePart CondSuffix ${
2733 $0->condpart = $1.condpart; $1.condpart = NULL;
2734 $0->dopart = $1.dopart; $1.dopart = NULL;
2736 | SwitchPart CondSuffix ${
2740 | IfPart IfSuffix ${
2742 $0->condpart = $1.condpart; $1.condpart = NULL;
2743 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
2744 // This is where we close an "if" statement
2745 var_block_close(config2context(config), CloseSequential);
2748 CondSuffix -> IfSuffix ${
2750 // This is where we close scope of the whole
2751 // "for" or "while" statement
2752 var_block_close(config2context(config), CloseSequential);
2754 | CasePart CondSuffix ${
2756 $1->next = $0->casepart;
2761 CasePart -> Newlines case Expression OpenScope Block ${
2762 $0 = calloc(1,sizeof(struct casepart));
2765 var_block_close(config2context(config), CloseParallel);
2767 | case Expression OpenScope Block ${
2768 $0 = calloc(1,sizeof(struct casepart));
2771 var_block_close(config2context(config), CloseParallel);
2775 IfSuffix -> Newlines ${ $0 = new(cond_statement); }$
2776 | Newlines else OpenScope Block ${
2777 $0 = new(cond_statement);
2779 var_block_close(config2context(config), CloseElse);
2781 | else OpenScope Block ${
2782 $0 = new(cond_statement);
2784 var_block_close(config2context(config), CloseElse);
2786 | Newlines else OpenScope CondStatement ${
2787 $0 = new(cond_statement);
2789 var_block_close(config2context(config), CloseElse);
2791 | else OpenScope CondStatement ${
2792 $0 = new(cond_statement);
2794 var_block_close(config2context(config), CloseElse);
2799 // These scopes are closed in CondSuffix
2800 ForPart -> for OpenScope SimpleStatements ${
2801 $0 = reorder_bilist($<3);
2803 | for OpenScope Block ${
2807 ThenPart -> then OpenScope SimpleStatements ${
2808 $0 = reorder_bilist($<3);
2809 var_block_close(config2context(config), CloseSequential);
2811 | then OpenScope Block ${
2813 var_block_close(config2context(config), CloseSequential);
2816 ThenPartNL -> ThenPart OptNL ${
2820 // This scope is closed in CondSuffix
2821 WhileHead -> while OpenScope Block ${
2826 ForThen -> ForPart OptNL ThenPartNL ${
2834 // This scope is closed in CondSuffix
2835 WhilePart -> while OpenScope Expression Block ${
2836 $0.type = Xcond_statement;
2840 | WhileHead OptNL do Block ${
2841 $0.type = Xcond_statement;
2846 IfPart -> if OpenScope Expression OpenScope Block ${
2847 $0.type = Xcond_statement;
2850 var_block_close(config2context(config), CloseParallel);
2852 | if OpenScope Block OptNL then OpenScope Block ${
2853 $0.type = Xcond_statement;
2856 var_block_close(config2context(config), CloseParallel);
2860 // This scope is closed in CondSuffix
2861 SwitchPart -> switch OpenScope Expression ${
2864 | switch OpenScope Block ${
2868 ###### print exec cases
2870 case Xcond_statement:
2872 struct cond_statement *cs = cast(cond_statement, e);
2873 struct casepart *cp;
2875 do_indent(indent, "for");
2876 if (bracket) printf(" {\n"); else printf(":\n");
2877 print_exec(cs->forpart, indent+1, bracket);
2880 do_indent(indent, "} then {\n");
2882 do_indent(indent, "then:\n");
2883 print_exec(cs->thenpart, indent+1, bracket);
2885 if (bracket) do_indent(indent, "}\n");
2889 if (cs->condpart && cs->condpart->type == Xbinode &&
2890 cast(binode, cs->condpart)->op == Block) {
2892 do_indent(indent, "while {\n");
2894 do_indent(indent, "while:\n");
2895 print_exec(cs->condpart, indent+1, bracket);
2897 do_indent(indent, "} do {\n");
2899 do_indent(indent, "do:\n");
2900 print_exec(cs->dopart, indent+1, bracket);
2902 do_indent(indent, "}\n");
2904 do_indent(indent, "while ");
2905 print_exec(cs->condpart, 0, bracket);
2910 print_exec(cs->dopart, indent+1, bracket);
2912 do_indent(indent, "}\n");
2917 do_indent(indent, "switch");
2919 do_indent(indent, "if");
2920 if (cs->condpart && cs->condpart->type == Xbinode &&
2921 cast(binode, cs->condpart)->op == Block) {
2926 print_exec(cs->condpart, indent+1, bracket);
2928 do_indent(indent, "}\n");
2930 do_indent(indent, "then:\n");
2931 print_exec(cs->thenpart, indent+1, bracket);
2935 print_exec(cs->condpart, 0, bracket);
2941 print_exec(cs->thenpart, indent+1, bracket);
2943 do_indent(indent, "}\n");
2948 for (cp = cs->casepart; cp; cp = cp->next) {
2949 do_indent(indent, "case ");
2950 print_exec(cp->value, -1, 0);
2955 print_exec(cp->action, indent+1, bracket);
2957 do_indent(indent, "}\n");
2960 do_indent(indent, "else");
2965 print_exec(cs->elsepart, indent+1, bracket);
2967 do_indent(indent, "}\n");
2972 ###### propagate exec cases
2973 case Xcond_statement:
2975 // forpart and dopart must return Tnone
2976 // thenpart must return Tnone if there is a dopart,
2977 // otherwise it is like elsepart.
2979 // be bool if there is no casepart
2980 // match casepart->values if there is a switchpart
2981 // either be bool or match casepart->value if there
2983 // elsepart and casepart->action must match the return type
2984 // expected of this statement.
2985 struct cond_statement *cs = cast(cond_statement, prog);
2986 struct casepart *cp;
2988 t = propagate_types(cs->forpart, c, ok, Tnone, 0);
2989 if (!vtype_compat(Tnone, t, 0))
2991 t = propagate_types(cs->dopart, c, ok, Tnone, 0);
2992 if (!vtype_compat(Tnone, t, 0))
2995 t = propagate_types(cs->thenpart, c, ok, Tnone, 0);
2996 if (!vtype_compat(Tnone, t, 0))
2999 if (cs->casepart == NULL)
3000 propagate_types(cs->condpart, c, ok, Tbool, 0);
3002 /* Condpart must match case values, with bool permitted */
3004 for (cp = cs->casepart;
3005 cp && !t; cp = cp->next)
3006 t = propagate_types(cp->value, c, ok, NULL, 0);
3007 if (!t && cs->condpart)
3008 t = propagate_types(cs->condpart, c, ok, NULL, Rboolok);
3009 // Now we have a type (I hope) push it down
3011 for (cp = cs->casepart; cp; cp = cp->next)
3012 propagate_types(cp->value, c, ok, t, 0);
3013 propagate_types(cs->condpart, c, ok, t, Rboolok);
3016 // (if)then, else, and case parts must return expected type.
3017 if (!cs->dopart && !type)
3018 type = propagate_types(cs->thenpart, c, ok, NULL, rules);
3020 type = propagate_types(cs->elsepart, c, ok, NULL, rules);
3021 for (cp = cs->casepart;
3024 type = propagate_types(cp->action, c, ok, NULL, rules);
3027 propagate_types(cs->thenpart, c, ok, type, rules);
3028 propagate_types(cs->elsepart, c, ok, type, rules);
3029 for (cp = cs->casepart; cp ; cp = cp->next)
3030 propagate_types(cp->action, c, ok, type, rules);
3036 ###### interp exec cases
3037 case Xcond_statement:
3039 struct value v, cnd;
3040 struct casepart *cp;
3041 struct cond_statement *c = cast(cond_statement, e);
3044 interp_exec(c->forpart);
3047 cnd = interp_exec(c->condpart);
3050 if (!(cnd.type == Tnone ||
3051 (cnd.type == Tbool && cnd.bool != 0)))
3053 // cnd is Tnone or Tbool, doesn't need to be freed
3055 interp_exec(c->dopart);
3058 v = interp_exec(c->thenpart);
3059 if (v.type != Tnone || !c->dopart)
3063 } while (c->dopart);
3065 for (cp = c->casepart; cp; cp = cp->next) {
3066 v = interp_exec(cp->value);
3067 if (value_cmp(v, cnd) == 0) {
3070 return interp_exec(cp->action);
3076 return interp_exec(c->elsepart);
3081 ### Finally the whole program.
3083 Somewhat reminiscent of Pascal, a (current) Ocean program starts with
3084 the keyword "program" and a list of variable names which are assigned
3085 values from command line arguments. Following this is a `block` which
3086 is the code to execute.
3088 As this is the top level, several things are handled a bit
3090 The whole program is not interpreted by `interp_exec` as that isn't
3091 passed the argument list which the program requires. Similarly, type
3092 analysis is a bit more interesting at this level.
3097 ###### Parser: grammar
3100 Program -> program OpenScope Varlist Block OptNL ${
3103 $0->left = reorder_bilist($<3);
3105 var_block_close(config2context(config), CloseSequential);
3106 if (config2context(config)->scope_stack) abort();
3109 tok_err(config2context(config),
3110 "error: unhandled parse error", &$1);
3113 Varlist -> Varlist ArgDecl ${
3122 ArgDecl -> IDENTIFIER ${ {
3123 struct variable *v = var_decl(config2context(config), $1.txt);
3130 ###### print binode cases
3132 do_indent(indent, "program");
3133 for (b2 = cast(binode, b->left); b2; b2 = cast(binode, b2->right)) {
3135 print_exec(b2->left, 0, 0);
3141 print_exec(b->right, indent+1, bracket);
3143 do_indent(indent, "}\n");
3146 ###### propagate binode cases
3147 case Program: abort();
3149 ###### core functions
3151 static int analyse_prog(struct exec *prog, struct parse_context *c)
3153 struct binode *b = cast(binode, prog);
3160 propagate_types(b->right, c, &ok, Tnone, 0);
3165 for (b = cast(binode, b->left); b; b = cast(binode, b->right)) {
3166 struct var *v = cast(var, b->left);
3167 if (!v->var->val.type) {
3168 v->var->where_set = b;
3169 v->var->val = val_init(Tstr);
3172 b = cast(binode, prog);
3175 propagate_types(b->right, c, &ok, Tnone, 0);
3180 /* Make sure everything is still consistent */
3181 propagate_types(b->right, c, &ok, Tnone, 0);
3185 static void interp_prog(struct exec *prog, char **argv)
3187 struct binode *p = cast(binode, prog);
3193 al = cast(binode, p->left);
3195 struct var *v = cast(var, al->left);
3196 struct value *vl = &v->var->val;
3198 if (argv[0] == NULL) {
3199 printf("Not enough args\n");
3202 al = cast(binode, al->right);
3204 *vl = parse_value(vl->type, argv[0]);
3205 if (vl->type == NULL)
3209 v = interp_exec(p->right);
3213 ###### interp binode cases
3214 case Program: abort();
3216 ## And now to test it out.
3218 Having a language requires having a "hello world" program. I'll
3219 provide a little more than that: a program that prints "Hello world",
3220 finds the GCD of two numbers, prints the first few elements of
3221 Fibonacci, and performs a binary search for a number.
3223 ###### File: oceani.mk
3226 @echo "===== TEST ====="
3227 ./oceani --section "test: hello" oceani.mdc 55 33
3232 print "Hello World, what lovely oceans you have!"
3233 /* When a variable is defined in both branches of an 'if',
3234 * and used afterwards, the variables are merged.
3240 print "Is", A, "bigger than", B,"? ", bigger
3241 /* If a variable is not used after the 'if', no
3242 * merge happens, so types can be different
3245 double:string = "yes"
3246 print A, "is more than twice", B, "?", double
3249 print "double", A, "is only", double
3260 print "GCD of", A, "and", B,"is", a
3262 print a, "is not positive, cannot calculate GCD"
3264 print b, "is not positive, cannot calculate GCD"
3269 print "Fibonacci:", f1,f2,
3270 then togo = togo - 1
3278 /* Binary search... */
3283 mid := (lo + hi) / 2
3295 print "Yay, I found", target
3297 print "Closest I found was", mid