1 # Ocean Interpreter - Stoney Creek version
3 Ocean is intended to be a compiled language, so this interpreter is
4 not targeted at being the final product. It is, rather, an intermediate
5 stage and fills that role in two distinct ways.
7 Firstly, it exists as a platform to experiment with the early language
8 design. An interpreter is easy to write and easy to get working, so
9 the barrier for entry is lower if I aim to start with an interpreter.
11 Secondly, the plan for the Ocean compiler is to write it in the
12 [Ocean language](http://ocean-lang.org). To achieve this we naturally
13 need some sort of boot-strap process and this interpreter - written in
14 portable C - will fill that role. It will be used to bootstrap the
17 Two features that are not needed to fill either of these roles are
18 performance and completeness. The interpreter only needs to be fast
19 enough to run small test programs and occasionally to run the compiler
20 on itself. It only needs to be complete enough to test aspects of the
21 design which are developed before the compiler is working, and to run
22 the compiler on itself. Any features not used by the compiler when
23 compiling itself are superfluous. They may be included anyway, but
26 Nonetheless, the interpreter should end up being reasonably complete,
27 and any performance bottlenecks which appear and are easily fixed, will
32 This second version of the interpreter exists to test out the
33 structured statement providing conditions and iteration, and simple
34 variable scoping. Clearly we need some minimal other functionality so
35 that values can be tested and instructions iterated over. All that
36 functionality is clearly not normative at this stage (not that
37 anything is **really** normative yet) and will change, so early test
38 code will certainly break in later versions.
40 The under-test parts of the language are:
42 - conditional/looping structured statements
43 - the `use` statement which is needed for that
44 - Variable binding using ":=" and "::=", and assignment using "=".
46 Elements which are present to make a usable language are:
48 - "blocks" of multiple statements.
49 - `pass`: a statement which does nothing.
50 - expressions: `+`, `-`, `*`, `/` can apply to numbers and `++` can
51 catenate strings. `and`, `or`, `not` manipulate Booleans, and
52 normal comparison operators can work on all three types.
53 - `print`: will print the values in a list of expressions.
54 - `program`: is given a list of identifiers to initialize from
59 Versions of the interpreter which obviously do not support a complete
60 language will be named after creeks and streams. This one is Stoney
63 Once we have something reasonably resembling a complete language, the
64 names of rivers will be used.
65 Early versions of the compiler will be named after seas. Major
66 releases of the compiler will be named after oceans. Hopefully I will
67 be finished once I get to the Pacific Ocean release.
71 As well as parsing and executing a program, the interpreter can print
72 out the program from the parsed internal structure. This is useful
73 for validating the parsing.
74 So the main requirements of the interpreter are:
76 - Parse the program, possibly with tracing,
77 - Analyse the parsed program to ensure consistency,
79 - Execute the program.
81 This is all performed by a single C program extracted with
84 There will be two formats for printing the program: a default and one
85 that uses bracketing. So a `--brackets` command line option is needed
86 for that. Normally the first code section found is used, however an
87 alternate section can be requested so that a file (such as this one)
88 can contain multiple programs. This is effected with the `--section`
91 This code must be compiled with `-fplan9-extensions` so that anonymous
92 structures can be used.
94 ###### File: oceani.mk
96 myCFLAGS := -Wall -g -fplan9-extensions
97 CFLAGS := $(filter-out $(myCFLAGS),$(CFLAGS)) $(myCFLAGS)
98 myLDLIBS:= libparser.o libscanner.o libmdcode.o -licuuc
99 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
101 all :: $(LDLIBS) oceani
102 oceani.c oceani.h : oceani.mdc parsergen
103 ./parsergen -o oceani --LALR --tag Parser oceani.mdc
104 oceani.mk: oceani.mdc md2c
107 oceani: oceani.o $(LDLIBS)
108 $(CC) $(CFLAGS) -o oceani oceani.o $(LDLIBS)
110 ###### Parser: header
113 struct parse_context {
114 struct token_config config;
122 #define container_of(ptr, type, member) ({ \
123 const typeof( ((type *)0)->member ) *__mptr = (ptr); \
124 (type *)( (char *)__mptr - offsetof(type,member) );})
126 #define config2context(_conf) container_of(_conf, struct parse_context, \
135 #include <sys/mman.h>
154 static char Usage[] = "Usage: oceani --trace --print --noexec --brackets "
155 "--section=SectionName prog.ocn\n"; /* adjacent literals are concatenated, so the first must end with a space */
156 static const struct option long_options[] = {
157 {"trace", 0, NULL, 't'},
158 {"print", 0, NULL, 'p'},
159 {"noexec", 0, NULL, 'n'},
160 {"brackets", 0, NULL, 'b'},
161 {"section", 1, NULL, 's'},
164 const char *options = "tpnbs:"; /* 's' takes an argument (section name), matching {"section", 1, NULL, 's'} */
165 int main(int argc, char *argv[])
171 char *section = NULL;
172 struct parse_context context = {
174 .ignored = (1 << TK_line_comment)
175 | (1 << TK_block_comment),
176 .number_chars = ".,_+-",
181 int doprint=0, dotrace=0, doexec=1, brackets=0;
184 while ((opt = getopt_long(argc, argv, options, long_options, NULL))
187 case 't': dotrace=1; break;
188 case 'p': doprint=1; break;
189 case 'n': doexec=0; break;
190 case 'b': brackets=1; break;
191 case 's': section = optarg; break;
192 default: fprintf(stderr, Usage);
196 if (optind >= argc) {
197 fprintf(stderr, "oceani: no input file given\n");
200 fd = open(argv[optind], O_RDONLY);
202 fprintf(stderr, "oceani: cannot open %s\n", argv[optind]);
205 context.file_name = argv[optind];
206 len = lseek(fd, 0, 2);
207 file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
208 s = code_extract(file, file+len, NULL);
210 fprintf(stderr, "oceani: could not find any code in %s\n",
215 ## context initialization
219 for (ss = s; ss; ss = ss->next) {
220 struct text sec = ss->section;
221 if (sec.len == strlen(section) &&
222 strncmp(sec.txt, section, sec.len) == 0)
226 prog = parse_oceani(ss->code, &context.config,
227 dotrace ? stderr : NULL);
229 fprintf(stderr, "oceani: cannot find section %s\n",
234 prog = parse_oceani(s->code, &context.config,
235 dotrace ? stderr : NULL);
237 fprintf(stderr, "oceani: fatal parser error.\n");
238 context.parse_error = 1;
241 print_exec(*prog, 0, brackets);
242 if (prog && doexec && !context.parse_error) {
243 if (!analyse_prog(*prog, &context)) {
244 fprintf(stderr, "oceani: type error in program - not running.\n");
247 interp_prog(*prog, argv+optind+1);
254 struct section *t = s->next;
260 ## free context types
261 exit(context.parse_error ? 1 : 0);
266 The four requirements of parse, analyse, print, interpret apply to
267 each language element individually so that is how most of the code
270 Three of the four are fairly self explanatory. The one that requires
271 a little explanation is the analysis step.
273 The current language design does not require the types of variables to
274 be declared, but they must still have a single type. Different
275 operations impose different requirements on the variables, for example
276 addition requires both arguments to be numeric, and assignment
277 requires the variable on the left to have the same type as the
278 expression on the right.
280 Analysis involves propagating these type requirements around and
281 consequently setting the type of each variable. If any requirements
282 are violated (e.g. a string is compared with a number) or if a
283 variable needs to have two different types, then an error is raised
284 and the program will not run.
286 If the same variable is declared in both branches of an 'if/else', or
287 in all cases of a 'switch' then the multiple instances may be merged
288 into just one variable if the variable is referenced after the
289 conditional statement. When this happens, the types must naturally be
290 consistent across all the branches. When the variable is not used
291 outside the if, the variables in the different branches are distinct
292 and can be of different types.
294 Determining the types of all variables early is important for
295 processing command line arguments. These can be assigned to any type
296 of variable, but we must first know the correct type so any required
297 conversion can happen. If a variable is associated with a command
298 line argument but no type can be interpreted (e.g. the variable is
299 only ever used in a `print` statement), then the type is set to
302 Undeclared names may only appear in "use" statements and "case" expressions.
303 These names are given a type of "label" and a unique value.
304 This allows them to fill the role of a name in an enumerated type, which
305 is useful for testing the `switch` statement.
307 As we will see, the condition part of a `while` statement can return
308 either a Boolean or some other type. This requires that the expected
309 type that gets passed around comprises a type (`enum vtype`) and a
310 flag to indicate that `Vbool` is also permitted.
312 As there are, as yet, no distinct types that are compatible, there
313 isn't much subtlety in the analysis. When we have distinct number
314 types, this will become more interesting.
318 When analysis discovers an inconsistency it needs to report an error;
319 just refusing to run the code ensures that the error doesn't cascade,
320 but by itself it isn't very useful. A clear understanding of the sort of
321 error messages that are useful will help guide the process of analysis.
323 At a simplistic level, the only sort of error that type analysis can
324 report is that the type of some construct doesn't match a contextual
325 requirement. For example, in `4 + "hello"` the addition provides a
326 contextual requirement for numbers, but `"hello"` is not a number. In
327 this particular example no further information is needed as the types
328 are obvious from local information. When a variable is involved that
329 isn't the case. It may be helpful to explain why the variable has a
330 particular type, by indicating the location where the type was set,
331 whether by declaration or usage.
333 Using a recursive-descent analysis we can easily detect a problem at
334 multiple locations. In "`hello:= "there"; 4 + hello`" the addition
335 will detect that one argument is not a number and the usage of `hello`
336 will detect that a number was wanted, but not provided. In this
337 (early) version of the language, we will generate error reports at
338 multiple locations, so the use of `hello` will report an error and
339 explain where the value was set, and the addition will report an error
340 and say why numbers are needed. To be able to report locations for
341 errors, each language element will need to record a file location
342 (line and column) and each variable will need to record the language
343 element where its type was set. For now we will assume that each line
344 of an error message indicates one location in the file, and up to 2
345 types. So we provide a `printf`-like function which takes a format, a
346 language element (a `struct exec` which has not yet been introduced), and 2
347 types. "`%1`" reports the first type, "`%2`" reports the second. We
348 will need a function to print the location, once we know how that is
349 stored. As will be explained later, there are sometimes extra rules for
350 type matching and they might affect error messages, so we need to pass those
353 As well as type errors, we sometimes need to report problems with
354 tokens, which might be unexpected or might name a type that has not
355 been defined. For these we have `tok_err()` which reports an error
356 with a given token. Each of the error functions sets the flag in the
357 context to indicate that parsing failed.
361 static void fput_loc(struct exec *loc, FILE *f);
363 ###### core functions
365 static void type_err(struct parse_context *c,
366 char *fmt, struct exec *loc,
367 struct type *t1, int rules, struct type *t2)
369 fprintf(stderr, "%s:", c->file_name);
370 fput_loc(loc, stderr);
371 for (; *fmt ; fmt++) {
378 case '%': fputc(*fmt, stderr); break;
379 default: fputc('?', stderr); break;
382 fprintf(stderr, "%.*s", t1->name.len, t1->name.txt);
384 fputs("*unknown*", stderr);
388 fprintf(stderr, "%.*s", t2->name.len, t2->name.txt);
390 fputs("*unknown*", stderr);
400 static void tok_err(struct parse_context *c, char *fmt, struct token *t)
402 fprintf(stderr, "%s:%d:%d: %s: %.*s\n", c->file_name, t->line, t->col, fmt,
403 t->txt.len, t->txt.txt);
409 One last introductory step before detailing the language elements and
410 providing their four requirements is to establish the data structures
411 to store these elements.
413 There are two key objects that we need to work with: executable
414 elements which comprise the program, and values which the program
415 works with. Between these are the variables in their various scopes
416 which hold the values, and types which classify the values stored and
417 manipulated by executables.
421 Values come in a wide range of types, with more likely to be added.
422 Each type needs to be able to parse and print its own values (for
423 convenience at least) as well as to compare two values, at least for
424 equality and possibly for order. For now, values might need to be
425 duplicated and freed, though eventually such manipulations will be
426 better integrated into the language.
428 Rather than requiring every numeric type to support all numeric
429 operations (add, multiply, etc.), we allow types to be able to present
430 as one of a few standard types: integer, float, and fraction. The
431 existence of these conversion functions enables types to determine if
432 they are compatible with other types.
434 Named types are stored in a simple linked list. Objects of each type are "values"
435 which are often passed around by value.
442 ## value union fields
449 struct value (*init)(struct type *type);
450 struct value (*prepare)(struct type *type);
451 struct value (*parse)(struct type *type, char *str);
452 void (*print)(struct value val);
453 int (*cmp_order)(struct value v1, struct value v2);
454 int (*cmp_eq)(struct value v1, struct value v2);
455 struct value (*dup)(struct value val);
456 void (*free)(struct value val);
457 int (*compat)(struct type *this, struct type *other);
458 long long (*to_int)(struct value *v);
459 double (*to_float)(struct value *v);
460 int (*to_mpq)(mpq_t *q, struct value *v);
468 struct type *typelist;
472 static struct type *find_type(struct parse_context *c, struct text s)
474 struct type *l = c->typelist;
477 text_cmp(l->name, s) != 0)
482 static struct type *add_type(struct parse_context *c, struct text s,
487 n = calloc(1, sizeof(*n));
490 n->next = c->typelist;
495 static void free_type(struct type *t)
497 /* The type is always a reference to something in the
498 * context, so we don't need to free anything.
502 static void free_value(struct value v)
508 static int type_compat(struct type *require, struct type *have, int rules)
510 if ((rules & Rboolok) && have == Tbool)
512 if ((rules & Rnolabel) && have == Tlabel)
514 if (!require || !have)
518 return require->compat(require, have);
520 return require == have;
523 static struct value val_prepare(struct type *type)
528 return type->prepare(type);
533 static struct value val_init(struct type *type)
538 return type->init(type);
543 static struct value dup_value(struct value v)
546 return v.type->dup(v);
550 static int value_cmp(struct value left, struct value right)
552 if (left.type && left.type->cmp_order)
553 return left.type->cmp_order(left, right);
554 if (left.type && left.type->cmp_eq)
555 return left.type->cmp_eq(left, right);
559 static void print_value(struct value v)
561 if (v.type && v.type->print)
567 static struct value parse_value(struct type *type, char *arg)
571 if (type && type->parse)
572 return type->parse(type, arg);
577 ###### free context types
579 while (context.typelist) {
580 struct type *t = context.typelist;
582 context.typelist = t->next;
588 Values of the base types can be numbers, which we represent as
589 multi-precision fractions, strings, Booleans and labels. When
590 analysing the program we also need to allow for places where no value
591 is meaningful (type `Tnone`) and where we don't know what type to
592 expect yet (type is `NULL`).
594 Values are never shared, they are always copied when used, and freed
595 when no longer needed.
597 When propagating type information around the program, we need to
598 determine if two types are compatible, where type `NULL` is compatible
599 with anything. There are two special cases with type compatibility,
600 both related to the Conditional Statement which will be described
601 later. In some cases a Boolean can be accepted as well as some other
602 primary type, and in others any type is acceptable except a label (`Vlabel`).
603 A separate function encoding these cases will simplify some code later.
605 When assigning command line arguments to variables, we need to be able
606 to parse each type from a string.
614 myLDLIBS := libnumber.o libstring.o -lgmp
615 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
617 ###### type union fields
618 enum vtype {Vnone, Vstr, Vnum, Vbool, Vlabel} vtype;
620 ###### value union fields
627 static void _free_value(struct value v)
629 switch (v.type->vtype) {
631 case Vstr: free(v.str.txt); break;
632 case Vnum: mpq_clear(v.num); break;
638 ###### value functions
640 static struct value _val_prepare(struct type *type)
645 switch(type->vtype) {
649 memset(&rv.num, 0, sizeof(rv.num));
665 static struct value _val_init(struct type *type)
670 switch(type->vtype) {
674 mpq_init(rv.num); break;
676 rv.str.txt = malloc(1);
689 static struct value _dup_value(struct value v)
693 switch (rv.type->vtype) {
704 mpq_set(rv.num, v.num);
707 rv.str.len = v.str.len;
708 rv.str.txt = malloc(rv.str.len);
709 memcpy(rv.str.txt, v.str.txt, v.str.len);
715 static int _value_cmp(struct value left, struct value right)
718 if (left.type != right.type)
719 return left.type - right.type;
720 switch (left.type->vtype) {
721 case Vlabel: cmp = left.label == right.label ? 0 : 1; break;
722 case Vnum: cmp = mpq_cmp(left.num, right.num); break;
723 case Vstr: cmp = text_cmp(left.str, right.str); break;
724 case Vbool: cmp = left.bool - right.bool; break;
730 static void _print_value(struct value v)
732 switch (v.type->vtype) {
734 printf("*no-value*"); break;
736 printf("*label-%p*", v.label); break;
738 printf("%.*s", v.str.len, v.str.txt); break;
740 printf("%s", v.bool ? "True":"False"); break;
745 mpf_set_q(fl, v.num);
746 gmp_printf("%Fg", fl);
753 static struct value _parse_value(struct type *type, char *arg)
761 switch(type->vtype) {
767 val.str.len = strlen(arg);
768 val.str.txt = malloc(val.str.len);
769 memcpy(val.str.txt, arg, val.str.len);
776 tx.txt = arg; tx.len = strlen(tx.txt);
777 if (number_parse(val.num, tail, tx) == 0)
780 mpq_neg(val.num, val.num);
782 printf("Unsupported suffix: %s\n", arg);
787 if (strcasecmp(arg, "true") == 0 ||
788 strcmp(arg, "1") == 0)
790 else if (strcasecmp(arg, "false") == 0 ||
791 strcmp(arg, "0") == 0)
794 printf("Bad bool: %s\n", arg);
802 static void _free_value(struct value v);
804 static struct type base_prototype = {
806 .prepare = _val_prepare,
807 .parse = _parse_value,
808 .print = _print_value,
809 .cmp_order = _value_cmp,
810 .cmp_eq = _value_cmp,
815 static struct type *Tbool, *Tstr, *Tnum, *Tnone, *Tlabel;
818 static struct type *add_base_type(struct parse_context *c, char *n, enum vtype vt)
820 struct text txt = { n, strlen(n) };
823 t = add_type(c, txt, &base_prototype);
828 ###### context initialization
830 Tbool = add_base_type(&context, "Boolean", Vbool);
831 Tstr = add_base_type(&context, "string", Vstr);
832 Tnum = add_base_type(&context, "number", Vnum);
833 Tnone = add_base_type(&context, "none", Vnone);
834 Tlabel = add_base_type(&context, "label", Vlabel);
838 Variables are scoped named values. We store the names in a linked
839 list of "bindings" sorted lexically, and use sequential search and
846 struct binding *next; // in lexical order
850 This linked list is stored in the parse context so that "reduce"
851 functions can find or add variables, and so the analysis phase can
852 ensure that every variable gets a type.
856 struct binding *varlist; // In lexical order
860 static struct binding *find_binding(struct parse_context *c, struct text s)
862 struct binding **l = &c->varlist;
867 (cmp = text_cmp((*l)->name, s)) < 0)
871 n = calloc(1, sizeof(*n));
878 Each name can be linked to multiple variables defined in different
879 scopes. Each scope starts where the name is declared and continues
880 until the end of the containing code block. Scopes of a given name
881 cannot nest, so a declaration while a name is in-scope is an error.
883 ###### binding fields
884 struct variable *var;
888 struct variable *previous;
890 struct binding *name;
891 struct exec *where_decl;// where name was declared
892 struct exec *where_set; // where type was set
896 While the naming seems strange, we include local constants in the
897 definition of variables. A name declared `var := value` can
898 subsequently be changed, but a name declared `var ::= value` cannot -
901 ###### variable fields
904 Scopes in parallel branches can be partially merged. More
905 specifically, if a given name is declared in both branches of an
906 if/else then its scope is a candidate for merging. Similarly if
907 every branch of an exhaustive switch (e.g. has an "else" clause)
908 declares a given name, then the scopes from the branches are
909 candidates for merging.
911 Note that names declared inside a loop (which is only parallel to
912 itself) are never visible after the loop. Similarly names defined in
913 scopes which are not parallel, such as those started by `for` and
914 `switch`, are never visible after the scope. Only variables defined in
915 both `then` and `else` (including the implicit then after an `if`, and
916 excluding `then` used with `for`) and in all `case`s and `else` of a
917 `switch` or `while` can be visible beyond the `if`/`switch`/`while`.
919 Labels, which are a bit like variables, follow different rules.
920 Labels are not explicitly declared, but if an undeclared name appears
921 in a context where a label is legal, that effectively declares the
922 name as a label. The declaration remains in force (or in scope) at
923 least to the end of the immediately containing block and conditionally
924 in any larger containing block which does not declare the name in some
925 other way. Importantly, the conditional scope extension happens even
926 if the label is only used in one parallel branch of a conditional --
927 when used in one branch it is treated as having been declared in all
930 Merge candidates are tentatively visible beyond the end of the
931 branching statement which creates them. If the name is used, the
932 merge is affirmed and they become a single variable visible at the
933 outer layer. If not - if it is redeclared first - the merge lapses.
935 To track scopes we have an extra stack, implemented as a linked list,
936 which roughly parallels the parse stack and which is used exclusively
937 for scoping. When a new scope is opened, a new frame is pushed and
938 the child-count of the parent frame is incremented. This child-count
939 is used to distinguish between the first of a set of parallel scopes,
940 in which declared variables must not be in scope, and subsequent
941 branches, where they must already be conditionally scoped.
943 To push a new frame *before* any code in the frame is parsed, we need a
944 grammar reduction. This is most easily achieved with a grammar
945 element which derives the empty string, and creates the new scope when
946 it is recognized. This can be placed, for example, between a keyword
947 like "if" and the code following it.
951 struct scope *parent;
957 struct scope *scope_stack;
960 static void scope_pop(struct parse_context *c)
962 struct scope *s = c->scope_stack;
964 c->scope_stack = s->parent;
969 static void scope_push(struct parse_context *c)
971 struct scope *s = calloc(1, sizeof(*s));
973 c->scope_stack->child_count += 1;
974 s->parent = c->scope_stack;
982 OpenScope -> ${ scope_push(config2context(config)); }$
985 Each variable records a scope depth and is in one of four states:
987 - "in scope". This is the case between the declaration of the
988 variable and the end of the containing block, and also between
989 the usage which affirms a merge and the end of that block.
991 The scope depth is not greater than the current parse context scope
992 nest depth. When the block of that depth closes, the state will
993 change. To achieve this, all "in scope" variables are linked
994 together as a stack in nesting order.
996 - "pending". The "in scope" block has closed, but other parallel
997 scopes are still being processed. So far, every parallel block at
998 the same level that has closed has declared the name.
1000 The scope depth is the depth of the last parallel block that
1001 enclosed the declaration, and that has closed.
1003 - "conditionally in scope". The "in scope" block and all parallel
1004 scopes have closed, and no further mention of the name has been
1005 seen. This state includes a secondary nest depth which records the
1006 outermost scope seen since the variable became conditionally in
1007 scope. If a use of the name is found, the variable becomes "in
1008 scope" and that secondary depth becomes the recorded scope depth.
1009 If the name is declared as a new variable, the old variable becomes
1010 "out of scope" and the recorded scope depth stays unchanged.
1012 - "out of scope". The variable is neither in scope nor conditionally
1013 in scope. It is permanently out of scope now and can be removed from
1014 the "in scope" stack.
1017 ###### variable fields
1018 int depth, min_depth;
1019 enum { OutScope, PendingScope, CondScope, InScope } scope;
1020 struct variable *in_scope;
1022 ###### parse context
1024 struct variable *in_scope;
1026 All variables with the same name are linked together using the
1027 'previous' link. Those variables that have
1028 been affirmatively merged all have a 'merged' pointer that points to
1029 one primary variable - the most recently declared instance. When
1030 merging variables, we need to also adjust the 'merged' pointer on any
1031 other variables that had previously been merged with the one that will
1032 no longer be primary.
1034 ###### variable fields
1035 struct variable *merged;
1037 ###### ast functions
1039 static void variable_merge(struct variable *primary, struct variable *secondary)
1043 if (primary->merged)
1045 primary = primary->merged;
1047 for (v = primary->previous; v; v=v->previous)
1048 if (v == secondary || v == secondary->merged ||
1049 v->merged == secondary ||
1050 (v->merged && v->merged == secondary->merged)) {
1051 v->scope = OutScope;
1052 v->merged = primary;
1056 ###### free context vars
1058 while (context.varlist) {
1059 struct binding *b = context.varlist;
1060 struct variable *v = b->var;
1061 context.varlist = b->next;
1064 struct variable *t = v;
1072 #### Manipulating Bindings
1074 When a name is conditionally visible, a new declaration discards the
1075 old binding - the condition lapses. Conversely a usage of the name
1076 affirms the visibility and extends it to the end of the containing
1077 block - i.e. the block that contains both the original declaration and
1078 the latest usage. This is determined from `min_depth`. When a
1079 conditionally visible variable gets affirmed like this, it is also
1080 merged with other conditionally visible variables with the same name.
1082 When we parse a variable declaration we either signal an error if the
1083 name is currently bound, or create a new variable at the current nest
1084 depth if the name is unbound or bound to a conditionally scoped or
1085 pending-scope variable. If the previous variable was conditionally
1086 scoped, it and its homonyms become out-of-scope.
1088 When we parse a variable reference (including non-declarative
1089 assignment) we signal an error if the name is not bound or is bound to
1090 a pending-scope variable; update the scope if the name is bound to a
1091 conditionally scoped variable; or just proceed normally if the named
1092 variable is in scope.
1094 When we exit a scope, any variables bound at this level are either
1095 marked out of scope or pending-scoped, depending on whether the
1096 scope was sequential or parallel.
1098 When exiting a parallel scope we check if there are any variables that
1099 were previously pending and are still visible. If there are, then
1100 they weren't redeclared in the most recent scope, so they cannot be
1101 merged and must become out-of-scope. If it is not the first of
1102 parallel scopes (based on `child_count`), we check that there was a
1103 previous binding that is still pending-scope. If there isn't, the new
1104 variable must now be out-of-scope.
1106 When exiting a sequential scope that immediately enclosed parallel
1107 scopes, we need to resolve any pending-scope variables. If there was
1108 no `else` clause, and we cannot determine that the `switch` was exhaustive,
1109 we need to mark all pending-scope variables as out-of-scope. Otherwise
1110 all pending-scope variables become conditionally scoped.
1113 enum closetype { CloseSequential, CloseParallel, CloseElse };
1115 ###### ast functions
1117 static struct variable *var_decl(struct parse_context *c, struct text s)
1119 struct binding *b = find_binding(c, s);
1120 struct variable *v = b->var;
1122 switch (v ? v->scope : OutScope) {
1124 /* Caller will report the error */
1128 v && v->scope == CondScope;
1130 v->scope = OutScope;
1134 v = calloc(1, sizeof(*v));
1135 v->previous = b->var;
1138 v->min_depth = v->depth = c->scope_depth;
1140 v->in_scope = c->in_scope;
1142 v->val = val_prepare(NULL);
1146 static struct variable *var_ref(struct parse_context *c, struct text s)
1148 struct binding *b = find_binding(c, s);
1149 struct variable *v = b->var;
1150 struct variable *v2;
1152 switch (v ? v->scope : OutScope) {
1155 /* Signal an error - once that is possible */
1158 /* All CondScope variables of this name need to be merged
1159 * and become InScope
1161 v->depth = v->min_depth;
1163 for (v2 = v->previous;
1164 v2 && v2->scope == CondScope;
1166 variable_merge(v, v2);
1174 static void var_block_close(struct parse_context *c, enum closetype ct)
1176 /* close of all variables that are in_scope */
1177 struct variable *v, **vp, *v2;
1180 for (vp = &c->in_scope;
1181 v = *vp, v && v->depth > c->scope_depth && v->min_depth > c->scope_depth;
1185 case CloseParallel: /* handle PendingScope */
1189 if (c->scope_stack->child_count == 1)
1190 v->scope = PendingScope;
1191 else if (v->previous &&
1192 v->previous->scope == PendingScope)
1193 v->scope = PendingScope;
1194 else if (v->val.type == Tlabel)
1195 v->scope = PendingScope;
1196 else if (v->name->var == v)
1197 v->scope = OutScope;
1198 if (ct == CloseElse) {
1199 /* All Pending variables with this name
1200 * are now Conditional */
1202 v2 && v2->scope == PendingScope;
1204 v2->scope = CondScope;
1209 v2 && v2->scope == PendingScope;
1211 if (v2->val.type != Tlabel)
1212 v2->scope = OutScope;
1214 case OutScope: break;
1217 case CloseSequential:
1218 if (v->val.type == Tlabel)
1219 v->scope = PendingScope;
1222 v->scope = OutScope;
1225 /* There was no 'else', so we can only become
1226 * conditional if we know the cases were exhaustive,
1227 * and that doesn't mean anything yet.
1228 * So only labels become conditional..
1231 v2 && v2->scope == PendingScope;
1233 if (v2->val.type == Tlabel) {
1234 v2->scope = CondScope;
1235 v2->min_depth = c->scope_depth;
1237 v2->scope = OutScope;
1240 case OutScope: break;
1244 if (v->scope == OutScope)
1253 Executables can be lots of different things. In many cases an
1254 executable is just an operation combined with one or two other
1255 executables. This allows for expressions and lists etc. Other times
1256 an executable is something quite specific like a constant or variable
1257 name. So we define a `struct exec` to be a general executable with a
1258 type, and a `struct binode` which is a subclass of `exec`, forms a
1259 node in a binary tree, and holds an operation. There will be other
1260 subclasses, and to access these we need to be able to `cast` the
1261 `exec` into the various other types.
1264 #define cast(structname, pointer) ({ \
1265 const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \
1266 if (__mptr && *__mptr != X##structname) abort(); \
1267 (struct structname *)( (char *)__mptr);})
1269 #define new(structname) ({ \
1270 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1271 __ptr->type = X##structname; \
1272 __ptr->line = -1; __ptr->column = -1; \
1275 #define new_pos(structname, token) ({ \
1276 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1277 __ptr->type = X##structname; \
1278 __ptr->line = token.line; __ptr->column = token.col; \
1287 enum exec_types type;
1295 struct exec *left, *right;
1298 ###### ast functions
1300 static int __fput_loc(struct exec *loc, FILE *f)
1302 if (loc->line >= 0) {
1303 fprintf(f, "%d:%d: ", loc->line, loc->column);
1306 if (loc->type == Xbinode)
1307 return __fput_loc(cast(binode,loc)->left, f) ||
1308 __fput_loc(cast(binode,loc)->right, f);
1311 static void fput_loc(struct exec *loc, FILE *f)
1313 if (!__fput_loc(loc, f))
1314 fprintf(f, "??:??: ");
1317 Each different type of `exec` node needs a number of functions
1318 defined, a bit like methods. We must be able to free it,
1319 print it, analyse it and execute it. Once we have specific `exec`
1320 types we will need to parse them too. Let's take this a bit more
1325 The parser generator requires a `free_foo` function for each struct
1326 that stores attributes and they will be `exec`s and subtypes thereof.
1327 So we need `free_exec` which can handle all the subtypes, and we need
1330 ###### ast functions
1332 static void free_binode(struct binode *b)
1337 free_exec(b->right);
1341 ###### core functions
1342 static void free_exec(struct exec *e)
1351 ###### forward decls
1353 static void free_exec(struct exec *e);
1355 ###### free exec cases
1356 case Xbinode: free_binode(cast(binode, e)); break;
1360 Printing an `exec` requires that we know the current indent level for
1361 printing line-oriented components. As will become clear later, we
1362 also want to know what sort of bracketing to use.
1364 ###### ast functions
1366 static void do_indent(int i, char *str)
1373 ###### core functions
1374 static void print_binode(struct binode *b, int indent, int bracket)
1378 ## print binode cases
1382 static void print_exec(struct exec *e, int indent, int bracket)
1388 print_binode(cast(binode, e), indent, bracket); break;
1393 ###### forward decls
1395 static void print_exec(struct exec *e, int indent, int bracket);
1399 As discussed, analysis involves propagating type requirements around
1400 the program and looking for errors.
1402 So `propagate_types` is passed an expected type (being a `struct type`
1403 pointer together with some `val_rules` flags) that the `exec` is
1404 expected to return, and returns the type that it does return, either
1405 of which can be `NULL` signifying "unknown". An `ok` flag is passed
1406 by reference. It is set to `0` when an error is found, and `2` when
1407 any change is made. If it remains unchanged at `1`, then no more
1408 propagation is needed.
1412 enum val_rules {Rnolabel = 1<<0, Rboolok = 1<<1};
1416 if (rules & Rnolabel)
1417 fputs(" (labels not permitted)", stderr);
1420 ###### core functions
1422 static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1423 struct type *type, int rules)
1430 switch (prog->type) {
1433 struct binode *b = cast(binode, prog);
1435 ## propagate binode cases
1439 ## propagate exec cases
1446 Interpreting an `exec` doesn't require anything but the `exec`. State
1447 is stored in variables and each variable will be directly linked from
1448 within the `exec` tree. The exception to this is the whole `program`
1449 which needs to look at command line arguments. The `program` will be
1450 interpreted separately.
1452 Each `exec` can return a value, which may be `Tnone` but must be non-NULL.
1454 ###### core functions
1456 static struct value interp_exec(struct exec *e)
1466 struct binode *b = cast(binode, e);
1467 struct value left, right;
1468 left.type = right.type = Tnone;
1470 ## interp binode cases
1472 free_value(left); free_value(right);
1475 ## interp exec cases
1480 ## Language elements
1482 Each language element needs to be parsed, printed, analysed,
1483 interpreted, and freed. There are several, so let's just start with
1484 the easy ones and work our way up.
1488 We have already met values as separate objects. When manifest
1489 constants appear in the program text, that must result in an executable
1490 which has a constant value. So the `val` structure embeds a value in
1506 $0 = new_pos(val, $1);
1507 $0->val.type = Tbool;
1511 $0 = new_pos(val, $1);
1512 $0->val.type = Tbool;
1516 $0 = new_pos(val, $1);
1517 $0->val.type = Tnum;
1520 if (number_parse($0->val.num, tail, $1.txt) == 0)
1521 mpq_init($0->val.num);
1523 tok_err(config2context(config), "error: unsupported number suffix",
1528 $0 = new_pos(val, $1);
1529 $0->val.type = Tstr;
1532 string_parse(&$1, '\\', &$0->val.str, tail);
1534 tok_err(config2context(config), "error: unsupported string suffix",
1539 $0 = new_pos(val, $1);
1540 $0->val.type = Tstr;
1543 string_parse(&$1, '\\', &$0->val.str, tail);
1545 tok_err(config2context(config), "error: unsupported string suffix",
1550 ###### print exec cases
1553 struct val *v = cast(val, e);
1554 if (v->val.type == Tstr)
1556 print_value(v->val);
1557 if (v->val.type == Tstr)
1562 ###### propagate exec cases
1565 struct val *val = cast(val, prog);
1566 if (!type_compat(type, val->val.type, rules)) {
1567 type_err(c, "error: expected %1%r found %2",
1568 prog, type, rules, val->val.type);
1571 return val->val.type;
1574 ###### interp exec cases
1576 return dup_value(cast(val, e)->val);
1578 ###### ast functions
1579 static void free_val(struct val *v)
1587 ###### free exec cases
1588 case Xval: free_val(cast(val, e)); break;
1590 ###### ast functions
1591 // Move all nodes from 'b' to 'rv', reversing the order.
1592 // In 'b' 'left' is a list, and 'right' is the last node.
1593 // In 'rv', 'left' is the first node and 'right' is a list.
1594 static struct binode *reorder_bilist(struct binode *b)
1596 struct binode *rv = NULL;
1599 struct exec *t = b->right;
1603 b = cast(binode, b->left);
1613 Just as we used a `val` to wrap a value into an `exec`, we similarly
1614 need a `var` to wrap a `variable` into an exec. While each `val`
1615 contained a copy of the value, each `var` holds a link to the variable
1616 because it really is the same variable no matter where it appears.
1617 When a variable is used, we need to remember to follow the `->merged`
1618 link to find the primary instance.
1626 struct variable *var;
1632 VariableDecl -> IDENTIFIER : ${ {
1633 struct variable *v = var_decl(config2context(config), $1.txt);
1634 $0 = new_pos(var, $1);
1639 v = var_ref(config2context(config), $1.txt);
1641 type_err(config2context(config), "error: variable '%v' redeclared",
1642 $0, Tnone, 0, Tnone);
1643 type_err(config2context(config), "info: this is where '%v' was first declared",
1644 v->where_decl, Tnone, 0, Tnone);
1647 | IDENTIFIER :: ${ {
1648 struct variable *v = var_decl(config2context(config), $1.txt);
1649 $0 = new_pos(var, $1);
1655 v = var_ref(config2context(config), $1.txt);
1657 type_err(config2context(config), "error: variable '%v' redeclared",
1658 $0, Tnone, 0, Tnone);
1659 type_err(config2context(config), "info: this is where '%v' was first declared",
1660 v->where_decl, Tnone, 0, Tnone);
1663 | IDENTIFIER : Type ${ {
1664 struct variable *v = var_decl(config2context(config), $1.txt);
1665 $0 = new_pos(var, $1);
1670 v->val = val_prepare($<3);
1672 v = var_ref(config2context(config), $1.txt);
1674 type_err(config2context(config), "error: variable '%v' redeclared",
1675 $0, Tnone, 0, Tnone);
1676 type_err(config2context(config), "info: this is where '%v' was first declared",
1677 v->where_decl, Tnone, 0, Tnone);
1680 | IDENTIFIER :: Type ${ {
1681 struct variable *v = var_decl(config2context(config), $1.txt);
1682 $0 = new_pos(var, $1);
1687 v->val = val_prepare($<3);
1690 v = var_ref(config2context(config), $1.txt);
1692 type_err(config2context(config), "error: variable '%v' redeclared",
1693 $0, Tnone, 0, Tnone);
1694 type_err(config2context(config), "info: this is where '%v' was first declared",
1695 v->where_decl, Tnone, 0, Tnone);
1699 Variable -> IDENTIFIER ${ {
1700 struct variable *v = var_ref(config2context(config), $1.txt);
1701 $0 = new_pos(var, $1);
1703 /* This might be a label - allocate a var just in case */
1704 v = var_decl(config2context(config), $1.txt);
1706 v->val = val_prepare(Tlabel);
1707 v->val.label = &v->val;
1715 Type -> IDENTIFIER ${
1716 $0 = find_type(config2context(config), $1.txt);
1718 tok_err(config2context(config),
1719 "error: undefined type", &$1);
1725 ###### print exec cases
1728 struct var *v = cast(var, e);
1730 struct binding *b = v->var->name;
1731 printf("%.*s", b->name.len, b->name.txt);
1738 if (loc->type == Xvar) {
1739 struct var *v = cast(var, loc);
1741 struct binding *b = v->var->name;
1742 fprintf(stderr, "%.*s", b->name.len, b->name.txt);
1744 fputs("???", stderr);
1746 fputs("NOTVAR", stderr);
1749 ###### propagate exec cases
1753 struct var *var = cast(var, prog);
1754 struct variable *v = var->var;
1756 type_err(c, "%d:BUG: no variable!!", prog, Tnone, 0, Tnone);
1762 if (v->val.type == NULL) {
1763 if (type && *ok != 0) {
1764 v->val = val_prepare(type);
1765 v->where_set = prog;
1770 if (!type_compat(type, v->val.type, rules)) {
1771 type_err(c, "error: expected %1%r but variable '%v' is %2", prog,
1772 type, rules, v->val.type);
1773 type_err(c, "info: this is where '%v' was set to %1", v->where_set,
1774 v->val.type, rules, Tnone);
1782 ###### interp exec cases
1785 struct var *var = cast(var, e);
1786 struct variable *v = var->var;
1790 return dup_value(v->val);
1793 ###### ast functions
1795 static void free_var(struct var *v)
1800 ###### free exec cases
1801 case Xvar: free_var(cast(var, e)); break;
1803 ### Expressions: Boolean
1805 Our first user of the `binode` will be expressions, and particularly
1806 Boolean expressions. As I haven't implemented precedence in the
1807 parser generator yet, we need different names for each precedence
1808 level used by expressions. The outermost or lowest level precedence
1809 are Boolean `or`, `and`, and `not` which form an `Expression` out of `BTerm`s
1820 Expression -> Expression or BTerm ${ {
1821 struct binode *b = new(binode);
1827 | BTerm ${ $0 = $<1; }$
1829 BTerm -> BTerm and BFact ${ {
1830 struct binode *b = new(binode);
1836 | BFact ${ $0 = $<1; }$
1838 BFact -> not BFact ${ {
1839 struct binode *b = new(binode);
1846 ###### print binode cases
1848 print_exec(b->left, -1, 0);
1850 print_exec(b->right, -1, 0);
1853 print_exec(b->left, -1, 0);
1855 print_exec(b->right, -1, 0);
1859 print_exec(b->right, -1, 0);
1862 ###### propagate binode cases
1866 /* both must be Tbool, result is Tbool */
1867 propagate_types(b->left, c, ok, Tbool, 0);
1868 propagate_types(b->right, c, ok, Tbool, 0);
1869 if (type && type != Tbool) {
1870 type_err(c, "error: %1 operation found where %2 expected", prog,
1876 ###### interp binode cases
1878 rv = interp_exec(b->left);
1879 right = interp_exec(b->right);
1880 rv.bool = rv.bool && right.bool;
1883 rv = interp_exec(b->left);
1884 right = interp_exec(b->right);
1885 rv.bool = rv.bool || right.bool;
1888 rv = interp_exec(b->right);
1892 ### Expressions: Comparison
1894 Of slightly higher precedence than Boolean expressions are
1896 A comparison takes arguments of any type, but the two types must be
1899 To simplify the parsing we introduce an `eop` which can record an
1900 expression operator.
1907 ###### ast functions
1908 static void free_eop(struct eop *e)
1923 | Expr CMPop Expr ${ {
1924 struct binode *b = new(binode);
1930 | Expr ${ $0 = $<1; }$
1935 CMPop -> < ${ $0.op = Less; }$
1936 | > ${ $0.op = Gtr; }$
1937 | <= ${ $0.op = LessEq; }$
1938 | >= ${ $0.op = GtrEq; }$
1939 | == ${ $0.op = Eql; }$
1940 | != ${ $0.op = NEql; }$
1942 ###### print binode cases
1950 print_exec(b->left, -1, 0);
1952 case Less: printf(" < "); break;
1953 case LessEq: printf(" <= "); break;
1954 case Gtr: printf(" > "); break;
1955 case GtrEq: printf(" >= "); break;
1956 case Eql: printf(" == "); break;
1957 case NEql: printf(" != "); break;
1960 print_exec(b->right, -1, 0);
1963 ###### propagate binode cases
1970 /* Both must match but not be labels, result is Tbool */
1971 t = propagate_types(b->left, c, ok, NULL, Rnolabel);
1973 propagate_types(b->right, c, ok, t, 0);
1975 t = propagate_types(b->right, c, ok, NULL, Rnolabel);
1977 t = propagate_types(b->left, c, ok, t, 0);
1979 if (!type_compat(type, Tbool, 0)) {
1980 type_err(c, "error: Comparison returns %1 but %2 expected", prog,
1981 Tbool, rules, type);
1986 ###### interp binode cases
1995 left = interp_exec(b->left);
1996 right = interp_exec(b->right);
1997 cmp = value_cmp(left, right);
2000 case Less: rv.bool = cmp < 0; break;
2001 case LessEq: rv.bool = cmp <= 0; break;
2002 case Gtr: rv.bool = cmp > 0; break;
2003 case GtrEq: rv.bool = cmp >= 0; break;
2004 case Eql: rv.bool = cmp == 0; break;
2005 case NEql: rv.bool = cmp != 0; break;
2006 default: rv.bool = 0; break;
2011 ### Expressions: The rest
2013 The remaining expressions with the highest precedence are arithmetic
2014 and string concatenation. They are `Expr`, `Term`, and `Factor`.
2015 The `Factor` is where the `Value` and `Variable` that we already have
2018 `+` and `-` are both infix and prefix operations (where they are
2019 absolute value and negation). These have different operator names.
2021 We also have a 'Bracket' operator which records where parentheses were
2022 found. This makes it easy to reproduce these when printing. Once
2023 precedence is handled better I might be able to discard this.
2035 Expr -> Expr Eop Term ${ {
2036 struct binode *b = new(binode);
2042 | Term ${ $0 = $<1; }$
2044 Term -> Term Top Factor ${ {
2045 struct binode *b = new(binode);
2051 | Factor ${ $0 = $<1; }$
2053 Factor -> ( Expression ) ${ {
2054 struct binode *b = new_pos(binode, $1);
2060 struct binode *b = new(binode);
2065 | Value ${ $0 = $<1; }$
2066 | Variable ${ $0 = $<1; }$
2069 Eop -> + ${ $0.op = Plus; }$
2070 | - ${ $0.op = Minus; }$
2072 Uop -> + ${ $0.op = Absolute; }$
2073 | - ${ $0.op = Negate; }$
2075 Top -> * ${ $0.op = Times; }$
2076 | / ${ $0.op = Divide; }$
2077 | ++ ${ $0.op = Concat; }$
2079 ###### print binode cases
2085 print_exec(b->left, indent, 0);
2087 case Plus: printf(" + "); break;
2088 case Minus: printf(" - "); break;
2089 case Times: printf(" * "); break;
2090 case Divide: printf(" / "); break;
2091 case Concat: printf(" ++ "); break;
2094 print_exec(b->right, indent, 0);
2098 print_exec(b->right, indent, 0);
2102 print_exec(b->right, indent, 0);
2106 print_exec(b->right, indent, 0);
2110 ###### propagate binode cases
2115 /* both must be numbers, result is Tnum */
2118 /* as propagate_types ignores a NULL,
2119 * unary ops fit here too */
2120 propagate_types(b->left, c, ok, Tnum, 0);
2121 propagate_types(b->right, c, ok, Tnum, 0);
2122 if (!type_compat(type, Tnum, 0)) {
2123 type_err(c, "error: Arithmetic returns %1 but %2 expected", prog,
2130 /* both must be Tstr, result is Tstr */
2131 propagate_types(b->left, c, ok, Tstr, 0);
2132 propagate_types(b->right, c, ok, Tstr, 0);
2133 if (!type_compat(type, Tstr, 0)) {
2134 type_err(c, "error: Concat returns %1 but %2 expected", prog,
2141 return propagate_types(b->right, c, ok, type, 0);
2143 ###### interp binode cases
2146 rv = interp_exec(b->left);
2147 right = interp_exec(b->right);
2148 mpq_add(rv.num, rv.num, right.num);
2151 rv = interp_exec(b->left);
2152 right = interp_exec(b->right);
2153 mpq_sub(rv.num, rv.num, right.num);
2156 rv = interp_exec(b->left);
2157 right = interp_exec(b->right);
2158 mpq_mul(rv.num, rv.num, right.num);
2161 rv = interp_exec(b->left);
2162 right = interp_exec(b->right);
2163 mpq_div(rv.num, rv.num, right.num);
2166 rv = interp_exec(b->right);
2167 mpq_neg(rv.num, rv.num);
2170 rv = interp_exec(b->right);
2171 mpq_abs(rv.num, rv.num);
2174 rv = interp_exec(b->right);
2177 left = interp_exec(b->left);
2178 right = interp_exec(b->right);
2180 rv.str = text_join(left.str, right.str);
2184 ###### value functions
2186 static struct text text_join(struct text a, struct text b)
2189 rv.len = a.len + b.len;
2190 rv.txt = malloc(rv.len);
2191 memcpy(rv.txt, a.txt, a.len);
2192 memcpy(rv.txt+a.len, b.txt, b.len);
2197 ### Blocks, Statements, and Statement lists.
2199 Now that we have expressions out of the way we need to turn to
2200 statements. There are simple statements and more complex statements.
2201 Simple statements do not contain newlines, complex statements do.
2203 Statements often come in sequences and we have corresponding simple
2204 statement lists and complex statement lists.
2205 The former comprise only simple statements separated by semicolons.
2206 The latter comprise complex statements and simple statement lists. They are
2207 separated by newlines. Thus the semicolon is only used to separate
2208 simple statements on the one line. This may be overly restrictive,
2209 but I'm not sure I ever want a complex statement to share a line with
2212 Note that a simple statement list can still use multiple lines if
2213 subsequent lines are indented, so
2215 ###### Example: wrapped simple statement list
2220 is a single simple statement list. This might allow room for
2221 confusion, so I'm not set on it yet.
2223 A simple statement list needs no extra syntax. A complex statement
2224 list has two syntactic forms. It can be enclosed in braces (much like
2225 C blocks), or it can be introduced by a colon and continue until an
2226 unindented newline (much like Python blocks). With this extra syntax
2227 it is referred to as a block.
2229 Note that a block does not have to include any newlines if it only
2230 contains simple statements. So both of:
2232 if condition: a=b; d=f
2234 if condition { a=b; print f }
2238 In either case the list is constructed from a `binode` list with
2239 `Block` as the operator. When parsing the list it is most convenient
2240 to append to the end, so a list is a list and a statement. When using
2241 the list it is more convenient to consider a list to be a statement
2242 and a list. So we need a function to re-order a list.
2243 `reorder_bilist` serves this purpose.
2245 The only stand-alone statement we introduce at this stage is `pass`
2246 which does nothing and is represented as a `NULL` pointer in a `Block`
2247 list. Other stand-alone statements will follow once the infrastructure
2267 Block -> Open Statementlist Close ${ $0 = $<2; }$
2268 | Open Newlines Statementlist Close ${ $0 = $<3; }$
2269 | Open SimpleStatements } ${ $0 = reorder_bilist($<2); }$
2270 | Open Newlines SimpleStatements } ${ $0 = reorder_bilist($<3); }$
2271 | : Statementlist ${ $0 = $<2; }$
2272 | : SimpleStatements ${ $0 = reorder_bilist($<2); }$
2274 Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<1); }$
2276 ComplexStatements -> ComplexStatements ComplexStatement ${
2282 | ComplexStatements NEWLINE ${ $0 = $<1; }$
2283 | ComplexStatement ${
2291 ComplexStatement -> SimpleStatements NEWLINE ${
2292 $0 = reorder_bilist($<1);
2294 ## ComplexStatement Grammar
2297 SimpleStatements -> SimpleStatements ; SimpleStatement ${
2303 | SimpleStatement ${
2309 | SimpleStatements ; ${ $0 = $<1; }$
2311 SimpleStatement -> pass ${ $0 = NULL; }$
2312 ## SimpleStatement Grammar
2314 ###### print binode cases
2318 if (b->left == NULL)
2321 print_exec(b->left, indent, 0);
2324 print_exec(b->right, indent, 0);
2327 // block, one per line
2328 if (b->left == NULL)
2329 do_indent(indent, "pass\n");
2331 print_exec(b->left, indent, bracket);
2333 print_exec(b->right, indent, bracket);
2337 ###### propagate binode cases
2340 /* If any statement returns something other than Tnone
2341 * or Tbool then all such must return same type.
2342 * As each statement may be Tnone or something else,
2343 * we must always pass NULL (unknown) down, otherwise an incorrect
2344 * error might occur. We never return Tnone unless it is
2349 for (e = b; e; e = cast(binode, e->right)) {
2350 t = propagate_types(e->left, c, ok, NULL, rules);
2351 if ((rules & Rboolok) && t == Tbool)
2353 if (t && t != Tnone && t != Tbool) {
2356 else if (t != type) {
2357 type_err(c, "error: expected %1%r, found %2",
2358 e->left, type, rules, t);
2366 ###### interp binode cases
2368 while (rv.type == Tnone &&
2371 rv = interp_exec(b->left);
2372 b = cast(binode, b->right);
2376 ### The Print statement
2378 `print` is a simple statement that takes a comma-separated list of
2379 expressions and prints the values separated by spaces and terminated
2380 by a newline. No control of formatting is possible.
2382 `print` faces the same list-ordering issue as blocks, and uses the
2388 ###### SimpleStatement Grammar
2390 | print ExpressionList ${
2391 $0 = reorder_bilist($<2);
2393 | print ExpressionList , ${
2398 $0 = reorder_bilist($0);
2409 ExpressionList -> ExpressionList , Expression ${
2422 ###### print binode cases
2425 do_indent(indent, "print");
2429 print_exec(b->left, -1, 0);
2433 b = cast(binode, b->right);
2439 ###### propagate binode cases
2442 /* don't care but all must be consistent */
2443 propagate_types(b->left, c, ok, NULL, Rnolabel);
2444 propagate_types(b->right, c, ok, NULL, Rnolabel);
2447 ###### interp binode cases
2453 for ( ; b; b = cast(binode, b->right))
2457 left = interp_exec(b->left);
2470 ###### Assignment statement
2472 An assignment will assign a value to a variable, providing it hasn't
2473 been declared as a constant. The analysis phase ensures that the type
2474 will be correct so the interpreter just needs to perform the
2475 calculation. There is a form of assignment which declares a new
2476 variable as well as assigning a value. If a name is assigned before
2477 it is declared, an error will be raised as the name is created as
2478 `Tlabel` and it is illegal to assign to such names.
2484 ###### SimpleStatement Grammar
2485 | Variable = Expression ${ {
2486 struct var *v = cast(var, $1);
2492 if (v->var && v->var->constant) {
2493 type_err(config2context(config), "Cannot assign to a constant: %v",
2494 $0->left, NULL, 0, NULL);
2495 type_err(config2context(config), "name was defined as a constant here",
2496 v->var->where_decl, NULL, 0, NULL);
2499 | VariableDecl = Expression ${
2507 if ($1->var->where_set == NULL) {
2508 type_err(config2context(config), "Variable declared with no type or value: %v",
2518 ###### print binode cases
2521 do_indent(indent, "");
2522 print_exec(b->left, indent, 0);
2524 print_exec(b->right, indent, 0);
2531 struct variable *v = cast(var, b->left)->var;
2532 do_indent(indent, "");
2533 print_exec(b->left, indent, 0);
2534 if (cast(var, b->left)->var->constant) {
2535 if (v->where_decl == v->where_set)
2536 printf("::%.*s ", v->val.type->name.len,
2537 v->val.type->name.txt);
2541 if (v->where_decl == v->where_set)
2542 printf(":%.*s ", v->val.type->name.len,
2543 v->val.type->name.txt);
2549 print_exec(b->right, indent, 0);
2556 ###### propagate binode cases
2560 /* Both must match and not be labels,
2561 * Type must support 'dup',
2562 * result is Tnone */
2563 t = propagate_types(b->left, c, ok, NULL, Rnolabel);
2568 if (propagate_types(b->right, c, ok, t, 0) != t)
2569 if (b->left->type == Xvar)
2570 type_err(c, "info: variable '%v' was set as %1 here.",
2571 cast(var, b->left)->var->where_set, t, rules, Tnone);
2573 t = propagate_types(b->right, c, ok, NULL, Rnolabel);
2575 propagate_types(b->left, c, ok, t, 0);
2577 if (t && t->dup == NULL) {
2578 type_err(c, "error: cannot assign value of type %1", b, t, 0, NULL);
2585 ###### interp binode cases
2589 struct variable *v = cast(var, b->left)->var;
2592 right = interp_exec(b->right);
2601 struct variable *v = cast(var, b->left)->var;
2605 right = interp_exec(b->right);
2607 right = val_init(v->val.type);
2614 ### The `use` statement
2616 The `use` statement is the last "simple" statement. It is needed when
2617 the condition in a conditional statement is a block. `use` works much
2618 like `return` in C, but only completes the `condition`, not the whole
2624 ###### SimpleStatement Grammar
2626 $0 = new_pos(binode, $1);
2631 ###### print binode cases
2634 do_indent(indent, "use ");
2635 print_exec(b->right, -1, 0);
2640 ###### propagate binode cases
2643 /* result matches value */
2644 return propagate_types(b->right, c, ok, type, 0);
2646 ###### interp binode cases
2649 rv = interp_exec(b->right);
2652 ### The Conditional Statement
2654 This is the biggy and currently the only complex statement. This
2655 subsumes `if`, `while`, `do/while`, `switch`, and some parts of `for`.
2656 It is comprised of a number of parts, all of which are optional though
2657 set combinations apply. Each part is (usually) a key word (`then` is
2658 sometimes optional) followed by either an expression or a code block,
2659 except the `casepart` which is a "key word and an expression" followed
2660 by a code block. The code-block option is valid for all parts and,
2661 where an expression is also allowed, the code block can use the `use`
2662 statement to report a value. If the code block does not report a value
2663 the effect is similar to reporting `True`.
2665 The `else` and `case` parts, as well as `then` when combined with
2666 `if`, can contain a `use` statement which will apply to some
2667 containing conditional statement. `for` parts, `do` parts and `then`
2668 parts used with `for` can never contain a `use`, except in some
2669 subordinate conditional statement.
2671 If there is a `forpart`, it is executed first, only once.
2672 If there is a `dopart`, then it is executed repeatedly providing
2673 always that the `condpart` or `cond`, if present, does not return a non-True
2674 value. `condpart` can fail to return any value if it simply executes
2675 to completion. This is treated the same as returning `True`.
2677 If there is a `thenpart` it will be executed whenever the `condpart`
2678 or `cond` returns True (or does not return any value), but this will happen
2679 *after* `dopart` (when present).
2681 If `elsepart` is present it will be executed at most once when the
2682 condition returns `False` or some value that isn't `True` and isn't
2683 matched by any `casepart`. If there are any `casepart`s, they will be
2684 executed when the condition returns a matching value.
2686 The particular sorts of values allowed in case parts have not yet been
2687 determined in the language design, so nothing is prohibited.
2689 The various blocks in this complex statement potentially provide scope
2690 for variables as described earlier. Each such block must include the
2691 "OpenScope" nonterminal before parsing the block, and must call
2692 `var_block_close()` when closing the block.
2694 The code following "`if`", "`switch`" and "`for`" does not get its own
2695 scope, but is in a scope covering the whole statement, so names
2696 declared there cannot be redeclared elsewhere. Similarly the
2697 condition following "`while`" is in a scope that covers the body
2698 ("`do`" part) of the loop, and which does not allow conditional scope
2699 extension. Code following "`then`" (both looping and non-looping),
2700 "`else`" and "`case`" each get their own local scope.
2702 The type requirements on the code block in a `whilepart` are quite
2703 unusual. It is allowed to return a value of some identifiable type, in
2704 which case the loop aborts and an appropriate `casepart` is run, or it
2705 can return a Boolean, in which case the loop either continues to the
2706 `dopart` (on `True`) or aborts and runs the `elsepart` (on `False`).
2707 This is different both from the `ifpart` code block which is expected to
2708 return a Boolean, and from the `switchpart` code block which is expected to
2709 return the same type as the casepart values. The correct analysis of
2710 the type of the `whilepart` code block is the reason for the
2711 `Rboolok` flag which is passed to `propagate_types()`.
2713 The `cond_statement` cannot fit into a `binode` so a new `exec` is
2722 struct exec *action;
2723 struct casepart *next;
2725 struct cond_statement {
2727 struct exec *forpart, *condpart, *dopart, *thenpart, *elsepart;
2728 struct casepart *casepart;
2731 ###### ast functions
2733 static void free_casepart(struct casepart *cp)
2737 free_exec(cp->value);
2738 free_exec(cp->action);
2745 static void free_cond_statement(struct cond_statement *s)
2749 free_exec(s->forpart);
2750 free_exec(s->condpart);
2751 free_exec(s->dopart);
2752 free_exec(s->thenpart);
2753 free_exec(s->elsepart);
2754 free_casepart(s->casepart);
2758 ###### free exec cases
2759 case Xcond_statement: free_cond_statement(cast(cond_statement, e)); break;
2761 ###### ComplexStatement Grammar
2762 | CondStatement ${ $0 = $<1; }$
2767 // both ForThen and Whilepart open scopes, and CondSuffix only
2768 // closes one - so in the first branch here we have another to close.
2769 CondStatement -> ForThen WhilePart CondSuffix ${
2771 $0->forpart = $1.forpart; $1.forpart = NULL;
2772 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
2773 $0->condpart = $2.condpart; $2.condpart = NULL;
2774 $0->dopart = $2.dopart; $2.dopart = NULL;
2775 var_block_close(config2context(config), CloseSequential);
2777 | WhilePart CondSuffix ${
2779 $0->condpart = $1.condpart; $1.condpart = NULL;
2780 $0->dopart = $1.dopart; $1.dopart = NULL;
2782 | SwitchPart CondSuffix ${
2786 | IfPart IfSuffix ${
2788 $0->condpart = $1.condpart; $1.condpart = NULL;
2789 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
2790 // This is where we close an "if" statement
2791 var_block_close(config2context(config), CloseSequential);
2794 CondSuffix -> IfSuffix ${
2796 // This is where we close scope of the whole
2797 // "for" or "while" statement
2798 var_block_close(config2context(config), CloseSequential);
2800 | CasePart CondSuffix ${
2802 $1->next = $0->casepart;
2807 CasePart -> Newlines case Expression OpenScope Block ${
2808 $0 = calloc(1,sizeof(struct casepart));
2811 var_block_close(config2context(config), CloseParallel);
2813 | case Expression OpenScope Block ${
2814 $0 = calloc(1,sizeof(struct casepart));
2817 var_block_close(config2context(config), CloseParallel);
2821 IfSuffix -> Newlines ${ $0 = new(cond_statement); }$
2822 | Newlines else OpenScope Block ${
2823 $0 = new(cond_statement);
2825 var_block_close(config2context(config), CloseElse);
2827 | else OpenScope Block ${
2828 $0 = new(cond_statement);
2830 var_block_close(config2context(config), CloseElse);
2832 | Newlines else OpenScope CondStatement ${
2833 $0 = new(cond_statement);
2835 var_block_close(config2context(config), CloseElse);
2837 | else OpenScope CondStatement ${
2838 $0 = new(cond_statement);
2840 var_block_close(config2context(config), CloseElse);
2845 // These scopes are closed in CondSuffix
2846 ForPart -> for OpenScope SimpleStatements ${
2847 $0 = reorder_bilist($<3);
2849 | for OpenScope Block ${
2853 ThenPart -> then OpenScope SimpleStatements ${
2854 $0 = reorder_bilist($<3);
2855 var_block_close(config2context(config), CloseSequential);
2857 | then OpenScope Block ${
2859 var_block_close(config2context(config), CloseSequential);
2862 ThenPartNL -> ThenPart OptNL ${
2866 // This scope is closed in CondSuffix
2867 WhileHead -> while OpenScope Block ${
2872 ForThen -> ForPart OptNL ThenPartNL ${
2880 // This scope is closed in CondSuffix
2881 WhilePart -> while OpenScope Expression Block ${
2882 $0.type = Xcond_statement;
2886 | WhileHead OptNL do Block ${
2887 $0.type = Xcond_statement;
2892 IfPart -> if OpenScope Expression OpenScope Block ${
2893 $0.type = Xcond_statement;
2896 var_block_close(config2context(config), CloseParallel);
2898 | if OpenScope Block OptNL then OpenScope Block ${
2899 $0.type = Xcond_statement;
2902 var_block_close(config2context(config), CloseParallel);
2906 // This scope is closed in CondSuffix
2907 SwitchPart -> switch OpenScope Expression ${
2910 | switch OpenScope Block ${
2914 ###### print exec cases
2916 case Xcond_statement:
2918 struct cond_statement *cs = cast(cond_statement, e);
2919 struct casepart *cp;
2921 do_indent(indent, "for");
2922 if (bracket) printf(" {\n"); else printf(":\n");
2923 print_exec(cs->forpart, indent+1, bracket);
2926 do_indent(indent, "} then {\n");
2928 do_indent(indent, "then:\n");
2929 print_exec(cs->thenpart, indent+1, bracket);
2931 if (bracket) do_indent(indent, "}\n");
2935 if (cs->condpart && cs->condpart->type == Xbinode &&
2936 cast(binode, cs->condpart)->op == Block) {
2938 do_indent(indent, "while {\n");
2940 do_indent(indent, "while:\n");
2941 print_exec(cs->condpart, indent+1, bracket);
2943 do_indent(indent, "} do {\n");
2945 do_indent(indent, "do:\n");
2946 print_exec(cs->dopart, indent+1, bracket);
2948 do_indent(indent, "}\n");
2950 do_indent(indent, "while ");
2951 print_exec(cs->condpart, 0, bracket);
2956 print_exec(cs->dopart, indent+1, bracket);
2958 do_indent(indent, "}\n");
2963 do_indent(indent, "switch");
2965 do_indent(indent, "if");
2966 if (cs->condpart && cs->condpart->type == Xbinode &&
2967 cast(binode, cs->condpart)->op == Block) {
2972 print_exec(cs->condpart, indent+1, bracket);
2974 do_indent(indent, "}\n");
2976 do_indent(indent, "then:\n");
2977 print_exec(cs->thenpart, indent+1, bracket);
2981 print_exec(cs->condpart, 0, bracket);
2987 print_exec(cs->thenpart, indent+1, bracket);
2989 do_indent(indent, "}\n");
2994 for (cp = cs->casepart; cp; cp = cp->next) {
2995 do_indent(indent, "case ");
2996 print_exec(cp->value, -1, 0);
3001 print_exec(cp->action, indent+1, bracket);
3003 do_indent(indent, "}\n");
3006 do_indent(indent, "else");
3011 print_exec(cs->elsepart, indent+1, bracket);
3013 do_indent(indent, "}\n");
3018 ###### propagate exec cases
3019 case Xcond_statement:
3021 // forpart and dopart must return Tnone
3022 // thenpart must return Tnone if there is a dopart,
3023 // otherwise it is like elsepart.
3025 // be bool if there is no casepart
3026 // match casepart->values if there is a switchpart
3027 // either be bool or match casepart->value if there
3029 // elsepart and casepart->action must match the return type
3030 // expected of this statement.
3031 struct cond_statement *cs = cast(cond_statement, prog);
3032 struct casepart *cp;
3034 t = propagate_types(cs->forpart, c, ok, Tnone, 0);
3035 if (!type_compat(Tnone, t, 0))
3037 t = propagate_types(cs->dopart, c, ok, Tnone, 0);
3038 if (!type_compat(Tnone, t, 0))
3041 t = propagate_types(cs->thenpart, c, ok, Tnone, 0);
3042 if (!type_compat(Tnone, t, 0))
3045 if (cs->casepart == NULL)
3046 propagate_types(cs->condpart, c, ok, Tbool, 0);
3048 /* Condpart must match case values, with bool permitted */
3050 for (cp = cs->casepart;
3051 cp && !t; cp = cp->next)
3052 t = propagate_types(cp->value, c, ok, NULL, 0);
3053 if (!t && cs->condpart)
3054 t = propagate_types(cs->condpart, c, ok, NULL, Rboolok);
3055 // Now we have a type (I hope) push it down
3057 for (cp = cs->casepart; cp; cp = cp->next)
3058 propagate_types(cp->value, c, ok, t, 0);
3059 propagate_types(cs->condpart, c, ok, t, Rboolok);
3062 // (if)then, else, and case parts must return expected type.
3063 if (!cs->dopart && !type)
3064 type = propagate_types(cs->thenpart, c, ok, NULL, rules);
3066 type = propagate_types(cs->elsepart, c, ok, NULL, rules);
3067 for (cp = cs->casepart;
3070 type = propagate_types(cp->action, c, ok, NULL, rules);
3073 propagate_types(cs->thenpart, c, ok, type, rules);
3074 propagate_types(cs->elsepart, c, ok, type, rules);
3075 for (cp = cs->casepart; cp ; cp = cp->next)
3076 propagate_types(cp->action, c, ok, type, rules);
3082 ###### interp exec cases
3083 case Xcond_statement:
3085 struct value v, cnd;
3086 struct casepart *cp;
3087 struct cond_statement *c = cast(cond_statement, e);
3090 interp_exec(c->forpart);
3093 cnd = interp_exec(c->condpart);
3096 if (!(cnd.type == Tnone ||
3097 (cnd.type == Tbool && cnd.bool != 0)))
3099 // cnd is Tnone or Tbool, doesn't need to be freed
3101 interp_exec(c->dopart);
3104 v = interp_exec(c->thenpart);
3105 if (v.type != Tnone || !c->dopart)
3109 } while (c->dopart);
3111 for (cp = c->casepart; cp; cp = cp->next) {
3112 v = interp_exec(cp->value);
3113 if (value_cmp(v, cnd) == 0) {
3116 return interp_exec(cp->action);
3122 return interp_exec(c->elsepart);
3127 ### Finally the whole program.
3129 Somewhat reminiscent of Pascal, a (current) Ocean program starts with
3130 the keyword "program" and a list of variable names which are assigned
3131 values from command line arguments. Following this is a `block` which
3132 is the code to execute.
3134 As this is the top level, several things are handled a bit
3136 The whole program is not interpreted by `interp_exec` as that isn't
3137 passed the argument list which the program requires. Similarly, type
3138 analysis is a bit more interesting at this level.
3143 ###### Parser: grammar
3146 Program -> program OpenScope Varlist Block OptNL ${
3149 $0->left = reorder_bilist($<3);
3151 var_block_close(config2context(config), CloseSequential);
3152 if (config2context(config)->scope_stack) abort();
3155 tok_err(config2context(config),
3156 "error: unhandled parse error", &$1);
3159 Varlist -> Varlist ArgDecl ${
3168 ArgDecl -> IDENTIFIER ${ {
3169 struct variable *v = var_decl(config2context(config), $1.txt);
3176 ###### print binode cases
3178 do_indent(indent, "program");
3179 for (b2 = cast(binode, b->left); b2; b2 = cast(binode, b2->right)) {
3181 print_exec(b2->left, 0, 0);
3187 print_exec(b->right, indent+1, bracket);
3189 do_indent(indent, "}\n");
3192 ###### propagate binode cases
3193 case Program: abort();
3195 ###### core functions
3197 static int analyse_prog(struct exec *prog, struct parse_context *c)
3199 struct binode *b = cast(binode, prog);
3206 propagate_types(b->right, c, &ok, Tnone, 0);
3211 for (b = cast(binode, b->left); b; b = cast(binode, b->right)) {
3212 struct var *v = cast(var, b->left);
3213 if (!v->var->val.type) {
3214 v->var->where_set = b;
3215 v->var->val = val_prepare(Tstr);
3218 b = cast(binode, prog);
3221 propagate_types(b->right, c, &ok, Tnone, 0);
3226 /* Make sure everything is still consistent */
3227 propagate_types(b->right, c, &ok, Tnone, 0);
3231 static void interp_prog(struct exec *prog, char **argv)
3233 struct binode *p = cast(binode, prog);
3239 al = cast(binode, p->left);
3241 struct var *v = cast(var, al->left);
3242 struct value *vl = &v->var->val;
3244 if (argv[0] == NULL) {
3245 printf("Not enough args\n");
3248 al = cast(binode, al->right);
3250 *vl = parse_value(vl->type, argv[0]);
3251 if (vl->type == NULL)
3255 v = interp_exec(p->right);
3259 ###### interp binode cases
3260 case Program: abort();
3262 ## And now to test it out.
3264 Having a language requires having a "hello world" program. I'll
3265 provide a little more than that: a program that prints "Hello world",
3266 finds the GCD of two numbers, prints the first few elements of the
3267 Fibonacci sequence, and performs a binary search for a number.
3269 ###### File: oceani.mk
3272 @echo "===== TEST ====="
3273 ./oceani --section "test: hello" oceani.mdc 55 33
3278 print "Hello World, what lovely oceans you have!"
3279 /* When a variable is defined in both branches of an 'if',
3280 * and used afterwards, the variables are merged.
3286 print "Is", A, "bigger than", B,"? ", bigger
3287 /* If a variable is not used after the 'if', no
3288 * merge happens, so types can be different
3291 double:string = "yes"
3292 print A, "is more than twice", B, "?", double
3295 print "double", A, "is only", double
3306 print "GCD of", A, "and", B,"is", a
3308 print a, "is not positive, cannot calculate GCD"
3310 print b, "is not positive, cannot calculate GCD"
3315 print "Fibonacci:", f1,f2,
3316 then togo = togo - 1
3324 /* Binary search... */
3329 mid := (lo + hi) / 2
3341 print "Yay, I found", target
3343 print "Closest I found was", mid