1 # Ocean Interpreter - Stoney Creek version
3 Ocean is intended to be a compiled language, so this interpreter is
4 not targeted at being the final product. It is, rather, an intermediate
5 stage, and fills that role in two distinct ways.
7 Firstly, it exists as a platform to experiment with the early language
8 design. An interpreter is easy to write and easy to get working, so
9 the barrier for entry is lower if I aim to start with an interpreter.
11 Secondly, the plan for the Ocean compiler is to write it in the
12 [Ocean language](http://ocean-lang.org). To achieve this we naturally
13 need some sort of boot-strap process and this interpreter - written in
14 portable C - will fill that role. It will be used to bootstrap the
17 Two features that are not needed to fill either of these roles are
18 performance and completeness. The interpreter only needs to be fast
19 enough to run small test programs and occasionally to run the compiler
20 on itself. It only needs to be complete enough to test aspects of the
21 design which are developed before the compiler is working, and to run
22 the compiler on itself. Any features not used by the compiler when
23 compiling itself are superfluous. They may be included anyway, but
26 Nonetheless, the interpreter should end up being reasonably complete,
27 and any performance bottlenecks which appear and are easily fixed, will
32 This second version of the interpreter exists to test out the
33 structured statement providing conditions and iteration, and simple
34 variable scoping. Clearly we need some minimal other functionality so
35 that values can be tested and instructions iterated over. All that
36 functionality is clearly not normative at this stage (not that
37 anything is **really** normative yet) and will change, so early test
38 code will certainly break in later versions.
40 The under-test parts of the language are:
42 - conditional/looping structured statements
43 - the `use` statement which is needed for that
44 - Variable binding using ":=" and "::=", and assignment using "=".
46 Elements which are present to make a usable language are:
48 - "blocks" of multiple statements.
49 - `pass`: a statement which does nothing.
50 - expressions: `+`, `-`, `*`, `/` can apply to numbers and `++` can
51 catenate strings. `and`, `or`, `not` manipulate Booleans, and
52 normal comparison operators can work on all three types.
53 - `print`: will print the values in a list of expressions.
54 - `program`: is given a list of identifiers to initialize from
59 Versions of the interpreter which obviously do not support a complete
60 language will be named after creeks and streams. This one is Stoney
63 Once we have something reasonably resembling a complete language, the
64 names of rivers will be used.
65 Early versions of the compiler will be named after seas. Major
66 releases of the compiler will be named after oceans. Hopefully I will
67 be finished once I get to the Pacific Ocean release.
71 As well as parsing and executing a program, the interpreter can print
72 out the program from the parsed internal structure. This is useful
73 for validating the parsing.
74 So the main requirements of the interpreter are:
76 - Parse the program, possibly with tracing
77 - Analyse the parsed program to ensure consistency
81 This is all performed by a single C program extracted with
84 There will be two formats for printing the program: a default and one
85 that uses bracketing. So a `--bracket` command line option is needed
86 for that. Normally the first code section found is used, however an
87 alternate section can be requested so that a file (such as this one)
88 can contain multiple programs. This is effected with the `--section`
91 ###### File: oceani.mk
93 myCFLAGS := -Wall -g -fplan9-extensions
94 CFLAGS := $(filter-out $(myCFLAGS),$(CFLAGS)) $(myCFLAGS)
95 myLDLIBS:= libparser.o libscanner.o libmdcode.o -licuuc
96 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
98 all :: $(LDLIBS) oceani
99 oceani.c oceani.h : oceani.mdc parsergen
100 ./parsergen -o oceani --LALR --tag Parser oceani.mdc
101 oceani.mk: oceani.mdc md2c
104 oceani: oceani.o $(LDLIBS)
105 $(CC) $(CFLAGS) -o oceani oceani.o $(LDLIBS)
107 ###### Parser: header
110 struct parse_context {
111 struct token_config config;
119 #define container_of(ptr, type, member) ({ \
120 const typeof( ((type *)0)->member ) *__mptr = (ptr); \
121 (type *)( (char *)__mptr - offsetof(type,member) );})
123 #define config2context(_conf) container_of(_conf, struct parse_context, \
132 #include <sys/mman.h>
/*
 * Usage text printed when an unrecognised option is given.  The two
 * string literals are concatenated by the compiler, so the first one
 * must end with a space to keep "--brackets" and "--section" apart.
 */
static char Usage[] = "Usage: oceani --trace --print --noexec --brackets "
		      "--section=SectionName prog.ocn\n";
153 static const struct option long_options[] = {
154 {"trace", 0, NULL, 't'},
155 {"print", 0, NULL, 'p'},
156 {"noexec", 0, NULL, 'n'},
157 {"brackets", 0, NULL, 'b'},
158 {"section", 1, NULL, 's'},
/*
 * Short options accepted by getopt_long().  's' is followed by ':'
 * because the matching "section" long option takes an argument and the
 * option loop reads optarg for it.
 */
const char *options = "tpnbs:";
162 int main(int argc, char *argv[])
168 char *section = NULL;
169 struct parse_context context = {
171 .ignored = (1 << TK_line_comment)
172 | (1 << TK_block_comment),
173 .number_chars = ".,_+-",
178 int doprint=0, dotrace=0, doexec=1, brackets=0;
181 while ((opt = getopt_long(argc, argv, options, long_options, NULL))
184 case 't': dotrace=1; break;
185 case 'p': doprint=1; break;
186 case 'n': doexec=0; break;
187 case 'b': brackets=1; break;
188 case 's': section = optarg; break;
189 default: fprintf(stderr, Usage);
193 if (optind >= argc) {
194 fprintf(stderr, "oceani: no input file given\n");
197 fd = open(argv[optind], O_RDONLY);
199 fprintf(stderr, "oceani: cannot open %s\n", argv[optind]);
202 context.file_name = argv[optind];
203 len = lseek(fd, 0, 2);
204 file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
205 s = code_extract(file, file+len, NULL);
207 fprintf(stderr, "oceani: could not find any code in %s\n",
213 for (ss = s; ss; ss = ss->next) {
214 struct text sec = ss->section;
215 if (sec.len == strlen(section) &&
216 strncmp(sec.txt, section, sec.len) == 0)
220 prog = parse_oceani(ss->code, &context.config,
221 dotrace ? stderr : NULL);
223 fprintf(stderr, "oceani: cannot find section %s\n",
228 prog = parse_oceani(s->code, &context.config,
229 dotrace ? stderr : NULL);
231 fprintf(stderr, "oceani: fatal parser error.\n");
232 context.parse_error = 1;
235 print_exec(*prog, 0, brackets);
236 if (prog && doexec && !context.parse_error) {
237 if (!analyse_prog(*prog, &context)) {
238 fprintf(stderr, "oceani: type error in program - not running.\n");
241 interp_prog(*prog, argv+optind+1);
248 struct section *t = s->next;
254 exit(context.parse_error ? 1 : 0);
259 These four requirements of parse, analyse, print, interpret apply to
260 each language element individually so that is how most of the code
263 Three of the four are fairly self explanatory. The one that requires
264 a little explanation is the analysis step.
266 The current language design does not require (or even allow) the types
267 of variables to be declared, but they must still have a single type.
268 Different operations impose different requirements on the variables,
269 for example addition requires both arguments to be numeric, and
270 assignment requires the variable on the left to have the same type as
271 the expression on the right.
273 Analysis involves propagating these type requirements around and
274 consequently setting the type of each variable. If any requirements
275 are violated (e.g. a string is compared with a number) or if a
276 variable needs to have two different types, then an error is raised
277 and the program will not run.
279 If the same variable is declared in both branches of an 'if/else', or
280 in all cases of a 'switch' then the multiple instances may be merged
281 into just one variable if the variable is referenced after the
282 conditional statement. When this happens, the types must naturally be
283 consistent across all the branches. When the variable is not used
284 outside the if, the variables in the different branches are distinct
285 and can be of different types.
287 Determining the types of all variables early is important for
288 processing command line arguments. These can be assigned to any type
289 of variable, but we must first know the correct type so any required
290 conversion can happen. If a variable is associated with a command
291 line argument but no type can be interpreted (e.g. the variable is
292 only ever used in a `print` statement), then the type is set to
295 Undeclared names may only appear in "use" statements and "case" expressions.
296 These names are given a type of "label" and a unique value.
297 This allows them to fill the role of a name in an enumerated type, which
298 is useful for testing the `switch` statement.
300 As we will see, the condition part of a `while` statement can return
301 either a Boolean or some other type. This requires that the expected
302 type that gets passed around comprises a type (`enum vtype`) and a
303 flag to indicate that `Vbool` is also permitted.
305 As there are, as yet, no distinct types that are compatible, there
306 isn't much subtlety in the analysis. When we have distinct number
307 types, this will become more interesting.
311 When analysis discovers an inconsistency it needs to report an error;
312 just refusing to run the code ensures that the error doesn't cascade,
313 but by itself it isn't very useful. A clear understanding of the sort of
314 error messages that are useful will help guide the process of analysis.
316 At a simplistic level, the only sort of error that type analysis can
317 report is that the type of some construct doesn't match a contextual
318 requirement. For example, in `4 + "hello"` the addition provides a
319 contextual requirement for numbers, but `"hello"` is not a number. In
320 this particular example no further information is needed as the types
321 are obvious from local information. When a variable is involved that
322 isn't the case. It may be helpful to explain why the variable has a
323 particular type, by indicating the location where the type was set,
324 whether by declaration or usage.
326 Using a recursive-descent analysis we can easily detect a problem at
327 multiple locations. In "`hello:= "there"; 4 + hello`" the addition
328 will detect that one argument is not a number and the usage of `hello`
329 will detect that a number was wanted, but not provided. In this
330 (early) version of the language, we will generate error reports at
331 multiple locations, so the use of `hello` will report an error and
332 explain where the value was set, and the addition will report an error
333 and say why numbers are needed. To be able to report locations for
334 errors, each language element will need to record a file location
335 (line and column) and each variable will need to record the language
336 element where its type was set. For now we will assume that each line
337 of an error message indicates one location in the file, and up to 2
338 types. So we provide a `printf`-like function which takes a format, a
339 language element (a `struct exec` which has not yet been introduced), and 2
340 types. "`%1`" reports the first type, "`%2`" reports the second. We
341 will need a function to print the location, once we know how that is
342 stored. As will be explained later, there are sometimes extra rules for
343 type matching and they might affect error messages, so we need to pass those
348 static void fput_loc(struct exec *loc, FILE *f);
350 ###### core functions
352 static void type_err(struct parse_context *c,
353 char *fmt, struct exec *loc,
354 struct type *t1, int rules, struct type *t2)
356 fprintf(stderr, "%s:", c->file_name);
357 fput_loc(loc, stderr);
358 for (; *fmt ; fmt++) {
365 case '%': fputc(*fmt, stderr); break;
366 default: fputc('?', stderr); break;
368 fputs(t1 ? t1->name : "*unknown*", stderr);
371 fputs(t2 ? t2->name : "*unknown*", stderr);
380 static void tok_err(struct parse_context *c, char *fmt, struct token *t)
382 fprintf(stderr, "%s:%d:%d: %s\n", c->file_name, t->line, t->col, fmt);
388 One last introductory step before detailing the language elements and
389 providing their four requirements is to establish the data structures
390 to store these elements.
392 There are two key objects that we need to work with: executable
393 elements which comprise the program, and values which the program
394 works with. Between these are the variables in their various scopes
395 which hold the values, and types which classify the values stored and
396 manipulated by executables.
400 Values come in a wide range of types, with more likely to be added.
401 Each type needs to be able to parse and print its own values (for
402 convenience at least) as well as to compare two values, at least for
403 equality and possibly for order. For now, values might need to be
404 duplicated and freed, though eventually such manipulations will be
405 better integrated into the language.
407 Rather than requiring every numeric type to support all numeric
408 operations (add, multiply, etc), we allow types to be able to present
409 as one of a few standard types: integer, float, and fraction. The
410 existence of these conversion functions enables types to determine if
411 they are compatible with other types.
417 struct value (*init)(struct type *type);
418 struct value (*parse)(struct type *type, char *str);
419 void (*print)(struct value val);
420 int (*cmp_order)(struct value v1, struct value v2);
421 int (*cmp_eq)(struct value v1, struct value v2);
422 struct value (*dup)(struct value val);
423 void (*free)(struct value val);
424 struct type *(*compat)(struct type *this, struct type *other);
425 long long (*to_int)(struct value *v);
426 double (*to_float)(struct value *v);
427 int (*to_mpq)(mpq_t *q, struct value *v);
433 Values can be numbers, which we represent as multi-precision
434 fractions, strings, Booleans and labels. When analysing the program
435 we also need to allow for places where no value is meaningful (type
436 `Tnone`) and where we don't know what type to expect yet (type is
439 Values are never shared, they are always copied when used, and freed
440 when no longer needed.
442 When propagating type information around the program, we need to
443 determine if two types are compatible, where type `NULL` is compatible
444 with anything. There are two special cases with type compatibility,
445 both related to the Conditional Statement which will be described
446 later. In some cases a Boolean can be accepted as well as some other
447 primary type, and in others any type is acceptable except a label (`Vlabel`).
448 A separate function encoding these cases will simplify some code later.
450 When assigning command line arguments to variables, we need to be able
451 to parse each type from a string.
459 myLDLIBS := libnumber.o libstring.o -lgmp
460 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
463 enum vtype {Vnone, Vstr, Vnum, Vbool, Vlabel} vtype;
476 enum val_rules {Rnolabel = 1<<0, Rboolok = 1<<1};
480 if (rules & Rnolabel)
481 fputs(" (labels not permitted)", stderr);
485 static void _free_value(struct value v)
487 switch (v.type->vtype) {
489 case Vstr: free(v.str.txt); break;
490 case Vnum: mpq_clear(v.num); break;
496 static void free_value(struct value v)
502 static int vtype_compat(struct type *require, struct type *have, int rules)
504 if ((rules & Rboolok) && have == &Tbool)
506 if ((rules & Rnolabel) && have == &Tlabel)
508 if (!require || !have)
511 return require == have;
514 ###### value functions
516 static struct value _val_init(struct type *type)
521 switch(type->vtype) {
524 mpq_init(rv.num); break;
526 rv.str.txt = malloc(1);
539 static struct value val_init(struct type *type)
544 return type->init(type);
549 static struct value _dup_value(struct value v)
553 switch (rv.type->vtype) {
564 mpq_set(rv.num, v.num);
567 rv.str.len = v.str.len;
568 rv.str.txt = malloc(rv.str.len);
569 memcpy(rv.str.txt, v.str.txt, v.str.len);
575 static struct value dup_value(struct value v)
578 return v.type->dup(v);
582 static int _value_cmp(struct value left, struct value right)
585 if (left.type != right.type)
586 return left.type - right.type;
587 switch (left.type->vtype) {
588 case Vlabel: cmp = left.label == right.label ? 0 : 1; break;
589 case Vnum: cmp = mpq_cmp(left.num, right.num); break;
590 case Vstr: cmp = text_cmp(left.str, right.str); break;
591 case Vbool: cmp = left.bool - right.bool; break;
597 static int value_cmp(struct value left, struct value right)
599 if (left.type && left.type->cmp_order)
600 return left.type->cmp_order(left, right);
601 if (left.type && left.type->cmp_eq)
602 return left.type->cmp_eq(left, right);
606 static struct text text_join(struct text a, struct text b)
609 rv.len = a.len + b.len;
610 rv.txt = malloc(rv.len);
611 memcpy(rv.txt, a.txt, a.len);
612 memcpy(rv.txt+a.len, b.txt, b.len);
616 static void _print_value(struct value v)
618 switch (v.type->vtype) {
620 printf("*no-value*"); break;
622 printf("*label-%p*", v.label); break;
624 printf("%.*s", v.str.len, v.str.txt); break;
626 printf("%s", v.bool ? "True":"False"); break;
631 mpf_set_q(fl, v.num);
632 gmp_printf("%Fg", fl);
639 static void print_value(struct value v)
641 if (v.type && v.type->print)
647 static struct value _parse_value(struct type *type, char *arg)
655 switch(type->vtype) {
661 val.str.len = strlen(arg);
662 val.str.txt = malloc(val.str.len);
663 memcpy(val.str.txt, arg, val.str.len);
670 tx.txt = arg; tx.len = strlen(tx.txt);
671 if (number_parse(val.num, tail, tx) == 0)
674 mpq_neg(val.num, val.num);
676 printf("Unsupported suffix: %s\n", arg);
681 if (strcasecmp(arg, "true") == 0 ||
682 strcmp(arg, "1") == 0)
684 else if (strcasecmp(arg, "false") == 0 ||
685 strcmp(arg, "0") == 0)
688 printf("Bad bool: %s\n", arg);
696 static struct value parse_value(struct type *type, char *arg)
700 if (type && type->parse)
701 return type->parse(type, arg);
706 static void _free_value(struct value v);
710 .parse = _parse_value, \
711 .print = _print_value, \
712 .cmp_order = _value_cmp, \
713 .cmp_eq = _value_cmp, \
715 .free = _free_value, \
717 static struct type Tbool = {
723 static struct type Tstr = {
729 static struct type Tnum = {
735 static struct type Tnone = {
741 static struct type Tlabel = {
749 Variables are scoped named values. We store the names in a linked
750 list of "bindings" sorted lexically, and use sequential search and
757 struct binding *next; // in lexical order
761 This linked list is stored in the parse context so that "reduce"
762 functions can find or add variables, and so the analysis phase can
763 ensure that every variable gets a type.
767 struct binding *varlist; // In lexical order
771 static struct binding *find_binding(struct parse_context *c, struct text s)
773 struct binding **l = &c->varlist;
778 (cmp = text_cmp((*l)->name, s)) < 0)
782 n = calloc(1, sizeof(*n));
789 Each name can be linked to multiple variables defined in different
790 scopes. Each scope starts where the name is declared and continues
791 until the end of the containing code block. Scopes of a given name
792 cannot nest, so a declaration while a name is in-scope is an error.
794 ###### binding fields
795 struct variable *var;
799 struct variable *previous;
801 struct binding *name;
802 struct exec *where_decl;// where name was declared
803 struct exec *where_set; // where type was set
807 While the naming seems strange, we include local constants in the
808 definition of variables. A name declared `var := value` can
809 subsequently be changed, but a name declared `var ::= value` cannot -
812 ###### variable fields
815 Scopes in parallel branches can be partially merged. More
816 specifically, if a given name is declared in both branches of an
817 if/else then its scope is a candidate for merging. Similarly if
818 every branch of an exhaustive switch (e.g. has an "else" clause)
819 declares a given name, then the scopes from the branches are
820 candidates for merging.
822 Note that names declared inside a loop (which is only parallel to
823 itself) are never visible after the loop. Similarly names defined in
824 scopes which are not parallel, such as those started by `for` and
825 `switch`, are never visible after the scope. Only variables defined in
826 both `then` and `else` (including the implicit then after an `if`, and
827 excluding `then` used with `for`) and in all `case`s and `else` of a
828 `switch` or `while` can be visible beyond the `if`/`switch`/`while`.
830 Labels, which are a bit like variables, follow different rules.
831 Labels are not explicitly declared, but if an undeclared name appears
832 in a context where a label is legal, that effectively declares the
833 name as a label. The declaration remains in force (or in scope) at
834 least to the end of the immediately containing block and conditionally
835 in any larger containing block which does not declare the name in some
836 other way. Importantly, the conditional scope extension happens even
837 if the label is only used in one parallel branch of a conditional --
838 when used in one branch it is treated as having been declared in all
841 Merge candidates are tentatively visible beyond the end of the
842 branching statement which creates them. If the name is used, the
843 merge is affirmed and they become a single variable visible at the
844 outer layer. If not - if it is redeclared first - the merge lapses.
846 To track scopes we have an extra stack, implemented as a linked list,
847 which roughly parallels the parse stack and which is used exclusively
848 for scoping. When a new scope is opened, a new frame is pushed and
849 the child-count of the parent frame is incremented. This child-count
850 is used to distinguish between the first of a set of parallel scopes,
851 in which declared variables must not be in scope, and subsequent
852 branches, where they must already be conditionally scoped.
854 To push a new frame *before* any code in the frame is parsed, we need a
855 grammar reduction. This is most easily achieved with a grammar
856 element which derives the empty string, and creates the new scope when
857 it is recognized. This can be placed, for example, between a keyword
858 like "if" and the code following it.
862 struct scope *parent;
868 struct scope *scope_stack;
871 static void scope_pop(struct parse_context *c)
873 struct scope *s = c->scope_stack;
875 c->scope_stack = s->parent;
880 static void scope_push(struct parse_context *c)
882 struct scope *s = calloc(1, sizeof(*s));
884 c->scope_stack->child_count += 1;
885 s->parent = c->scope_stack;
893 OpenScope -> ${ scope_push(config2context(config)); }$
896 Each variable records a scope depth and is in one of four states:
898 - "in scope". This is the case between the declaration of the
899 variable and the end of the containing block, and also between
900 the usage which affirms a merge and the end of that block.
902 The scope depth is not greater than the current parse context scope
903 nest depth. When the block of that depth closes, the state will
904 change. To achieve this, all "in scope" variables are linked
905 together as a stack in nesting order.
907 - "pending". The "in scope" block has closed, but other parallel
908 scopes are still being processed. So far, every parallel block at
909 the same level that has closed has declared the name.
911 The scope depth is the depth of the last parallel block that
912 enclosed the declaration, and that has closed.
914 - "conditionally in scope". The "in scope" block and all parallel
915 scopes have closed, and no further mention of the name has been
916 seen. This state includes a secondary nest depth which records the
917 outermost scope seen since the variable became conditionally in
918 scope. If a use of the name is found, the variable becomes "in
919 scope" and that secondary depth becomes the recorded scope depth.
920 If the name is declared as a new variable, the old variable becomes
921 "out of scope" and the recorded scope depth stays unchanged.
923 - "out of scope". The variable is neither in scope nor conditionally
924 in scope. It is permanently out of scope now and can be removed from
925 the "in scope" stack.
928 ###### variable fields
929 int depth, min_depth;
930 enum { OutScope, PendingScope, CondScope, InScope } scope;
931 struct variable *in_scope;
935 struct variable *in_scope;
937 All variables with the same name are linked together using the
938 'previous' link. Those variables that have
939 been affirmatively merged all have a 'merged' pointer that points to
940 one primary variable - the most recently declared instance. When
941 merging variables, we need to also adjust the 'merged' pointer on any
942 other variables that had previously been merged with the one that will
943 no longer be primary.
945 ###### variable fields
946 struct variable *merged;
950 static void variable_merge(struct variable *primary, struct variable *secondary)
956 primary = primary->merged;
958 for (v = primary->previous; v; v=v->previous)
959 if (v == secondary || v == secondary->merged ||
960 v->merged == secondary ||
961 (v->merged && v->merged == secondary->merged)) {
969 while (context.varlist) {
970 struct binding *b = context.varlist;
971 struct variable *v = b->var;
972 context.varlist = b->next;
975 struct variable *t = v;
983 #### Manipulating Bindings
985 When a name is conditionally visible, a new declaration discards the
986 old binding - the condition lapses. Conversely a usage of the name
987 affirms the visibility and extends it to the end of the containing
988 block - i.e. the block that contains both the original declaration and
989 the latest usage. This is determined from `min_depth`. When a
990 conditionally visible variable gets affirmed like this, it is also
991 merged with other conditionally visible variables with the same name.
993 When we parse a variable declaration we either signal an error if the
994 name is currently bound, or create a new variable at the current nest
995 depth if the name is unbound or bound to a conditionally scoped or
996 pending-scope variable. If the previous variable was conditionally
997 scoped, it and its homonyms become out-of-scope.
999 When we parse a variable reference (including non-declarative
1000 assignment) we signal an error if the name is not bound or is bound to
1001 a pending-scope variable; update the scope if the name is bound to a
1002 conditionally scoped variable; or just proceed normally if the named
1003 variable is in scope.
1005 When we exit a scope, any variables bound at this level are either
1006 marked out of scope or pending-scoped, depending on whether the
1007 scope was sequential or parallel.
1009 When exiting a parallel scope we check if there are any variables that
1010 were previously pending and are still visible. If there are, then
1011 they weren't redeclared in the most recent scope, so they cannot be
1012 merged and must become out-of-scope. If it is not the first of
1013 parallel scopes (based on `child_count`), we check that there was a
1014 previous binding that is still pending-scope. If there isn't, the new
1015 variable must now be out-of-scope.
1017 When exiting a sequential scope that immediately enclosed parallel
1018 scopes, we need to resolve any pending-scope variables. If there was
1019 no `else` clause, and we cannot determine that the `switch` was exhaustive,
1020 we need to mark all pending-scope variables as out-of-scope. Otherwise
1021 all pending-scope variables become conditionally scoped.
1024 enum closetype { CloseSequential, CloseParallel, CloseElse };
1026 ###### ast functions
1028 static struct variable *var_decl(struct parse_context *c, struct text s)
1030 struct binding *b = find_binding(c, s);
1031 struct variable *v = b->var;
1033 switch (v ? v->scope : OutScope) {
1035 /* Caller will report the error */
1039 v && v->scope == CondScope;
1041 v->scope = OutScope;
1045 v = calloc(1, sizeof(*v));
1046 v->previous = b->var;
1049 v->min_depth = v->depth = c->scope_depth;
1051 v->in_scope = c->in_scope;
1053 v->val = val_init(NULL);
1057 static struct variable *var_ref(struct parse_context *c, struct text s)
1059 struct binding *b = find_binding(c, s);
1060 struct variable *v = b->var;
1061 struct variable *v2;
1063 switch (v ? v->scope : OutScope) {
1066 /* Signal an error - once that is possible */
1069 /* All CondScope variables of this name need to be merged
1070 * and become InScope
1072 v->depth = v->min_depth;
1074 for (v2 = v->previous;
1075 v2 && v2->scope == CondScope;
1077 variable_merge(v, v2);
1085 static void var_block_close(struct parse_context *c, enum closetype ct)
1087 /* close of all variables that are in_scope */
1088 struct variable *v, **vp, *v2;
1091 for (vp = &c->in_scope;
1092 v = *vp, v && v->depth > c->scope_depth && v->min_depth > c->scope_depth;
1096 case CloseParallel: /* handle PendingScope */
1100 if (c->scope_stack->child_count == 1)
1101 v->scope = PendingScope;
1102 else if (v->previous &&
1103 v->previous->scope == PendingScope)
1104 v->scope = PendingScope;
1105 else if (v->val.type == &Tlabel)
1106 v->scope = PendingScope;
1107 else if (v->name->var == v)
1108 v->scope = OutScope;
1109 if (ct == CloseElse) {
1110 /* All Pending variables with this name
1111 * are now Conditional */
1113 v2 && v2->scope == PendingScope;
1115 v2->scope = CondScope;
1120 v2 && v2->scope == PendingScope;
1122 if (v2->val.type != &Tlabel)
1123 v2->scope = OutScope;
1125 case OutScope: break;
1128 case CloseSequential:
1129 if (v->val.type == &Tlabel)
1130 v->scope = PendingScope;
1133 v->scope = OutScope;
1136 /* There was no 'else', so we can only become
1137 * conditional if we know the cases were exhaustive,
1138 * and that doesn't mean anything yet.
1139 * So only labels become conditional..
1142 v2 && v2->scope == PendingScope;
1144 if (v2->val.type == &Tlabel) {
1145 v2->scope = CondScope;
1146 v2->min_depth = c->scope_depth;
1148 v2->scope = OutScope;
1151 case OutScope: break;
1155 if (v->scope == OutScope)
1164 Executables can be lots of different things. In many cases an
1165 executable is just an operation combined with one or two other
1166 executables. This allows for expressions and lists etc. Other times
1167 an executable is something quite specific like a constant or variable
1168 name. So we define a `struct exec` to be a general executable with a
1169 type, and a `struct binode` which is a subclass of `exec`, forms a
1170 node in a binary tree, and holds an operation. There will be other
1171 subclasses, and to access these we need to be able to `cast` the
1172 `exec` into the various other types.
1175 #define cast(structname, pointer) ({ \
1176 const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \
1177 if (__mptr && *__mptr != X##structname) abort(); \
1178 (struct structname *)( (char *)__mptr);})
1180 #define new(structname) ({ \
1181 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1182 __ptr->type = X##structname; \
1183 __ptr->line = -1; __ptr->column = -1; \
1186 #define new_pos(structname, token) ({ \
1187 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1188 __ptr->type = X##structname; \
1189 __ptr->line = token.line; __ptr->column = token.col; \
1198 enum exec_types type;
1206 struct exec *left, *right;
1209 ###### ast functions
1211 static int __fput_loc(struct exec *loc, FILE *f)
1213 if (loc->line >= 0) {
1214 fprintf(f, "%d:%d: ", loc->line, loc->column);
1217 if (loc->type == Xbinode)
1218 return __fput_loc(cast(binode,loc)->left, f) ||
1219 __fput_loc(cast(binode,loc)->right, f);
1222 static void fput_loc(struct exec *loc, FILE *f)
1224 if (!__fput_loc(loc, f))
1225 fprintf(f, "??:??: ");
1228 Each different type of `exec` node needs a number of functions
1229 defined, a bit like methods. We must be able to free it,
1230 print it, analyse it and execute it. Once we have specific `exec`
1231 types we will need to parse them too. Let's take this a bit more
1236 The parser generator requires a `free_foo` function for each struct
1237 that stores attributes and they will be `exec`s and subtypes thereof.
1238 So we need `free_exec` which can handle all the subtypes, and we need
1241 ###### ast functions
1243 static void free_binode(struct binode *b)
1248 free_exec(b->right);
1252 ###### core functions
1253 static void free_exec(struct exec *e)
1262 ###### forward decls
1264 static void free_exec(struct exec *e);
1266 ###### free exec cases
1267 case Xbinode: free_binode(cast(binode, e)); break;
1271 Printing an `exec` requires that we know the current indent level for
1272 printing line-oriented components. As will become clear later, we
1273 also want to know what sort of bracketing to use.
1275 ###### ast functions
1277 static void do_indent(int i, char *str)
1284 ###### core functions
1285 static void print_binode(struct binode *b, int indent, int bracket)
1289 ## print binode cases
1293 static void print_exec(struct exec *e, int indent, int bracket)
1299 print_binode(cast(binode, e), indent, bracket); break;
1304 ###### forward decls
1306 static void print_exec(struct exec *e, int indent, int bracket);
1310 As discussed, analysis involves propagating type requirements around
1311 the program and looking for errors.
1313 So `propagate_types` is passed an expected type (being a `struct type`
1314 pointer together with some `val_rules` flags) that the `exec` is
1315 expected to return, and returns the type that it does return, either
1316 of which can be `NULL` signifying "unknown". An `ok` flag is passed
1317 by reference. It is set to `0` when an error is found, and `2` when
1318 any change is made. If it remains unchanged at `1`, then no more
1319 propagation is needed.
1321 ###### core functions
1323 static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1324 struct type *type, int rules)
1331 switch (prog->type) {
1334 struct binode *b = cast(binode, prog);
1336 ## propagate binode cases
1340 ## propagate exec cases
1347 Interpreting an `exec` doesn't require anything but the `exec`. State
1348 is stored in variables and each variable will be directly linked from
1349 within the `exec` tree. The exception to this is the whole `program`
1350 which needs to look at command line arguments. The `program` will be
1351 interpreted separately.
1353 Each `exec` can return a value, which may be `Tnone` but must be non-NULL.
1355 ###### core functions
1357 static struct value interp_exec(struct exec *e)
1367 struct binode *b = cast(binode, e);
1368 struct value left, right;
1369 left.type = right.type = &Tnone;
1371 ## interp binode cases
1373 free_value(left); free_value(right);
1376 ## interp exec cases
1381 ## Language elements
1383 Each language element needs to be parsed, printed, analysed,
1384 interpreted, and freed. There are several, so let's just start with
1385 the easy ones and work our way up.
1389 We have already met values as separate objects. When manifest
1390 constants appear in the program text, that must result in an executable
1391 which has a constant value. So the `val` structure embeds a value in
1407 $0 = new_pos(val, $1);
1408 $0->val.type = &Tbool;
1412 $0 = new_pos(val, $1);
1413 $0->val.type = &Tbool;
1417 $0 = new_pos(val, $1);
1418 $0->val.type = &Tnum;
1421 if (number_parse($0->val.num, tail, $1.txt) == 0)
1422 mpq_init($0->val.num);
1424 tok_err(config2context(config), "error: unsupported number suffix.",
1429 $0 = new_pos(val, $1);
1430 $0->val.type = &Tstr;
1433 string_parse(&$1, '\\', &$0->val.str, tail);
1435 tok_err(config2context(config), "error: unsupported string suffix.",
1440 $0 = new_pos(val, $1);
1441 $0->val.type = &Tstr;
1444 string_parse(&$1, '\\', &$0->val.str, tail);
1446 tok_err(config2context(config), "error: unsupported string suffix.",
1451 ###### print exec cases
1454 struct val *v = cast(val, e);
1455 if (v->val.type == &Tstr)
1457 print_value(v->val);
1458 if (v->val.type == &Tstr)
1463 ###### propagate exec cases
1466 struct val *val = cast(val, prog);
1467 if (!vtype_compat(type, val->val.type, rules)) {
1468 type_err(c, "error: expected %1%r found %2",
1469 prog, type, rules, val->val.type);
1472 return val->val.type;
1475 ###### interp exec cases
1477 return dup_value(cast(val, e)->val);
1479 ###### ast functions
1480 static void free_val(struct val *v)
1488 ###### free exec cases
1489 case Xval: free_val(cast(val, e)); break;
1491 ###### ast functions
1492 // Move all nodes from 'b' to 'rv', reversing the order.
1493 // In 'b' 'left' is a list, and 'right' is the last node.
1494 // In 'rv', 'left' is the first node and 'right' is a list.
1495 static struct binode *reorder_bilist(struct binode *b)
1497 struct binode *rv = NULL;
1500 struct exec *t = b->right;
1504 b = cast(binode, b->left);
1514 Just as we used a `val` to wrap a value into an `exec`, we similarly
1515 need a `var` to wrap a `variable` into an exec. While each `val`
1516 contained a copy of the value, each `var` holds a link to the variable
1517 because it really is the same variable no matter where it appears.
1518 When a variable is used, we need to remember to follow the `->merged`
1519 link to find the primary instance.
1527 struct variable *var;
1533 VariableDecl -> IDENTIFIER := ${ {
1534 struct variable *v = var_decl(config2context(config), $1.txt);
1535 $0 = new_pos(var, $1);
1540 v = var_ref(config2context(config), $1.txt);
1542 type_err(config2context(config), "error: variable '%v' redeclared",
1543 $0, &Tnone, 0, &Tnone);
1544 type_err(config2context(config), "info: this is where '%v' was first declared",
1545 v->where_decl, &Tnone, 0, &Tnone);
1548 | IDENTIFIER ::= ${ {
1549 struct variable *v = var_decl(config2context(config), $1.txt);
1550 $0 = new_pos(var, $1);
1556 v = var_ref(config2context(config), $1.txt);
1558 type_err(config2context(config), "error: variable '%v' redeclared",
1559 $0, &Tnone, 0, &Tnone);
1560 type_err(config2context(config), "info: this is where '%v' was first declared",
1561 v->where_decl, &Tnone, 0, &Tnone);
1565 Variable -> IDENTIFIER ${ {
1566 struct variable *v = var_ref(config2context(config), $1.txt);
1567 $0 = new_pos(var, $1);
1569 /* This might be a label - allocate a var just in case */
1570 v = var_decl(config2context(config), $1.txt);
1572 v->val = val_init(&Tlabel);
1573 v->val.label = &v->val;
1580 ###### print exec cases
1583 struct var *v = cast(var, e);
1585 struct binding *b = v->var->name;
1586 printf("%.*s", b->name.len, b->name.txt);
1593 if (loc->type == Xvar) {
1594 struct var *v = cast(var, loc);
1596 struct binding *b = v->var->name;
1597 fprintf(stderr, "%.*s", b->name.len, b->name.txt);
1599 fputs("???", stderr);
1601 fputs("NOTVAR", stderr);
1604 ###### propagate exec cases
1608 struct var *var = cast(var, prog);
1609 struct variable *v = var->var;
1611 type_err(c, "%d:BUG: no variable!!", prog, &Tnone, 0, &Tnone);
1617 if (v->val.type == NULL) {
1618 if (type && *ok != 0) {
1619 v->val = val_init(type);
1620 v->where_set = prog;
1625 if (!vtype_compat(type, v->val.type, rules)) {
1626 type_err(c, "error: expected %1%r but variable '%v' is %2", prog,
1627 type, rules, v->val.type);
1628 type_err(c, "info: this is where '%v' was set to %1", v->where_set,
1629 v->val.type, rules, &Tnone);
1637 ###### interp exec cases
1640 struct var *var = cast(var, e);
1641 struct variable *v = var->var;
1645 return dup_value(v->val);
1648 ###### ast functions
1650 static void free_var(struct var *v)
1655 ###### free exec cases
1656 case Xvar: free_var(cast(var, e)); break;
1658 ### Expressions: Boolean
1660 Our first user of the `binode` will be expressions, and particularly
1661 Boolean expressions. As I haven't implemented precedence in the
1662 parser generator yet, we need different names for each precedence
1663 level used by expressions. The outermost or lowest level precedence
1664 are Boolean `or`, `and`, and `not` which form an `Expression` out of `BTerm`s
1675 Expression -> Expression or BTerm ${ {
1676 struct binode *b = new(binode);
1682 | BTerm ${ $0 = $<1; }$
1684 BTerm -> BTerm and BFact ${ {
1685 struct binode *b = new(binode);
1691 | BFact ${ $0 = $<1; }$
1693 BFact -> not BFact ${ {
1694 struct binode *b = new(binode);
1701 ###### print binode cases
1703 print_exec(b->left, -1, 0);
1705 print_exec(b->right, -1, 0);
1708 print_exec(b->left, -1, 0);
1710 print_exec(b->right, -1, 0);
1714 print_exec(b->right, -1, 0);
1717 ###### propagate binode cases
1721 /* both must be Tbool, result is Tbool */
1722 propagate_types(b->left, c, ok, &Tbool, 0);
1723 propagate_types(b->right, c, ok, &Tbool, 0);
1724 if (type && type != &Tbool) {
1725 type_err(c, "error: %1 operation found where %2 expected", prog,
1731 ###### interp binode cases
1733 rv = interp_exec(b->left);
1734 right = interp_exec(b->right);
1735 rv.bool = rv.bool && right.bool;
1738 rv = interp_exec(b->left);
1739 right = interp_exec(b->right);
1740 rv.bool = rv.bool || right.bool;
1743 rv = interp_exec(b->right);
1747 ### Expressions: Comparison
1749 Of slightly higher precedence than Boolean expressions are
1751 A comparison takes arguments of any type, but the two types must be
1754 To simplify the parsing we introduce an `eop` which can record an
1755 expression operator.
1762 ###### ast functions
1763 static void free_eop(struct eop *e)
1778 | Expr CMPop Expr ${ {
1779 struct binode *b = new(binode);
1785 | Expr ${ $0 = $<1; }$
1790 CMPop -> < ${ $0.op = Less; }$
1791 | > ${ $0.op = Gtr; }$
1792 | <= ${ $0.op = LessEq; }$
1793 | >= ${ $0.op = GtrEq; }$
1794 | == ${ $0.op = Eql; }$
1795 | != ${ $0.op = NEql; }$
1797 ###### print binode cases
1805 print_exec(b->left, -1, 0);
1807 case Less: printf(" < "); break;
1808 case LessEq: printf(" <= "); break;
1809 case Gtr: printf(" > "); break;
1810 case GtrEq: printf(" >= "); break;
1811 case Eql: printf(" == "); break;
1812 case NEql: printf(" != "); break;
1815 print_exec(b->right, -1, 0);
1818 ###### propagate binode cases
1825 /* Both must match but not labels, result is Tbool */
1826 t = propagate_types(b->left, c, ok, NULL, Rnolabel);
1828 propagate_types(b->right, c, ok, t, 0);
1830 t = propagate_types(b->right, c, ok, NULL, Rnolabel);
1832 t = propagate_types(b->left, c, ok, t, 0);
1834 if (!vtype_compat(type, &Tbool, 0)) {
1835 type_err(c, "error: Comparison returns %1 but %2 expected", prog,
1836 &Tbool, rules, type);
1841 ###### interp binode cases
1850 left = interp_exec(b->left);
1851 right = interp_exec(b->right);
1852 cmp = value_cmp(left, right);
1855 case Less: rv.bool = cmp < 0; break;
1856 case LessEq: rv.bool = cmp <= 0; break;
1857 case Gtr: rv.bool = cmp > 0; break;
1858 case GtrEq: rv.bool = cmp >= 0; break;
1859 case Eql: rv.bool = cmp == 0; break;
1860 case NEql: rv.bool = cmp != 0; break;
1861 default: rv.bool = 0; break;
1866 ### Expressions: The rest
1868 The remaining expressions with the highest precedence are arithmetic
1869 and string concatenation. They are `Expr`, `Term`, and `Factor`.
1870 The `Factor` is where the `Value` and `Variable` that we already have
1873 `+` and `-` are both infix and prefix operations (where they are
1874 absolute value and negation). These have different operator names.
1876 We also have a 'Bracket' operator which records where parentheses were
1877 found. This makes it easy to reproduce these when printing. Once
1878 precedence is handled better I might be able to discard this.
1890 Expr -> Expr Eop Term ${ {
1891 struct binode *b = new(binode);
1897 | Term ${ $0 = $<1; }$
1899 Term -> Term Top Factor ${ {
1900 struct binode *b = new(binode);
1906 | Factor ${ $0 = $<1; }$
1908 Factor -> ( Expression ) ${ {
1909 struct binode *b = new_pos(binode, $1);
1915 struct binode *b = new(binode);
1920 | Value ${ $0 = $<1; }$
1921 | Variable ${ $0 = $<1; }$
1924 Eop -> + ${ $0.op = Plus; }$
1925 | - ${ $0.op = Minus; }$
1927 Uop -> + ${ $0.op = Absolute; }$
1928 | - ${ $0.op = Negate; }$
1930 Top -> * ${ $0.op = Times; }$
1931 | / ${ $0.op = Divide; }$
1932 | ++ ${ $0.op = Concat; }$
1934 ###### print binode cases
1940 print_exec(b->left, indent, 0);
1942 case Plus: printf(" + "); break;
1943 case Minus: printf(" - "); break;
1944 case Times: printf(" * "); break;
1945 case Divide: printf(" / "); break;
1946 case Concat: printf(" ++ "); break;
1949 print_exec(b->right, indent, 0);
1953 print_exec(b->right, indent, 0);
1957 print_exec(b->right, indent, 0);
1961 print_exec(b->right, indent, 0);
1965 ###### propagate binode cases
1970 /* both must be numbers, result is Tnum */
1973 /* as propagate_types ignores a NULL,
1974 * unary ops fit here too */
1975 propagate_types(b->left, c, ok, &Tnum, 0);
1976 propagate_types(b->right, c, ok, &Tnum, 0);
1977 if (!vtype_compat(type, &Tnum, 0)) {
1978 type_err(c, "error: Arithmetic returns %1 but %2 expected", prog,
1979 &Tnum, rules, type);
1985 /* both must be Tstr, result is Tstr */
1986 propagate_types(b->left, c, ok, &Tstr, 0);
1987 propagate_types(b->right, c, ok, &Tstr, 0);
1988 if (!vtype_compat(type, &Tstr, 0)) {
1989 type_err(c, "error: Concat returns %1 but %2 expected", prog,
1990 &Tstr, rules, type);
1996 return propagate_types(b->right, c, ok, type, 0);
1998 ###### interp binode cases
2001 rv = interp_exec(b->left);
2002 right = interp_exec(b->right);
2003 mpq_add(rv.num, rv.num, right.num);
2006 rv = interp_exec(b->left);
2007 right = interp_exec(b->right);
2008 mpq_sub(rv.num, rv.num, right.num);
2011 rv = interp_exec(b->left);
2012 right = interp_exec(b->right);
2013 mpq_mul(rv.num, rv.num, right.num);
2016 rv = interp_exec(b->left);
2017 right = interp_exec(b->right);
2018 mpq_div(rv.num, rv.num, right.num);
2021 rv = interp_exec(b->right);
2022 mpq_neg(rv.num, rv.num);
2025 rv = interp_exec(b->right);
2026 mpq_abs(rv.num, rv.num);
2029 rv = interp_exec(b->right);
2032 left = interp_exec(b->left);
2033 right = interp_exec(b->right);
2035 rv.str = text_join(left.str, right.str);
2038 ### Blocks, Statements, and Statement lists.
2040 Now that we have expressions out of the way we need to turn to
2041 statements. There are simple statements and more complex statements.
2042 Simple statements do not contain newlines, complex statements do.
2044 Statements often come in sequences and we have corresponding simple
2045 statement lists and complex statement lists.
2046 The former comprise only simple statements separated by semicolons.
2047 The latter comprise complex statements and simple statement lists. They are
2048 separated by newlines. Thus the semicolon is only used to separate
2049 simple statements on the one line. This may be overly restrictive,
2050 but I'm not sure I ever want a complex statement to share a line with
2053 Note that a simple statement list can still use multiple lines if
2054 subsequent lines are indented, so
2056 ###### Example: wrapped simple statement list
2061 is a single simple statement list. This might allow room for
2062 confusion, so I'm not set on it yet.
2064 A simple statement list needs no extra syntax. A complex statement
2065 list has two syntactic forms. It can be enclosed in braces (much like
2066 C blocks), or it can be introduced by a colon and continue until an
2067 unindented newline (much like Python blocks). With this extra syntax
2068 it is referred to as a block.
2070 Note that a block does not have to include any newlines if it only
2071 contains simple statements. So both of:
2073 if condition: a=b; d=f
2075 if condition { a=b; print f }
2079 In either case the list is constructed from a `binode` list with
2080 `Block` as the operator. When parsing the list it is most convenient
2081 to append to the end, so a list is a list and a statement. When using
2082 the list it is more convenient to consider a list to be a statement
2083 and a list. So we need a function to re-order a list.
2084 `reorder_bilist` serves this purpose.
2086 The only stand-alone statement we introduce at this stage is `pass`
2087 which does nothing and is represented as a `NULL` pointer in a `Block`
2107 Block -> Open Statementlist Close ${ $0 = $<2; }$
2108 | Open Newlines Statementlist Close ${ $0 = $<3; }$
2109 | Open SimpleStatements } ${ $0 = reorder_bilist($<2); }$
2110 | Open Newlines SimpleStatements } ${ $0 = reorder_bilist($<3); }$
2111 | : Statementlist ${ $0 = $<2; }$
2112 | : SimpleStatements ${ $0 = reorder_bilist($<2); }$
2114 Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<1); }$
2116 ComplexStatements -> ComplexStatements ComplexStatement ${
2122 | ComplexStatements NEWLINE ${ $0 = $<1; }$
2123 | ComplexStatement ${
2131 ComplexStatement -> SimpleStatements NEWLINE ${
2132 $0 = reorder_bilist($<1);
2134 ## ComplexStatement Grammar
2137 SimpleStatements -> SimpleStatements ; SimpleStatement ${
2143 | SimpleStatement ${
2149 | SimpleStatements ; ${ $0 = $<1; }$
2151 SimpleStatement -> pass ${ $0 = NULL; }$
2152 ## SimpleStatement Grammar
2154 ###### print binode cases
2158 if (b->left == NULL)
2161 print_exec(b->left, indent, 0);
2164 print_exec(b->right, indent, 0);
2167 // block, one per line
2168 if (b->left == NULL)
2169 do_indent(indent, "pass\n");
2171 print_exec(b->left, indent, bracket);
2173 print_exec(b->right, indent, bracket);
2177 ###### propagate binode cases
2180 /* If any statement returns something other than Tnone
2181 * or Tbool then all such must return same type.
2182 * As each statement may be Tnone or something else,
2183 * we must always pass NULL (unknown) down, otherwise an incorrect
2184 * error might occur. We never return Tnone unless it is
2189 for (e = b; e; e = cast(binode, e->right)) {
2190 t = propagate_types(e->left, c, ok, NULL, rules);
2191 if ((rules & Rboolok) && t == &Tbool)
2193 if (t && t != &Tnone && t != &Tbool) {
2196 else if (t != type) {
2197 type_err(c, "error: expected %1%r, found %2",
2198 e->left, type, rules, t);
2206 ###### interp binode cases
2208 while (rv.type == &Tnone &&
2211 rv = interp_exec(b->left);
2212 b = cast(binode, b->right);
2216 ### The Print statement
2218 `print` is a simple statement that takes a comma-separated list of
2219 expressions and prints the values separated by spaces and terminated
2220 by a newline. No control of formatting is possible.
2222 `print` faces the same list-ordering issue as blocks, and uses the
2228 ###### SimpleStatement Grammar
2230 | print ExpressionList ${
2231 $0 = reorder_bilist($<2);
2233 | print ExpressionList , ${
2238 $0 = reorder_bilist($0);
2249 ExpressionList -> ExpressionList , Expression ${
2262 ###### print binode cases
2265 do_indent(indent, "print");
2269 print_exec(b->left, -1, 0);
2273 b = cast(binode, b->right);
2279 ###### propagate binode cases
2282 /* don't care but all must be consistent */
2283 propagate_types(b->left, c, ok, NULL, Rnolabel);
2284 propagate_types(b->right, c, ok, NULL, Rnolabel);
2287 ###### interp binode cases
2293 for ( ; b; b = cast(binode, b->right))
2297 left = interp_exec(b->left);
2310 ###### Assignment statement
2312 An assignment will assign a value to a variable, providing it hasn't
2313 been declared as a constant. The analysis phase ensures that the type
2314 will be correct so the interpreter just needs to perform the
2315 calculation. There is a form of assignment which declares a new
2316 variable as well as assigning a value. If a name is assigned before
2317 it is declared, an error will be raised as the name is created as
2318 `Tlabel` and it is illegal to assign to such names.
2324 ###### SimpleStatement Grammar
2325 | Variable = Expression ${ {
2326 struct var *v = cast(var, $1);
2332 if (v->var && !v->var->constant) {
2336 | VariableDecl Expression ${
2343 ###### print binode cases
2346 do_indent(indent, "");
2347 print_exec(b->left, indent, 0);
2349 print_exec(b->right, indent, 0);
2355 do_indent(indent, "");
2356 print_exec(b->left, indent, 0);
2357 if (cast(var, b->left)->var->constant)
2361 print_exec(b->right, indent, 0);
2366 ###### propagate binode cases
2370 /* Both must match and not be labels, result is Tnone */
2371 t = propagate_types(b->left, c, ok, NULL, Rnolabel);
2373 if (propagate_types(b->right, c, ok, t, 0) != t)
2374 if (b->left->type == Xvar)
2375 type_err(c, "info: variable '%v' was set as %1 here.",
2376 cast(var, b->left)->var->where_set, t, rules, &Tnone);
2378 t = propagate_types(b->right, c, ok, NULL, Rnolabel);
2380 propagate_types(b->left, c, ok, t, 0);
2386 ###### interp binode cases
2391 struct variable *v = cast(var, b->left)->var;
2394 right = interp_exec(b->right);
2401 ### The `use` statement
2403 The `use` statement is the last "simple" statement. It is needed when
2404 the condition in a conditional statement is a block. `use` works much
2405 like `return` in C, but only completes the `condition`, not the whole
2411 ###### SimpleStatement Grammar
2413 $0 = new_pos(binode, $1);
2418 ###### print binode cases
2421 do_indent(indent, "use ");
2422 print_exec(b->right, -1, 0);
2427 ###### propagate binode cases
2430 /* result matches value */
2431 return propagate_types(b->right, c, ok, type, 0);
2433 ###### interp binode cases
2436 rv = interp_exec(b->right);
2439 ### The Conditional Statement
2441 This is the biggy and currently the only complex statement. This
2442 subsumes `if`, `while`, `do/while`, `switch`, and some parts of `for`.
2443 It is comprised of a number of parts, all of which are optional though
2444 set combinations apply. Each part is (usually) a key word (`then` is
2445 sometimes optional) followed by either an expression or a code block,
2446 except the `casepart` which is a "key word and an expression" followed
2447 by a code block. The code-block option is valid for all parts and,
2448 where an expression is also allowed, the code block can use the `use`
2449 statement to report a value. If the code block does not report a value
2450 the effect is similar to reporting `True`.
2452 The `else` and `case` parts, as well as `then` when combined with
2453 `if`, can contain a `use` statement which will apply to some
2454 containing conditional statement. `for` parts, `do` parts and `then`
2455 parts used with `for` can never contain a `use`, except in some
2456 subordinate conditional statement.
2458 If there is a `forpart`, it is executed first, only once.
2459 If there is a `dopart`, then it is executed repeatedly providing
2460 always that the `condpart` or `cond`, if present, does not return a non-True
2461 value. `condpart` can fail to return any value if it simply executes
2462 to completion. This is treated the same as returning `True`.
2464 If there is a `thenpart` it will be executed whenever the `condpart`
2465 or `cond` returns True (or does not return any value), but this will happen
2466 *after* `dopart` (when present).
2468 If `elsepart` is present it will be executed at most once when the
2469 condition returns `False` or some value that isn't `True` and isn't
2470 matched by any `casepart`. If there are any `casepart`s, they will be
2471 executed when the condition returns a matching value.
2473 The particular sorts of values allowed in case parts has not yet been
2474 determined in the language design, so nothing is prohibited.
2476 The various blocks in this complex statement potentially provide scope
2477 for variables as described earlier. Each such block must include the
2478 "OpenScope" nonterminal before parsing the block, and must call
2479 `var_block_close()` when closing the block.
2481 The code following "`if`", "`switch`" and "`for`" does not get its own
2482 scope, but is in a scope covering the whole statement, so names
2483 declared there cannot be redeclared elsewhere. Similarly the
2484 condition following "`while`" is in a scope that covers the body
2485 ("`do`" part) of the loop, and which does not allow conditional scope
2486 extension. Code following "`then`" (both looping and non-looping),
2487 "`else`" and "`case`" each get their own local scope.
2489 The type requirements on the code block in a `whilepart` are quite
2490 unusual. It is allowed to return a value of some identifiable type, in
2491 which case the loop aborts and an appropriate `casepart` is run, or it
2492 can return a Boolean, in which case the loop either continues to the
2493 `dopart` (on `True`) or aborts and runs the `elsepart` (on `False`).
2494 This is different both from the `ifpart` code block which is expected to
2495 return a Boolean, or the `switchpart` code block which is expected to
2496 return the same type as the casepart values. The correct analysis of
2497 the type of the `whilepart` code block is the reason for the
2498 `Rboolok` flag which is passed to `propagate_types()`.
2500 The `cond_statement` cannot fit into a `binode` so a new `exec` is
2509 struct exec *action;
2510 struct casepart *next;
2512 struct cond_statement {
2514 struct exec *forpart, *condpart, *dopart, *thenpart, *elsepart;
2515 struct casepart *casepart;
2518 ###### ast functions
2520 static void free_casepart(struct casepart *cp)
2524 free_exec(cp->value);
2525 free_exec(cp->action);
2532 static void free_cond_statement(struct cond_statement *s)
2536 free_exec(s->forpart);
2537 free_exec(s->condpart);
2538 free_exec(s->dopart);
2539 free_exec(s->thenpart);
2540 free_exec(s->elsepart);
2541 free_casepart(s->casepart);
2545 ###### free exec cases
2546 case Xcond_statement: free_cond_statement(cast(cond_statement, e)); break;
2548 ###### ComplexStatement Grammar
2549 | CondStatement ${ $0 = $<1; }$
2554 // both ForThen and Whilepart open scopes, and CondSuffix only
2555 // closes one - so in the first branch here we have another to close.
2556 CondStatement -> ForThen WhilePart CondSuffix ${
2558 $0->forpart = $1.forpart; $1.forpart = NULL;
2559 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
2560 $0->condpart = $2.condpart; $2.condpart = NULL;
2561 $0->dopart = $2.dopart; $2.dopart = NULL;
2562 var_block_close(config2context(config), CloseSequential);
2564 | WhilePart CondSuffix ${
2566 $0->condpart = $1.condpart; $1.condpart = NULL;
2567 $0->dopart = $1.dopart; $1.dopart = NULL;
2569 | SwitchPart CondSuffix ${
2573 | IfPart IfSuffix ${
2575 $0->condpart = $1.condpart; $1.condpart = NULL;
2576 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
2577 // This is where we close an "if" statement
2578 var_block_close(config2context(config), CloseSequential);
2581 CondSuffix -> IfSuffix ${
2583 // This is where we close scope of the whole
2584 // "for" or "while" statement
2585 var_block_close(config2context(config), CloseSequential);
2587 | CasePart CondSuffix ${
2589 $1->next = $0->casepart;
2594 CasePart -> Newlines case Expression OpenScope Block ${
2595 $0 = calloc(1,sizeof(struct casepart));
2598 var_block_close(config2context(config), CloseParallel);
2600 | case Expression OpenScope Block ${
2601 $0 = calloc(1,sizeof(struct casepart));
2604 var_block_close(config2context(config), CloseParallel);
2608 IfSuffix -> Newlines ${ $0 = new(cond_statement); }$
2609 | Newlines else OpenScope Block ${
2610 $0 = new(cond_statement);
2612 var_block_close(config2context(config), CloseElse);
2614 | else OpenScope Block ${
2615 $0 = new(cond_statement);
2617 var_block_close(config2context(config), CloseElse);
2619 | Newlines else OpenScope CondStatement ${
2620 $0 = new(cond_statement);
2622 var_block_close(config2context(config), CloseElse);
2624 | else OpenScope CondStatement ${
2625 $0 = new(cond_statement);
2627 var_block_close(config2context(config), CloseElse);
2632 // These scopes are closed in CondSuffix
2633 ForPart -> for OpenScope SimpleStatements ${
2634 $0 = reorder_bilist($<3);
2636 | for OpenScope Block ${
2640 ThenPart -> then OpenScope SimpleStatements ${
2641 $0 = reorder_bilist($<3);
2642 var_block_close(config2context(config), CloseSequential);
2644 | then OpenScope Block ${
2646 var_block_close(config2context(config), CloseSequential);
2649 ThenPartNL -> ThenPart OptNL ${
2653 // This scope is closed in CondSuffix
2654 WhileHead -> while OpenScope Block ${
2659 ForThen -> ForPart OptNL ThenPartNL ${
2667 // This scope is closed in CondSuffix
2668 WhilePart -> while OpenScope Expression Block ${
2669 $0.type = Xcond_statement;
2673 | WhileHead OptNL do Block ${
2674 $0.type = Xcond_statement;
2679 IfPart -> if OpenScope Expression OpenScope Block ${
2680 $0.type = Xcond_statement;
2683 var_block_close(config2context(config), CloseParallel);
2685 | if OpenScope Block OptNL then OpenScope Block ${
2686 $0.type = Xcond_statement;
2689 var_block_close(config2context(config), CloseParallel);
2693 // This scope is closed in CondSuffix
2694 SwitchPart -> switch OpenScope Expression ${
2697 | switch OpenScope Block ${
2701 ###### print exec cases
2703 case Xcond_statement:
2705 struct cond_statement *cs = cast(cond_statement, e);
2706 struct casepart *cp;
2708 do_indent(indent, "for");
2709 if (bracket) printf(" {\n"); else printf(":\n");
2710 print_exec(cs->forpart, indent+1, bracket);
2713 do_indent(indent, "} then {\n");
2715 do_indent(indent, "then:\n");
2716 print_exec(cs->thenpart, indent+1, bracket);
2718 if (bracket) do_indent(indent, "}\n");
2722 if (cs->condpart && cs->condpart->type == Xbinode &&
2723 cast(binode, cs->condpart)->op == Block) {
2725 do_indent(indent, "while {\n");
2727 do_indent(indent, "while:\n");
2728 print_exec(cs->condpart, indent+1, bracket);
2730 do_indent(indent, "} do {\n");
2732 do_indent(indent, "do:\n");
2733 print_exec(cs->dopart, indent+1, bracket);
2735 do_indent(indent, "}\n");
2737 do_indent(indent, "while ");
2738 print_exec(cs->condpart, 0, bracket);
2743 print_exec(cs->dopart, indent+1, bracket);
2745 do_indent(indent, "}\n");
2750 do_indent(indent, "switch");
2752 do_indent(indent, "if");
2753 if (cs->condpart && cs->condpart->type == Xbinode &&
2754 cast(binode, cs->condpart)->op == Block) {
2759 print_exec(cs->condpart, indent+1, bracket);
2761 do_indent(indent, "}\n");
2763 do_indent(indent, "then:\n");
2764 print_exec(cs->thenpart, indent+1, bracket);
2768 print_exec(cs->condpart, 0, bracket);
2774 print_exec(cs->thenpart, indent+1, bracket);
2776 do_indent(indent, "}\n");
2781 for (cp = cs->casepart; cp; cp = cp->next) {
2782 do_indent(indent, "case ");
2783 print_exec(cp->value, -1, 0);
2788 print_exec(cp->action, indent+1, bracket);
2790 do_indent(indent, "}\n");
2793 do_indent(indent, "else");
2798 print_exec(cs->elsepart, indent+1, bracket);
2800 do_indent(indent, "}\n");
2805 ###### propagate exec cases
2806 case Xcond_statement:
2808 // forpart and dopart must return Tnone
2809 // thenpart must return Tnone if there is a dopart,
2810 // otherwise it is like elsepart.
2812 // be bool if there is no casepart
2813 // match casepart->values if there is a switchpart
2814 // either be bool or match casepart->value if there
2816 // elsepart, casepart->action must match the return type
2817 // expected of this statement.
2818 struct cond_statement *cs = cast(cond_statement, prog);
2819 struct casepart *cp;
2821 t = propagate_types(cs->forpart, c, ok, &Tnone, 0);
2822 if (!vtype_compat(&Tnone, t, 0))
2824 t = propagate_types(cs->dopart, c, ok, &Tnone, 0);
2825 if (!vtype_compat(&Tnone, t, 0))
2828 t = propagate_types(cs->thenpart, c, ok, &Tnone, 0);
2829 if (!vtype_compat(&Tnone, t, 0))
2832 if (cs->casepart == NULL)
2833 propagate_types(cs->condpart, c, ok, &Tbool, 0);
2835 /* Condpart must match case values, with bool permitted */
2837 for (cp = cs->casepart;
2838 cp && !t; cp = cp->next)
2839 t = propagate_types(cp->value, c, ok, NULL, 0);
2840 if (!t && cs->condpart)
2841 t = propagate_types(cs->condpart, c, ok, NULL, Rboolok);
2842 // Now we have a type (I hope) push it down
2844 for (cp = cs->casepart; cp; cp = cp->next)
2845 propagate_types(cp->value, c, ok, t, 0);
2846 propagate_types(cs->condpart, c, ok, t, Rboolok);
2849 // (if)then, else, and case parts must return expected type.
2850 if (!cs->dopart && !type)
2851 type = propagate_types(cs->thenpart, c, ok, NULL, rules);
2853 type = propagate_types(cs->elsepart, c, ok, NULL, rules);
2854 for (cp = cs->casepart;
2857 type = propagate_types(cp->action, c, ok, NULL, rules);
2860 propagate_types(cs->thenpart, c, ok, type, rules);
2861 propagate_types(cs->elsepart, c, ok, type, rules);
2862 for (cp = cs->casepart; cp ; cp = cp->next)
2863 propagate_types(cp->action, c, ok, type, rules);
2869 ###### interp exec cases
2870 case Xcond_statement:
// Execute a conditional/looping statement: run forpart, evaluate
// condpart, run dopart/thenpart, and repeat while a dopart exists.
// Afterwards the condition value is compared with each case value;
// a matching case's action supplies the result, and elsepart is the
// final fallback.
2872 struct value v, cnd;
2873 struct casepart *cp;
2874 struct cond_statement *c = cast(cond_statement, e);
2877 interp_exec(c->forpart);
2880 cnd = interp_exec(c->condpart);
// Test for a condition that is neither Tnone nor a true Tbool.
2883 if (!(cnd.type == &Tnone ||
2884 (cnd.type == &Tbool && cnd.bool != 0)))
2886 // cnd is Tnone or Tbool, doesn't need to be freed
2888 interp_exec(c->dopart);
2891 v = interp_exec(c->thenpart);
// A thenpart result other than Tnone, or the absence of a dopart,
// is singled out here.
2892 if (v.type != &Tnone || !c->dopart)
// Only statements with a dopart go round again.
2896 } while (c->dopart);
// value_cmp() returning 0 identifies the case whose value equals cnd.
2898 for (cp = c->casepart; cp; cp = cp->next) {
2899 v = interp_exec(cp->value);
2900 if (value_cmp(v, cnd) == 0) {
2903 return interp_exec(cp->action);
2909 return interp_exec(c->elsepart);
2914 ### Finally the whole program.
2916 Somewhat reminiscent of Pascal, a (current) Ocean program starts with
2917 the keyword "program" and a list of variable names which are assigned
2918 values from command line arguments. Following this is a `block` which
2919 is the code to execute.
2921 As this is the top level, several things are handled a bit differently.
2923 The whole program is not interpreted by `interp_exec` as that isn't
2924 passed the argument list which the program requires. Similarly type
2925 analysis is a bit more interesting at this level.
2930 ###### Parser: grammar
2933 Program -> program OpenScope Varlist Block OptNL ${
// The left child is the list built by Varlist, passed through
// reorder_bilist() (presumably to restore source order -- confirm).
2936 $0->left = reorder_bilist($<3);
2938 var_block_close(config2context(config), CloseSequential);
// After the close above, a non-NULL scope_stack aborts the run.
2939 if (config2context(config)->scope_stack) abort();
2942 tok_err(config2context(config),
2943 "error: unhandled parse error.", &$1);
2946 Varlist -> Varlist ArgDecl ${
2955 ArgDecl -> IDENTIFIER ${ {
// Declare a variable named by the identifier's text.
2956 struct variable *v = var_decl(config2context(config), $1.txt);
2963 ###### print binode cases
// Print the "program" keyword, then every entry of the list hanging
// off b->left, then the body held in b->right one indent level deeper.
2965 do_indent(indent, "program");
2966 for (b2 = cast(binode, b->left); b2; b2 = cast(binode, b2->right)) {
2968 print_exec(b2->left, 0, 0);
2974 print_exec(b->right, indent+1, bracket);
2976 do_indent(indent, "}\n");
2979 ###### propagate binode cases
// A Program node reaching this switch aborts; analyse_prog() calls
// propagate_types() on the program's right-hand child instead.
2980 case Program: abort();
2982 ###### core functions
/* Type analysis for a whole program.
 * prog is treated as a binode: its right child is propagated against
 * Tnone, and its left child heads a list (linked through ->right) of
 * variables.  Any listed variable that still has no type is given an
 * initial Tstr value, after which the right child is checked again.
 */
2984 static int analyse_prog(struct exec *prog, struct parse_context *c)
2986 struct binode *b = cast(binode, prog);
2993 propagate_types(b->right, c, &ok, &Tnone, 0);
// Walk the variable list; an entry whose value has no type yet is
// recorded as set at this list node and initialised as a string.
2998 for (b = cast(binode, b->left); b; b = cast(binode, b->right)) {
2999 struct var *v = cast(var, b->left);
3000 if (!v->var->val.type) {
3001 v->var->where_set = b;
3002 v->var->val = val_init(&Tstr);
// b was used as the list cursor above; point it back at the program.
3005 b = cast(binode, prog);
3008 propagate_types(b->right, c, &ok, &Tnone, 0);
3013 /* Make sure everything is still consistent */
3014 propagate_types(b->right, c, &ok, &Tnone, 0);
/* Run a whole program.
 * prog is treated as a binode: the list at p->left names the variables
 * that receive command-line values, and p->right is the code to execute.
 * A variable's value is produced by parse_value() from argv[0] using
 * the type that value already carries.
 */
3018 static void interp_prog(struct exec *prog, char **argv)
3020 struct binode *p = cast(binode, prog);
3026 al = cast(binode, p->left);
3028 struct var *v = cast(var, al->left);
3029 struct value *vl = &v->var->val;
// A NULL argv[0] means there is no argument left for this variable.
3031 if (argv[0] == NULL) {
3032 printf("Not enough args\n");
3035 al = cast(binode, al->right);
3037 *vl = parse_value(vl->type, argv[0]);
// A NULL type after parse_value() is tested for here.
3038 if (vl->type == NULL)
// Execute the program body.
3042 v = interp_exec(p->right);
3046 ###### interp binode cases
// A Program node reaching this switch aborts; interp_prog() calls
// interp_exec() on the program's right-hand child instead.
3047 case Program: abort();
3049 ## And now to test it out.
3051 Having a language requires having a "hello world" program. I'll
3052 provide a little more than that: a program that prints "Hello world",
3053 finds the GCD of two numbers, prints the first few elements of
3054 Fibonacci, and performs a binary search for a number.
3056 ###### File: oceani.mk
3059 @echo "===== TEST ====="
3060 ./oceani --section "test: hello" oceani.mdc 55 33
3065 print "Hello World, what lovely oceans you have!"
3066 /* When a variable is defined in both branches of an 'if',
3067 * and used afterwards, the variables are merged.
3073 print "Is", A, "bigger than", B,"? ", bigger
3074 /* If a variable is not used after the 'if', no
3075 * merge happens, so types can be different
3079 print A, "is more than twice", B, "?", double
3082 print "double", A, "is only", double
3091 print "GCD of", A, "and", B,"is", a
3093 print a, "is not positive, cannot calculate GCD"
3095 print b, "is not positive, cannot calculate GCD"
3100 print "Fibonacci:", f1,f2,
3101 then togo = togo - 1
3109 /* Binary search... */
3114 mid := (lo + hi) / 2
3126 print "Yay, I found", target
3128 print "Closest I found was", mid