1 # Ocean Interpreter - Stoney Creek version
3 Ocean is intended to be a compiled language, so this interpreter is
4 not targeted at being the final product. It is, rather, an intermediate
5 stage, and fills that role in two distinct ways.
7 Firstly, it exists as a platform to experiment with the early language
8 design. An interpreter is easy to write and easy to get working, so
9 the barrier for entry is lower if I aim to start with an interpreter.
11 Secondly, the plan for the Ocean compiler is to write it in the
12 [Ocean language](http://ocean-lang.org). To achieve this we naturally
13 need some sort of boot-strap process and this interpreter - written in
14 portable C - will fill that role. It will be used to bootstrap the
17 Two features that are not needed to fill either of these roles are
18 performance and completeness. The interpreter only needs to be fast
19 enough to run small test programs and occasionally to run the compiler
20 on itself. It only needs to be complete enough to test aspects of the
21 design which are developed before the compiler is working, and to run
22 the compiler on itself. Any features not used by the compiler when
23 compiling itself are superfluous. They may be included anyway, but
26 Nonetheless, the interpreter should end up being reasonably complete,
27 and any performance bottlenecks which appear and are easily fixed, will
32 This second version of the interpreter exists to test out the
33 structured statement providing conditions and iteration, and simple
34 variable scoping. Clearly we need some minimal other functionality so
35 that values can be tested and instructions iterated over. All that
36 functionality is clearly not normative at this stage (not that
37 anything is **really** normative yet) and will change, so early test
38 code will certainly break in later versions.
40 The under-test parts of the language are:
42 - conditional/looping structured statements
43 - the `use` statement which is needed for that
44 - Variable binding using ":=" and "::=", and assignment using "=".
46 Elements which are present to make a usable language are:
48 - "blocks" of multiple statements.
49 - `pass`: a statement which does nothing.
50 - expressions: `+`, `-`, `*`, `/` can apply to numbers and `++` can
51 catenate strings. `and`, `or`, `not` manipulate Booleans, and
52 normal comparison operators can work on all three types.
53 - `print`: will print the values in a list of expressions.
54 - `program`: is given a list of identifiers to initialize from
59 Versions of the interpreter which obviously do not support a complete
60 language will be named after creeks and streams. This one is Stoney
63 Once we have something reasonably resembling a complete language, the
64 names of rivers will be used.
65 Early versions of the compiler will be named after seas. Major
66 releases of the compiler will be named after oceans. Hopefully I will
67 be finished once I get to the Pacific Ocean release.
71 As well as parsing and executing a program, the interpreter can print
72 out the program from the parsed internal structure. This is useful
73 for validating the parsing.
74 So the main requirements of the interpreter are:
76 - Parse the program, possibly with tracing
77 - Analyse the parsed program to ensure consistency
81 This is all performed by a single C program extracted with
84 There will be two formats for printing the program: a default and one
85 that uses bracketing. So a `--bracket` command line option is needed
86 for that. Normally the first code section found is used, however an
87 alternate section can be requested so that a file (such as this one)
88 can contain multiple programs. This is effected with the `--section`
91 ###### File: oceani.mk
93 myCFLAGS := -Wall -g -fplan9-extensions
94 CFLAGS := $(filter-out $(myCFLAGS),$(CFLAGS)) $(myCFLAGS)
95 myLDLIBS:= libparser.o libscanner.o libmdcode.o -licuuc
96 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
98 all :: $(LDLIBS) oceani
99 oceani.c oceani.h : oceani.mdc parsergen
100 ./parsergen -o oceani --LALR --tag Parser oceani.mdc
101 oceani.mk: oceani.mdc md2c
104 oceani: oceani.o $(LDLIBS)
105 $(CC) $(CFLAGS) -o oceani oceani.o $(LDLIBS)
107 ###### Parser: header
110 struct parse_context {
111 struct token_config config;
117 #define container_of(ptr, type, member) ({ \
118 const typeof( ((type *)0)->member ) *__mptr = (ptr); \
119 (type *)( (char *)__mptr - offsetof(type,member) );})
121 #define config2context(_conf) container_of(_conf, struct parse_context, \
130 #include <sys/mman.h>
/* Command-line synopsis, printed to stderr when option parsing hits an
 * unrecognised option.  The two literals are concatenated by the compiler,
 * so the first one must end with a space; otherwise the message reads
 * "--brackets--section=SectionName". */
static char Usage[] = "Usage: oceani --trace --print --noexec --brackets "
		      "--section=SectionName prog.ocn\n";
151 static const struct option long_options[] = {
152 {"trace", 0, NULL, 't'},
153 {"print", 0, NULL, 'p'},
154 {"noexec", 0, NULL, 'n'},
155 {"brackets", 0, NULL, 'b'},
156 {"section", 1, NULL, 's'},
/* Short-option string handed to getopt_long().  The trailing ':' after 's'
 * marks -s as taking an argument, matching the "section" entry in
 * long_options (has_arg = 1) and the handler that stores optarg.  Without
 * the colon, "-s NAME" would leave optarg NULL and NAME would be treated
 * as the input file. */
const char *options = "tpnbs:";
160 int main(int argc, char *argv[])
166 char *section = NULL;
167 struct parse_context context = {
169 .ignored = (1 << TK_line_comment)
170 | (1 << TK_block_comment),
171 .number_chars = ".,_+-",
176 int doprint=0, dotrace=0, doexec=1, brackets=0;
179 while ((opt = getopt_long(argc, argv, options, long_options, NULL))
182 case 't': dotrace=1; break;
183 case 'p': doprint=1; break;
184 case 'n': doexec=0; break;
185 case 'b': brackets=1; break;
186 case 's': section = optarg; break;
187 default: fprintf(stderr, Usage);
191 if (optind >= argc) {
192 fprintf(stderr, "oceani: no input file given\n");
195 fd = open(argv[optind], O_RDONLY);
197 fprintf(stderr, "oceani: cannot open %s\n", argv[optind]);
200 len = lseek(fd, 0, 2);
201 file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
202 s = code_extract(file, file+len, NULL);
204 fprintf(stderr, "oceani: could not find any code in %s\n",
210 for (ss = s; ss; ss = ss->next) {
211 struct text sec = ss->section;
212 if (sec.len == strlen(section) &&
213 strncmp(sec.txt, section, sec.len) == 0)
217 prog = parse_oceani(ss->code, &context.config,
218 dotrace ? stderr : NULL);
220 fprintf(stderr, "oceani: cannot find section %s\n",
225 prog = parse_oceani(s->code, &context.config,
226 dotrace ? stderr : NULL);
228 print_exec(*prog, 0, brackets);
229 if (prog && doexec) {
230 if (!analyse_prog(*prog, &context)) {
231 fprintf(stderr, "oceani: type error in program\n");
234 interp_prog(*prog, argv+optind+1);
241 struct section *t = s->next;
252 These four requirements of parse, analyse, print, interpret apply to
253 each language element individually so that is how most of the code
256 Three of the four are fairly self explanatory. The one that requires
257 a little explanation is the analysis step.
259 The current language design does not require (or even allow) the types
260 of variables to be declared, but they must still have a single type.
261 Different operations impose different requirements on the variables,
262 for example addition requires both arguments to be numeric, and
263 assignment requires the variable on the left to have the same type as
264 the expression on the right.
266 Analysis involves propagating these type requirements around and
267 consequently setting the type of each variable. If any requirements
268 are violated (e.g. a string is compared with a number) or if a
269 variable needs to have two different types, then an error is raised
270 and the program will not run.
272 If the same variable is declared in both branches of an 'if/else', or
273 in all cases of a 'switch' then the multiple instances may be merged
274 into just one variable if the variable is referenced after the
275 conditional statement. When this happens, the types must naturally be
276 consistent across all the branches. When the variable is not used
277 outside the if, the variables in the different branches are distinct
278 and can be of different types.
280 Determining the types of all variables early is important for
281 processing command line arguments. These can be assigned to any type
282 of variable, but we must first know the correct type so any required
283 conversion can happen. If a variable is associated with a command
284 line argument but no type can be interpreted (e.g. the variable is
285 only ever used in a `print` statement), then the type is set to
288 Undeclared names may only appear in "use" statements and "case" expressions.
289 These names are given a type of "label" and a unique value.
290 This allows them to fill the role of a name in an enumerated type, which
291 is useful for testing the `switch` statement.
293 As we will see, the condition part of a `while` statement can return
294 either a Boolean or some other type. This requires that the expected
295 type that gets passed around comprises a type (`enum vtype`) and a
296 flag to indicate that `Vbool` is also permitted.
298 As there are, as yet, no distinct types that are compatible, there
299 isn't much subtlety in the analysis. When we have distinct number
300 types, this will become more interesting.
304 One last introductory step before detailing the language elements and
305 providing their four requirements is to establish the data structures
306 to store these elements.
308 There are two key objects that we need to work with: executable
309 elements which comprise the program, and values which the program
310 works with. Between these are the variables in their various scopes
311 which hold the values.
315 Values can be numbers, which we represent as multi-precision
316 fractions, strings, Booleans and labels. When analysing the program
317 we also need to allow for places where no value is meaningful
318 (`Vnone`) and where we don't know what type to expect yet (`Vunknown`
319 which can be anything and `Vnolabel` which can be anything except a
320 label). A 2 character 'tail' is included in each value as the scanner
321 wants to parse that from the end of numbers and we need somewhere to
322 put it. It is currently ignored but one day might allow for
323 e.g. "imaginary" numbers.
325 Values are never shared, they are always copied when used, and freed
326 when no longer needed.
328 When propagating type information around the program, we need to
329 determine if two types are compatible, where `Vunknown` is compatible
330 with anything, and `Vnolabel` is compatible with anything except a
331 label. A separate function to encode this rule will simplify some code
334 When assigning command line arguments to variables, we need to be able
335 to parse each type from a string.
343 myLDLIBS := libnumber.o libstring.o -lgmp
344 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
348 enum vtype {Vnolabel, Vunknown, Vnone, Vstr, Vnum, Vbool, Vlabel} vtype;
359 static void free_value(struct value v)
364 case Vunknown: break;
365 case Vstr: free(v.str.txt); break;
366 case Vnum: mpq_clear(v.num); break;
372 static int vtype_compat(enum vtype require, enum vtype have, int bool_permitted)
374 if (bool_permitted && have == Vbool)
378 return have != Vlabel;
382 return have == Vunknown || require == have;
386 ###### value functions
388 static void val_init(struct value *val, enum vtype type)
394 case Vunknown: break;
396 mpq_init(val->num); break;
398 val->str.txt = malloc(1);
410 static struct value dup_value(struct value v)
417 case Vunknown: break;
426 mpq_set(rv.num, v.num);
429 rv.str.len = v.str.len;
430 rv.str.txt = malloc(rv.str.len);
431 memcpy(rv.str.txt, v.str.txt, v.str.len);
437 static int value_cmp(struct value left, struct value right)
440 if (left.vtype != right.vtype)
441 return left.vtype - right.vtype;
442 switch (left.vtype) {
443 case Vlabel: cmp = left.label == right.label ? 0 : 1; break;
444 case Vnum: cmp = mpq_cmp(left.num, right.num); break;
445 case Vstr: cmp = text_cmp(left.str, right.str); break;
446 case Vbool: cmp = left.bool - right.bool; break;
449 case Vunknown: cmp = 0;
454 static struct text text_join(struct text a, struct text b)
457 rv.len = a.len + b.len;
458 rv.txt = malloc(rv.len);
459 memcpy(rv.txt, a.txt, a.len);
460 memcpy(rv.txt+a.len, b.txt, b.len);
464 static void print_value(struct value v)
468 printf("*Unknown*"); break;
471 printf("*no-value*"); break;
473 printf("*label-%p*", v.label); break;
475 printf("%.*s", v.str.len, v.str.txt); break;
477 printf("%s", v.bool ? "True":"False"); break;
482 mpf_set_q(fl, v.num);
483 gmp_printf("%Fg", fl);
490 static int parse_value(struct value *vl, char *arg)
501 vl->str.len = strlen(arg);
502 vl->str.txt = malloc(vl->str.len);
503 memcpy(vl->str.txt, arg, vl->str.len);
510 tx.txt = arg; tx.len = strlen(tx.txt);
511 if (number_parse(vl->num, vl->tail, tx) == 0)
514 mpq_neg(vl->num, vl->num);
517 if (strcasecmp(arg, "true") == 0 ||
518 strcmp(arg, "1") == 0)
520 else if (strcasecmp(arg, "false") == 0 ||
521 strcmp(arg, "0") == 0)
524 printf("Bad bool: %s\n", arg);
534 Variables are scoped named values. We store the names in a linked
535 list of "bindings" sorted lexically, and use sequential search and
542 struct binding *next; // in lexical order
546 This linked list is stored in the parse context so that "reduce"
547 functions can find or add variables, and so the analysis phase can
548 ensure that every variable gets a type.
552 struct binding *varlist; // In lexical order
556 static struct binding *find_binding(struct parse_context *c, struct text s)
558 struct binding **l = &c->varlist;
563 (cmp = text_cmp((*l)->name, s)) < 0)
567 n = calloc(1, sizeof(*n));
574 Each name can be linked to multiple variables defined in different
575 scopes. Each scope starts where the name is declared and continues
576 until the end of the containing code block. Scopes of a given name
577 cannot nest, so a declaration while a name is in-scope is an error.
579 ###### binding fields
580 struct variable *var;
584 struct variable *previous;
586 struct binding *name;
590 While the naming seems strange, we include local constants in the
591 definition of variables. A name declared `var := value` can
592 subsequently be changed, but a name declared `var ::= value` cannot -
595 ###### variable fields
598 Scopes in parallel branches can be partially merged. More
599 specifically, if a given name is declared in both branches of an
600 if/else then its scope is a candidate for merging. Similarly if
601 every branch of an exhaustive switch (e.g. has an "else" clause)
602 declares a given name, then the scopes from the branches are
603 candidates for merging.
605 Note that names declared inside a loop (which is only parallel to
606 itself) are never visible after the loop. Similarly names defined in
607 scopes which are not parallel, such as those started by `for` and
608 `switch`, are never visible after the scope. Only variables defined in
609 both `then` and `else` (including the implicit then after an `if`, and
610 excluding `then` used with `for`) and in all `case`s and `else` of a
611 `switch` or `while` can be visible beyond the `if`/`switch`/`while`.
613 Labels, which are a bit like variables, follow different rules.
614 Labels are not explicitly declared, but if an undeclared name appears
615 in a context where a label is legal, that effectively declares the
616 name as a label. The declaration remains in force (or in scope) at
617 least to the end of the immediately containing block and conditionally
618 in any larger containing block which does not declare the name in some
619 other way. Importantly, the conditional scope extension happens even
620 if the label is only used in one parallel branch of a conditional -- when
621 used in one branch it is treated as having been declared in all
624 Merge candidates are tentatively visible beyond the end of the
625 branching statement which creates them. If the name is used, the
626 merge is affirmed and they become a single variable visible at the
627 outer layer. If not - if it is redeclared first - the merge lapses.
629 To track scopes we have an extra stack, implemented as a linked list,
630 which roughly parallels the parse stack and which is used exclusively
631 for scoping. When a new scope is opened, a new frame is pushed and
632 the child-count of the parent frame is incremented. This child-count
633 is used to distinguish between the first of a set of parallel scopes,
634 in which declared variables must not be in scope, and subsequent
635 branches, where they must already be conditionally scoped.
637 To push a new frame *before* any code in the frame is parsed, we need a
638 grammar reduction. This is most easily achieved with a grammar
639 element which derives the empty string, and creates the new scope when
640 it is recognized. This can be placed, for example, between a keyword
641 like "if" and the code following it.
645 struct scope *parent;
651 struct scope *scope_stack;
654 static void scope_pop(struct parse_context *c)
656 struct scope *s = c->scope_stack;
658 c->scope_stack = s->parent;
663 static void scope_push(struct parse_context *c)
665 struct scope *s = calloc(1, sizeof(*s));
667 c->scope_stack->child_count += 1;
668 s->parent = c->scope_stack;
676 OpenScope -> ${ scope_push(config2context(config)); }$
679 Each variable records a scope depth and is in one of four states:
681 - "in scope". This is the case between the declaration of the
682 variable and the end of the containing block, and also between
683 the usage which affirms a merge and the end of the block.
685 The scope depth is not greater than the current parse context scope
686 nest depth. When the block of that depth closes, the state will
687 change. To achieve this, all "in scope" variables are linked
688 together as a stack in nesting order.
690 - "pending". The "in scope" block has closed, but other parallel
691 scopes are still being processed. So far, every parallel block at
692 the same level that has closed has declared the name.
694 The scope depth is the depth of the last parallel block that
695 enclosed the declaration, and that has closed.
697 - "conditionally in scope". The "in scope" block and all parallel
698 scopes have closed, and no further mention of the name has been
699 seen. This state includes a secondary nest depth which records the
700 outermost scope seen since the variable became conditionally in
701 scope. If a use of the name is found, the variable becomes "in
702 scope" and that secondary depth becomes the recorded scope depth.
703 If the name is declared as a new variable, the old variable becomes
704 "out of scope" and the recorded scope depth stays unchanged.
706 - "out of scope". The variable is neither in scope nor conditionally
707 in scope. It is permanently out of scope now and can be removed from
708 the "in scope" stack.
711 ###### variable fields
712 int depth, min_depth;
713 enum { OutScope, PendingScope, CondScope, InScope } scope;
714 struct variable *in_scope;
718 struct variable *in_scope;
720 All variables with the same name are linked together using the
721 'previous' link. Those variables that have
722 been affirmatively merged all have a 'merged' pointer that points to
723 one primary variable - the most recently declared instance. When
724 merging variables, we need to also adjust the 'merged' pointer on any
725 other variables that had previously been merged with the one that will
726 no longer be primary.
728 ###### variable fields
729 struct variable *merged;
733 static void variable_merge(struct variable *primary, struct variable *secondary)
739 primary = primary->merged;
741 for (v = primary->previous; v; v=v->previous)
742 if (v == secondary || v == secondary->merged ||
743 v->merged == secondary ||
744 (v->merged && v->merged == secondary->merged)) {
752 while (context.varlist) {
753 struct binding *b = context.varlist;
754 struct variable *v = b->var;
755 context.varlist = b->next;
758 struct variable *t = v;
766 #### Manipulating Bindings
768 When a name is conditionally visible, a new declaration discards the
769 old binding - the condition lapses. Conversely a usage of the name
770 affirms the visibility and extends it to the end of the containing
771 block - i.e. the block that contains both the original declaration and
772 the latest usage. This is determined from `min_depth`. When a
773 conditionally visible variable gets affirmed like this, it is also
774 merged with other conditionally visible variables with the same name.
776 When we parse a variable declaration we either signal an error if the
777 name is currently bound, or create a new variable at the current nest
778 depth if the name is unbound or bound to a conditionally scoped or
779 pending-scope variable. If the previous variable was conditionally
780 scoped, it and its homonyms become out-of-scope.
782 When we parse a variable reference (including non-declarative
783 assignment) we signal an error if the name is not bound or is bound to
784 a pending-scope variable; update the scope if the name is bound to a
785 conditionally scoped variable; or just proceed normally if the named
786 variable is in scope.
788 When we exit a scope, any variables bound at this level are either
789 marked out of scope or pending-scoped, depending on whether the
790 scope was sequential or parallel.
792 When exiting a parallel scope we check if there are any variables that
793 were previously pending and are still visible. If there are, then
794 they weren't redeclared in the most recent scope, so they cannot be
795 merged and must become out-of-scope. If it is not the first of
796 parallel scopes (based on `child_count`), we check that there was a
797 previous binding that is still pending-scope. If there isn't, the new
798 variable must now be out-of-scope.
800 When exiting a sequential scope that immediately enclosed parallel
801 scopes, we need to resolve any pending-scope variables. If there was
802 no `else` clause, and we cannot determine that the `switch` was exhaustive,
803 we need to mark all pending-scope variables as out-of-scope. Otherwise
804 all pending-scope variables become conditionally scoped.
807 enum closetype { CloseSequential, CloseParallel, CloseElse };
811 static struct variable *var_decl(struct parse_context *c, struct text s)
813 struct binding *b = find_binding(c, s);
814 struct variable *v = b->var;
816 switch (v ? v->scope : OutScope) {
818 /* Signal error ... once I build error signalling support */
822 v && v->scope == CondScope;
828 v = calloc(1, sizeof(*v));
829 v->previous = b->var;
832 v->min_depth = v->depth = c->scope_depth;
834 v->in_scope = c->in_scope;
836 val_init(&v->val, Vunknown);
840 static struct variable *var_ref(struct parse_context *c, struct text s)
842 struct binding *b = find_binding(c, s);
843 struct variable *v = b->var;
846 switch (v ? v->scope : OutScope) {
849 /* Signal an error - once that is possible */
852 /* All CondScope variables of this name need to be merged
855 v->depth = v->min_depth;
857 for (v2 = v->previous;
858 v2 && v2->scope == CondScope;
860 variable_merge(v, v2);
868 static void var_block_close(struct parse_context *c, enum closetype ct)
870 /* close of all variables that are in_scope */
871 struct variable *v, **vp, *v2;
874 for (vp = &c->in_scope;
875 v = *vp, v && v->depth > c->scope_depth && v->min_depth > c->scope_depth;
879 case CloseParallel: /* handle PendingScope */
883 if (c->scope_stack->child_count == 1)
884 v->scope = PendingScope;
885 else if (v->previous &&
886 v->previous->scope == PendingScope)
887 v->scope = PendingScope;
888 else if (v->val.vtype == Vlabel)
889 v->scope = PendingScope;
890 else if (v->name->var == v)
892 if (ct == CloseElse) {
893 /* All Pending variables with this name
894 * are now Conditional */
896 v2 && v2->scope == PendingScope;
898 v2->scope = CondScope;
903 v2 && v2->scope == PendingScope;
905 if (v2->val.vtype != Vlabel)
906 v2->scope = OutScope;
908 case OutScope: break;
911 case CloseSequential:
912 if (v->val.vtype == Vlabel)
913 v->scope = PendingScope;
919 /* There was no 'else', so we can only become
920 * conditional if we know the cases were exhaustive,
921 * and that doesn't mean anything yet.
922 * So only labels become conditional..
925 v2 && v2->scope == PendingScope;
927 if (v2->val.vtype == Vlabel) {
928 v2->scope = CondScope;
929 v2->min_depth = c->scope_depth;
931 v2->scope = OutScope;
934 case OutScope: break;
938 if (v->scope == OutScope)
947 Executables can be lots of different things. In many cases an
948 executable is just an operation combined with one or two other
949 executables. This allows for expressions and lists etc. Other times
950 an executable is something quite specific like a constant or variable
951 name. So we define a `struct exec` to be a general executable with a
952 type, and a `struct binode` which is a subclass of `exec` and forms a
953 node in a binary tree and holds an operation. There will be other
954 subclasses, and to access these we need to be able to `cast` the
955 `exec` into the various other types.
958 #define cast(structname, pointer) ({ \
959 const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \
960 if (__mptr && *__mptr != X##structname) abort(); \
961 (struct structname *)( (char *)__mptr);})
963 #define new(structname) ({ \
964 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
965 __ptr->type = X##structname; \
974 enum exec_types type;
981 struct exec *left, *right;
984 Each different type of `exec` node needs a number of functions
985 defined, a bit like methods. We must be able to free it,
986 print it, analyse it and execute it. Once we have specific `exec`
987 types we will need to parse them too. Let's take this a bit more
992 The parser generator requires a `free_foo` function for each struct
993 that stores attributes and they will be `exec`s or subtypes thereof.
994 So we need `free_exec` which can handle all the subtypes, and we need
999 static void free_binode(struct binode *b)
1004 free_exec(b->right);
1008 ###### core functions
1009 static void free_exec(struct exec *e)
1018 ###### forward decls
1020 static void free_exec(struct exec *e);
1022 ###### free exec cases
1023 case Xbinode: free_binode(cast(binode, e)); break;
1027 Printing an `exec` requires that we know the current indent level for
1028 printing line-oriented components. As will become clear later, we
1029 also want to know what sort of bracketing to use.
1031 ###### ast functions
1033 static void do_indent(int i, char *str)
1040 ###### core functions
1041 static void print_binode(struct binode *b, int indent, int bracket)
1045 ## print binode cases
1049 static void print_exec(struct exec *e, int indent, int bracket)
1053 print_binode(cast(binode, e), indent, bracket); break;
1058 ###### forward decls
1060 static void print_exec(struct exec *e, int indent, int bracket);
1064 As discussed, analysis involves propagating type requirements around
1065 the program and looking for errors.
1067 So `propagate_types` is passed an expected type (being a `vtype`
1068 together with a `bool_permitted` flag) that the `exec` is expected to
1069 return, and returns the type that it does return, either of which can
1070 be `Vunknown`. An `ok` flag is passed by reference. It is set to `0`
1071 when an error is found, and `2` when any change is made. If it
1072 remains unchanged at `1`, then no more propagation is needed.
1074 ###### core functions
1076 static enum vtype propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1077 enum vtype type, int bool_permitted)
1084 switch (prog->type) {
1087 struct binode *b = cast(binode, prog);
1089 ## propagate binode cases
1093 ## propagate exec cases
1100 Interpreting an `exec` doesn't require anything but the `exec`. State
1101 is stored in variables and each variable will be directly linked from
1102 within the `exec` tree. The exception to this is the whole `program`
1103 which needs to look at command line arguments. The `program` will be
1104 interpreted separately.
1106 Each `exec` can return a value, which may be `Vnone` but shouldn't be `Vunknown`.
1108 ###### core functions
1110 static struct value interp_exec(struct exec *e)
1120 struct binode *b = cast(binode, e);
1121 struct value left, right;
1122 left.vtype = right.vtype = Vnone;
1124 ## interp binode cases
1126 free_value(left); free_value(right);
1129 ## interp exec cases
1134 ## Language elements
1136 Each language element needs to be parsed, printed, analysed,
1137 interpreted, and freed. There are several, so let's just start with
1138 the easy ones and work our way up.
1142 We have already met values as separate objects. When manifest
1143 constants appear in the program text that must result in an executable
1144 which has a constant value. So the `val` structure embeds a value in
1161 $0->val.vtype = Vbool;
1166 $0->val.vtype = Vbool;
1171 $0->val.vtype = Vnum;
1172 if (number_parse($0->val.num, $0->val.tail, $1.txt) == 0)
1173 mpq_init($0->val.num);
1177 $0->val.vtype = Vstr;
1178 string_parse(&$1, '\\', &$0->val.str, $0->val.tail);
1182 $0->val.vtype = Vstr;
1183 string_parse(&$1, '\\', &$0->val.str, $0->val.tail);
1186 ###### print exec cases
1189 struct val *v = cast(val, e);
1190 if (v->val.vtype == Vstr)
1192 print_value(v->val);
1193 if (v->val.vtype == Vstr)
1198 ###### propagate exec cases
1201 struct val *val = cast(val, prog);
1202 if (!vtype_compat(type, val->val.vtype, bool_permitted))
1204 return val->val.vtype;
1207 ###### interp exec cases
1209 return dup_value(cast(val, e)->val);
1211 ###### ast functions
1212 static void free_val(struct val *v)
1220 ###### free exec cases
1221 case Xval: free_val(cast(val, e)); break;
1223 ###### ast functions
1224 // Move all nodes from 'b' to 'rv', reversing the order.
1225 // In 'b' 'left' is a list, and 'right' is the last node.
1226 // In 'rv', left' is the first node and 'right' is a list.
1227 static struct binode *reorder_bilist(struct binode *b)
1229 struct binode *rv = NULL;
1232 struct exec *t = b->right;
1236 b = cast(binode, b->left);
1246 Just as we used a `val` to wrap a value into an `exec`, we similarly
1247 need a `var` to wrap a `variable` into an exec. While each `val`
1248 contained a copy of the value, each `var` holds a link to the variable
1249 because it really is the same variable no matter where it appears.
1250 When a variable is used, we need to remember to follow the `->merged`
1251 link to find the primary instance.
1259 struct variable *var;
1265 VariableDecl -> IDENTIFIER := ${ {
1266 struct variable *v = var_decl(config2context(config), $1.txt);
1270 | IDENTIFIER ::= ${ {
1271 struct variable *v = var_decl(config2context(config), $1.txt);
1277 Variable -> IDENTIFIER ${ {
1278 struct variable *v = var_ref(config2context(config), $1.txt);
1280 /* This might be a label - allocate a var just in case */
1281 v = var_decl(config2context(config), $1.txt);
1283 val_init(&v->val, Vlabel);
1289 ###### print exec cases
1292 struct var *v = cast(var, e);
1294 struct binding *b = v->var->name;
1295 printf("%.*s", b->name.len, b->name.txt);
1300 ###### propagate exec cases
1304 struct var *var = cast(var, prog);
1305 struct variable *v = var->var;
1312 if (v->val.vtype == Vunknown) {
1313 if (type > Vunknown && *ok != 0) {
1314 val_init(&v->val, type);
1319 if (!vtype_compat(type, v->val.vtype, bool_permitted))
1321 if (type <= Vunknown)
1322 return v->val.vtype;
1326 ###### interp exec cases
1329 struct var *var = cast(var, e);
1330 struct variable *v = var->var;
1334 return dup_value(v->val);
1337 ###### ast functions
1339 static void free_var(struct var *v)
1344 ###### free exec cases
1345 case Xvar: free_var(cast(var, e)); break;
1347 ### Expressions: Boolean
1349 Our first user of the `binode` will be expressions, and particularly
1350 Boolean expressions. As I haven't implemented precedence in the
1351 parser generator yet, we need different names for each precedence
1352 level used by expressions. The outer most or lowest level precedence
1353 are Boolean `or` `and`, and `not` which form an `Expression` out of `BTerm`s
1364 Expression -> Expression or BTerm ${
1370 | BTerm ${ $0 = $<1; }$
1372 BTerm -> BTerm and BFact ${
1378 | BFact ${ $0 = $<1; }$
1380 BFact -> not BFact ${
1387 ###### print binode cases
1389 print_exec(b->left, -1, 0);
1391 print_exec(b->right, -1, 0);
1394 print_exec(b->left, -1, 0);
1396 print_exec(b->right, -1, 0);
1400 print_exec(b->right, -1, 0);
1403 ###### propagate binode cases
1407 /* both must be Vbool, result is Vbool */
1408 propagate_types(b->left, c, ok, Vbool, 0);
1409 propagate_types(b->right, c, ok, Vbool, 0);
1410 if (type != Vbool && type > Vunknown)
1414 ###### interp binode cases
1416 rv = interp_exec(b->left);
1417 right = interp_exec(b->right);
1418 rv.bool = rv.bool && right.bool;
1421 rv = interp_exec(b->left);
1422 right = interp_exec(b->right);
1423 rv.bool = rv.bool || right.bool;
1426 rv = interp_exec(b->right);
1430 ### Expressions: Comparison
1432 Of slightly higher precedence than Boolean expressions are
1434 A comparison takes arguments of any type, but the two types must be
1437 To simplify the parsing we introduce an `eop` which can return an
1438 expression operator.
1445 ###### ast functions
1446 static void free_eop(struct eop *e)
1461 | Expr CMPop Expr ${
1467 | Expr ${ $0 = $<1; }$
1472 CMPop -> < ${ $0.op = Less; }$
1473 | > ${ $0.op = Gtr; }$
1474 | <= ${ $0.op = LessEq; }$
1475 | >= ${ $0.op = GtrEq; }$
1476 | == ${ $0.op = Eql; }$
1477 | != ${ $0.op = NEql; }$
1479 ###### print binode cases
1487 print_exec(b->left, -1, 0);
1489 case Less: printf(" < "); break;
1490 case LessEq: printf(" <= "); break;
1491 case Gtr: printf(" > "); break;
1492 case GtrEq: printf(" >= "); break;
1493 case Eql: printf(" == "); break;
1494 case NEql: printf(" != "); break;
1497 print_exec(b->right, -1, 0);
1500 ###### propagate binode cases
1507 /* Both must match but not labels, result is Vbool */
1508 t = propagate_types(b->left, c, ok, Vnolabel, 0);
1510 propagate_types(b->right, c, ok, t, 0);
1512 t = propagate_types(b->right, c, ok, Vnolabel, 0);
1514 t = propagate_types(b->left, c, ok, t, 0);
1516 if (!vtype_compat(type, Vbool, 0))
1520 ###### interp binode cases
1529 left = interp_exec(b->left);
1530 right = interp_exec(b->right);
1531 cmp = value_cmp(left, right);
1534 case Less: rv.bool = cmp < 0; break;
1535 case LessEq: rv.bool = cmp <= 0; break;
1536 case Gtr: rv.bool = cmp > 0; break;
1537 case GtrEq: rv.bool = cmp >= 0; break;
1538 case Eql: rv.bool = cmp == 0; break;
1539 case NEql: rv.bool = cmp != 0; break;
1540 default: rv.bool = 0; break;
1545 ### Expressions: The rest
1547 The remaining expressions with the highest precedence are arithmetic
1548 and string concatenation. There are `Expr`, `Term`, and `Factor`.
1549 The `Factor` is where the `Value` and `Variable` that we already have
1552 `+` and `-` are both infix and prefix operations (where they are
1553 absolute value and negation). These have different operator names.
1555 We also have a 'Bracket' operator which records where parentheses were
1556 found. This makes it easy to reproduce these when printing. Once
1557 precedence is handled better I might be able to discard this.
1569 Expr -> Expr Eop Term ${
1575 | Term ${ $0 = $<1; }$
1577 Term -> Term Top Factor ${
1583 | Factor ${ $0 = $<1; }$
1585 Factor -> ( Expression ) ${
1595 | Value ${ $0 = (struct binode *)$<1; }$
1596 | Variable ${ $0 = (struct binode *)$<1; }$
1599 Eop -> + ${ $0.op = Plus; }$
1600 | - ${ $0.op = Minus; }$
1602 Uop -> + ${ $0.op = Absolute; }$
1603 | - ${ $0.op = Negate; }$
1605 Top -> * ${ $0.op = Times; }$
1606 | / ${ $0.op = Divide; }$
1607 | ++ ${ $0.op = Concat; }$
1609 ###### print binode cases
1615 print_exec(b->left, indent, 0);
1617 case Plus: printf(" + "); break;
1618 case Minus: printf(" - "); break;
1619 case Times: printf(" * "); break;
1620 case Divide: printf(" / "); break;
1621 case Concat: printf(" ++ "); break;
1624 print_exec(b->right, indent, 0);
1628 print_exec(b->right, indent, 0);
1632 print_exec(b->right, indent, 0);
1636 print_exec(b->right, indent, 0);
1640 ###### propagate binode cases
1645 /* both must be numbers, result is Vnum */
1648 /* as propagate_types ignores a NULL,
1649 * unary ops fit here too */
1650 propagate_types(b->left, c, ok, Vnum, 0);
1651 propagate_types(b->right, c, ok, Vnum, 0);
1652 if (!vtype_compat(type, Vnum, 0))
1657 /* both must be Vstr, result is Vstr */
1658 propagate_types(b->left, c, ok, Vstr, 0);
1659 propagate_types(b->right, c, ok, Vstr, 0);
1660 if (!vtype_compat(type, Vstr, 0))
1665 return propagate_types(b->right, c, ok, type, 0);
1667 ###### interp binode cases
1670 rv = interp_exec(b->left);
1671 right = interp_exec(b->right);
1672 mpq_add(rv.num, rv.num, right.num);
1675 rv = interp_exec(b->left);
1676 right = interp_exec(b->right);
1677 mpq_sub(rv.num, rv.num, right.num);
1680 rv = interp_exec(b->left);
1681 right = interp_exec(b->right);
1682 mpq_mul(rv.num, rv.num, right.num);
1685 rv = interp_exec(b->left);
1686 right = interp_exec(b->right);
1687 mpq_div(rv.num, rv.num, right.num);
1690 rv = interp_exec(b->right);
1691 mpq_neg(rv.num, rv.num);
1694 rv = interp_exec(b->right);
1695 mpq_abs(rv.num, rv.num);
1698 rv = interp_exec(b->right);
1701 left = interp_exec(b->left);
1702 right = interp_exec(b->right);
1704 rv.str = text_join(left.str, right.str);
1707 ### Blocks, Statements, and Statement lists.
1709 Now that we have expressions out of the way we need to turn to
1710 statements. There are simple statements and more complex statements.
1711 Simple statements do not contain newlines, complex statements do.
1713 Statements often come in sequences and we have corresponding simple
1714 statement lists and complex statement lists.
1715 The former comprise only simple statements separated by semicolons.
1716 The latter comprise complex statements and simple statement lists. They are
1717 separated by newlines. Thus the semicolon is only used to separate
1718 simple statements on the one line. This may be overly restrictive,
1719 but I'm not sure I ever want a complex statement to share a line with
1722 Note that a simple statement list can still use multiple lines if
1723 subsequent lines are indented, so
1725 ###### Example: wrapped simple statement list
1730 is a single simple statement list. This might allow room for
1731 confusion, so I'm not set on it yet.
1733 A simple statement list needs no extra syntax. A complex statement
1734 list has two syntactic forms. It can be enclosed in braces (much like
1735 C blocks), or it can be introduced by a colon and continue until an
1736 unindented newline (much like Python blocks). With this extra syntax
1737 it is referred to as a block.
1739 Note that a block does not have to include any newlines if it only
1740 contains simple statements. So both of:
1742 if condition: a=b; d=f
1744 if condition { a=b; print f }
1748 In either case the list is constructed from a `binode` list with
1749 `Block` as the operator. When parsing the list it is most convenient
1750 to append to the end, so a list is a list and a statement. When using
1751 the list it is more convenient to consider a list to be a statement
1752 and a list. So we need a function to re-order a list.
1753 `reorder_bilist` serves this purpose.
1755 The only stand-alone statement we introduce at this stage is `pass`
1756 which does nothing and is represented as a `NULL` pointer in a `Block`
1776 Block -> Open Statementlist Close ${ $0 = $<2; }$
1777 | Open Newlines Statementlist Close ${ $0 = $<3; }$
1778 | Open SimpleStatements } ${ $0 = reorder_bilist($<2); }$
1779 | Open Newlines SimpleStatements } ${ $0 = reorder_bilist($<3); }$
1780 | : Statementlist ${ $0 = $<2; }$
1781 | : SimpleStatements ${ $0 = reorder_bilist($<2); }$
1783 Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<1); }$
1785 ComplexStatements -> ComplexStatements ComplexStatement ${
1791 | ComplexStatements NEWLINE ${ $0 = $<1; }$
1792 | ComplexStatement ${
1800 ComplexStatement -> SimpleStatements NEWLINE ${
1801 $0 = reorder_bilist($<1);
1803 ## ComplexStatement Grammar
1806 SimpleStatements -> SimpleStatements ; SimpleStatement ${
1812 | SimpleStatement ${
1818 | SimpleStatements ; ${ $0 = $<1; }$
1820 SimpleStatement -> pass ${ $0 = NULL; }$
1821 ## SimpleStatement Grammar
1823 ###### print binode cases
1827 if (b->left == NULL)
1830 print_exec(b->left, indent, 0);
1833 print_exec(b->right, indent, 0);
1836 // block, one per line
1837 if (b->left == NULL)
1838 do_indent(indent, "pass\n");
1840 print_exec(b->left, indent, bracket);
1842 print_exec(b->right, indent, bracket);
1846 ###### propagate binode cases
1849 /* If any statement returns something other than Vnone
1850 * then all such must return same type.
1851 * As each statement may be Vnone or something else,
1852 * we must always pass Vunknown down, otherwise an incorrect
1853 * error might occur. We never return Vnone unless it is
1858 for (e = b; e; e = cast(binode, e->right)) {
1859 t = propagate_types(e->left, c, ok, Vunknown, bool_permitted);
1860 if (bool_permitted && t == Vbool)
1862 if (t != Vunknown && t != Vnone) {
1863 if (type == Vunknown)
1872 ###### interp binode cases
1874 while (rv.vtype == Vnone &&
1877 rv = interp_exec(b->left);
1878 b = cast(binode, b->right);
1882 ### The Print statement
1884 `print` is a simple statement that takes a comma-separated list of
1885 expressions and prints the values separated by spaces and terminated
1886 by a newline. No control of formatting is possible.
1888 `print` faces the same list-ordering issue as blocks, and uses the
1894 ###### SimpleStatement Grammar
1896 | print ExpressionList ${
1897 $0 = reorder_bilist($<2);
1899 | print ExpressionList , ${
1904 $0 = reorder_bilist($0);
1915 ExpressionList -> ExpressionList , Expression ${
1928 ###### print binode cases
1931 do_indent(indent, "print");
1935 print_exec(b->left, -1, 0);
1939 b = cast(binode, b->right);
1945 ###### propagate binode cases
1948 /* don't care but all must be consistent */
1949 propagate_types(b->left, c, ok, Vnolabel, 0);
1950 propagate_types(b->right, c, ok, Vnolabel, 0);
1953 ###### interp binode cases
1959 for ( ; b; b = cast(binode, b->right))
1963 left = interp_exec(b->left);
1976 ###### Assignment statement
1978 An assignment will assign a value to a variable, providing it hasn't
1979 been declared as a constant. The analysis phase ensures that the type
1980 will be correct so the interpreter just needs to perform the
1981 calculation. There is a form of assignment which declares a new
1982 variable as well as assigning a value. If a name is assigned before
1983 it is declared, an error will be raised as the name is created as
1984 `Vlabel` and it is illegal to assign to such names.
1990 ###### SimpleStatement Grammar
1991 | Variable = Expression ${ {
1992 struct var *v = cast(var, $1);
1998 if (v->var && !v->var->constant) {
2002 | VariableDecl Expression ${
2009 ###### print binode cases
2012 do_indent(indent, "");
2013 print_exec(b->left, indent, 0);
2015 print_exec(b->right, indent, 0);
2021 do_indent(indent, "");
2022 print_exec(b->left, indent, 0);
2023 if (cast(var, b->left)->var->constant)
2027 print_exec(b->right, indent, 0);
2032 ###### propagate binode cases
2036 /* Both must match and not be labels, result is Vnone */
2037 t = propagate_types(b->left, c, ok, Vnolabel, 0);
2039 propagate_types(b->right, c, ok, t, 0);
2041 t = propagate_types(b->right, c, ok, Vnolabel, 0);
2043 t = propagate_types(b->left, c, ok, t, 0);
2049 ###### interp binode cases
2054 struct variable *v = cast(var, b->left)->var;
2057 right = interp_exec(b->right);
2060 right.vtype = Vunknown;
2064 ### The `use` statement
2066 The `use` statement is the last "simple" statement. It is needed when
2067 the condition in a conditional statement is a block. `use` works much
2068 like `return` in C, but only completes the `condition`, not the whole
2074 ###### SimpleStatement Grammar
2081 ###### print binode cases
2084 do_indent(indent, "use ");
2085 print_exec(b->right, -1, 0);
2090 ###### propagate binode cases
2093 /* result matches value */
2094 return propagate_types(b->right, c, ok, type, 0);
2096 ###### interp binode cases
2099 rv = interp_exec(b->right);
2102 ### The Conditional Statement
2104 This is the biggy and currently the only complex statement. This
2105 subsumes `if`, `while`, `do/while`, `switch`, and some parts of `for`.
2106 It is comprised of a number of parts, all of which are optional though
2107 set combinations apply. Each part is (usually) a key word (`then` is
2108 sometimes optional) followed by either an expression or a code block,
2109 except the `casepart` which is a "key word and an expression" followed
2110 by a code block. The code-block option is valid for all parts and,
2111 where an expression is also allowed, the code block can use the `use`
2112 statement to report a value. If the code block does not report a value
2113 the effect is similar to reporting `False`.
2115 The `else` and `case` parts, as well as `then` when combined with
2116 `if`, can contain a `use` statement which will apply to some
2117 containing conditional statement. `for` parts, `do` parts and `then`
2118 parts used with `for` can never contain a `use`, except in some
2119 subordinate conditional statement.
2121 If there is a `forpart`, it is executed first, only once.
2122 If there is a `dopart`, then it is executed repeatedly providing
2123 always that the `condpart` or `cond`, if present, does not return a non-True
2124 value. `condpart` can fail to return any value if it simply executes
2125 to completion. This is treated the same as returning `True`.
2127 If there is a `thenpart` it will be executed whenever the `condpart`
2128 or `cond` returns True (or does not return any value), but this will happen
2129 *after* `dopart` (when present).
2131 If `elsepart` is present it will be executed at most once when the
2132 condition returns `False` or some value that isn't `True` and isn't
2133 matched by any `casepart`. If there are any `casepart`s, they will be
2134 executed when the condition returns a matching value.
2136 The particular sorts of values allowed in case parts has not yet been
2137 determined in the language design, so nothing is prohibited.
2139 The various blocks in this complex statement potentially provide scope
2140 for variables as described earlier. Each such block must include the
2141 "OpenScope" nonterminal before parsing the block, and must call
2142 `var_block_close()` when closing the block.
2144 The code following "`if`", "`switch`" and "`for`" does not get its own
2145 scope, but is in a scope covering the whole statement, so names
2146 declared there cannot be redeclared elsewhere. Similarly the
2147 condition following "`while`" is in a scope that covers the body
2148 ("`do`" part) of the loop, and which does not allow conditional scope
2149 extension. Code following "`then`" (both looping and non-looping),
2150 "`else`" and "`case`" each get their own local scope.
2152 The type requirements on the code block in a `whilepart` are quite
2153 unusual. It is allowed to return a value of some identifiable type, in
2154 which case the loop aborts and an appropriate `casepart` is run, or it
2155 can return a Boolean, in which case the loop either continues to the
2156 `dopart` (on `True`) or aborts and runs the `elsepart` (on `False`).
2157 This is different both from the `ifpart` code block which is expected to
2158 return a Boolean, or the `switchpart` code block which is expected to
2159 return the same type as the casepart values. The correct analysis of
2160 the type of the `whilepart` code block is the reason for the
2161 `bool_permitted` flag which is passed to `propagate_types()`.
2163 The `cond_statement` cannot fit into a `binode` so a new `exec` is
2172 struct exec *action;
2173 struct casepart *next;
2175 struct cond_statement {
2177 struct exec *forpart, *condpart, *dopart, *thenpart, *elsepart;
2178 struct casepart *casepart;
2181 ###### ast functions
2183 static void free_casepart(struct casepart *cp)
2187 free_exec(cp->value);
2188 free_exec(cp->action);
2195 static void free_cond_statement(struct cond_statement *s)
2199 free_exec(s->forpart);
2200 free_exec(s->condpart);
2201 free_exec(s->dopart);
2202 free_exec(s->thenpart);
2203 free_exec(s->elsepart);
2204 free_casepart(s->casepart);
2208 ###### free exec cases
2209 case Xcond_statement: free_cond_statement(cast(cond_statement, e)); break;
2211 ###### ComplexStatement Grammar
2212 | CondStatement ${ $0 = $<1; }$
2217 // both ForThen and Whilepart open scopes, and CondSuffix only
2218 // closes one - so in the first branch here we have another to close.
2219 CondStatement -> ForThen WhilePart CondSuffix ${
2221 $0->forpart = $1.forpart; $1.forpart = NULL;
2222 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
2223 $0->condpart = $2.condpart; $2.condpart = NULL;
2224 $0->dopart = $2.dopart; $2.dopart = NULL;
2225 var_block_close(config2context(config), CloseSequential);
2227 | WhilePart CondSuffix ${
2229 $0->condpart = $1.condpart; $1.condpart = NULL;
2230 $0->dopart = $1.dopart; $1.dopart = NULL;
2232 | SwitchPart CondSuffix ${
2236 | IfPart IfSuffix ${
2238 $0->condpart = $1.condpart; $1.condpart = NULL;
2239 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
2240 // This is where we close an "if" statement
2241 var_block_close(config2context(config), CloseSequential);
2244 CondSuffix -> IfSuffix ${
2246 // This is where we close scope of the whole
2247 // "for" or "while" statement
2248 var_block_close(config2context(config), CloseSequential);
2250 | CasePart CondSuffix ${
2252 $1->next = $0->casepart;
2257 CasePart -> Newlines case Expression OpenScope Block ${
2258 $0 = calloc(1,sizeof(struct casepart));
2261 var_block_close(config2context(config), CloseParallel);
2263 | case Expression OpenScope Block ${
2264 $0 = calloc(1,sizeof(struct casepart));
2267 var_block_close(config2context(config), CloseParallel);
2271 IfSuffix -> Newlines ${ $0 = new(cond_statement); }$
2272 | Newlines else OpenScope Block ${
2273 $0 = new(cond_statement);
2275 var_block_close(config2context(config), CloseElse);
2277 | else OpenScope Block ${
2278 $0 = new(cond_statement);
2280 var_block_close(config2context(config), CloseElse);
2282 | Newlines else OpenScope CondStatement ${
2283 $0 = new(cond_statement);
2285 var_block_close(config2context(config), CloseElse);
2287 | else OpenScope CondStatement ${
2288 $0 = new(cond_statement);
2290 var_block_close(config2context(config), CloseElse);
2295 // These scopes are closed in CondSuffix
2296 ForPart -> for OpenScope SimpleStatements ${
2297 $0 = reorder_bilist($<3);
2299 | for OpenScope Block ${
2303 ThenPart -> then OpenScope SimpleStatements ${
2304 $0 = reorder_bilist($<3);
2305 var_block_close(config2context(config), CloseSequential);
2307 | then OpenScope Block ${
2309 var_block_close(config2context(config), CloseSequential);
2312 ThenPartNL -> ThenPart OptNL ${
2316 // This scope is closed in CondSuffix
2317 WhileHead -> while OpenScope Block ${
2322 ForThen -> ForPart OptNL ThenPartNL ${
2330 // This scope is closed in CondSuffix
2331 WhilePart -> while OpenScope Expression Block ${
2332 $0.type = Xcond_statement;
2336 | WhileHead OptNL do Block ${
2337 $0.type = Xcond_statement;
2342 IfPart -> if OpenScope Expression OpenScope Block ${
2343 $0.type = Xcond_statement;
2346 var_block_close(config2context(config), CloseParallel);
2348 | if OpenScope Block OptNL then OpenScope Block ${
2349 $0.type = Xcond_statement;
2352 var_block_close(config2context(config), CloseParallel);
2356 // This scope is closed in CondSuffix
2357 SwitchPart -> switch OpenScope Expression ${
2360 | switch OpenScope Block ${
2364 ###### print exec cases
2366 case Xcond_statement:
2368 struct cond_statement *cs = cast(cond_statement, e);
2369 struct casepart *cp;
2371 do_indent(indent, "for");
2372 if (bracket) printf(" {\n"); else printf(":\n");
2373 print_exec(cs->forpart, indent+1, bracket);
2376 do_indent(indent, "} then {\n");
2378 do_indent(indent, "then:\n");
2379 print_exec(cs->thenpart, indent+1, bracket);
2381 if (bracket) do_indent(indent, "}\n");
2385 if (cs->condpart && cs->condpart->type == Xbinode &&
2386 cast(binode, cs->condpart)->op == Block) {
2388 do_indent(indent, "while {\n");
2390 do_indent(indent, "while:\n");
2391 print_exec(cs->condpart, indent+1, bracket);
2393 do_indent(indent, "} do {\n");
2395 do_indent(indent, "do:\n");
2396 print_exec(cs->dopart, indent+1, bracket);
2398 do_indent(indent, "}\n");
2400 do_indent(indent, "while ");
2401 print_exec(cs->condpart, 0, bracket);
2406 print_exec(cs->dopart, indent+1, bracket);
2408 do_indent(indent, "}\n");
2413 do_indent(indent, "switch");
2415 do_indent(indent, "if");
2416 if (cs->condpart && cs->condpart->type == Xbinode &&
2417 cast(binode, cs->condpart)->op == Block) {
2422 print_exec(cs->condpart, indent+1, bracket);
2424 do_indent(indent, "}\n");
2426 do_indent(indent, "then:\n");
2427 print_exec(cs->thenpart, indent+1, bracket);
2431 print_exec(cs->condpart, 0, bracket);
2437 print_exec(cs->thenpart, indent+1, bracket);
2439 do_indent(indent, "}\n");
2444 for (cp = cs->casepart; cp; cp = cp->next) {
2445 do_indent(indent, "case ");
2446 print_exec(cp->value, -1, 0);
2451 print_exec(cp->action, indent+1, bracket);
2453 do_indent(indent, "}\n");
2456 do_indent(indent, "else");
2461 print_exec(cs->elsepart, indent+1, bracket);
2463 do_indent(indent, "}\n");
2468 ###### propagate exec cases
2469 case Xcond_statement:
2471 // forpart and dopart must return Vnone
2472 // thenpart must return Vnone if there is a dopart,
2473 // otherwise it is like elsepart.
2475 // be bool if there is no casepart
2476 // match casepart->values if there is a switchpart
2477 // either be bool or match casepart->value if there
2479 // elsepart, casepart->action must match the return type
2480 // expected of this statement.
2481 struct cond_statement *cs = cast(cond_statement, prog);
2482 struct casepart *cp;
2484 t = propagate_types(cs->forpart, c, ok, Vnone, 0);
2485 if (!vtype_compat(Vnone, t, 0))
2487 t = propagate_types(cs->dopart, c, ok, Vnone, 0);
2488 if (!vtype_compat(Vnone, t, 0))
2491 t = propagate_types(cs->thenpart, c, ok, Vnone, 0);
2492 if (!vtype_compat(Vnone, t, 0))
2495 if (cs->casepart == NULL)
2496 propagate_types(cs->condpart, c, ok, Vbool, 0);
2498 /* Condpart must match case values, with bool permitted */
2500 for (cp = cs->casepart;
2501 cp && (t == Vunknown); cp = cp->next)
2502 t = propagate_types(cp->value, c, ok, Vunknown, 0);
2503 if (t == Vunknown && cs->condpart)
2504 t = propagate_types(cs->condpart, c, ok, Vunknown, 1);
2505 // Now we have a type (I hope) push it down
2506 if (t != Vunknown) {
2507 for (cp = cs->casepart; cp; cp = cp->next)
2508 propagate_types(cp->value, c, ok, t, 0);
2509 propagate_types(cs->condpart, c, ok, t, 1);
2512 // (if)then, else, and case parts must return expected type.
2513 if (!cs->dopart && type == Vunknown)
2514 type = propagate_types(cs->thenpart, c, ok, Vunknown, bool_permitted);
2515 if (type == Vunknown)
2516 type = propagate_types(cs->elsepart, c, ok, Vunknown, bool_permitted);
2517 for (cp = cs->casepart;
2518 cp && type == Vunknown;
2520 type = propagate_types(cp->action, c, ok, Vunknown, bool_permitted);
2521 if (type > Vunknown) {
2523 propagate_types(cs->thenpart, c, ok, type, bool_permitted);
2524 propagate_types(cs->elsepart, c, ok, type, bool_permitted);
2525 for (cp = cs->casepart; cp ; cp = cp->next)
2526 propagate_types(cp->action, c, ok, type, bool_permitted);
2532 ###### interp exec cases
2533 case Xcond_statement:
2535 struct value v, cnd;
2536 struct casepart *cp;
2537 struct cond_statement *c = cast(cond_statement, e);
2539 interp_exec(c->forpart);
2542 cnd = interp_exec(c->condpart);
2545 if (!(cnd.vtype == Vnone ||
2546 (cnd.vtype == Vbool && cnd.bool != 0)))
2550 interp_exec(c->dopart);
2553 v = interp_exec(c->thenpart);
2554 if (v.vtype != Vnone || !c->dopart)
2558 } while (c->dopart);
2560 for (cp = c->casepart; cp; cp = cp->next) {
2561 v = interp_exec(cp->value);
2562 if (value_cmp(v, cnd) == 0) {
2565 return interp_exec(cp->action);
2571 return interp_exec(c->elsepart);
2576 ### Finally the whole program.
2578 Somewhat reminiscent of Pascal, a (current) Ocean program starts with
2579 the keyword "program" and a list of variable names which are assigned
2580 values from command line arguments. Following this is a `block` which
2581 is the code to execute.
2583 As this is the top level, several things are handled a bit
2585 The whole program is not interpreted by `interp_exec` as that isn't
2586 passed the argument list which the program requires. Similarly type
2587 analysis is a bit more interesting at this level.
2592 ###### Parser: grammar
2595 Program -> program OpenScope Varlist Block OptNL ${
2598 $0->left = reorder_bilist($<3);
2600 var_block_close(config2context(config), CloseSequential);
2601 if (config2context(config)->scope_stack) abort();
2604 Varlist -> Varlist ArgDecl ${
2613 ArgDecl -> IDENTIFIER ${ {
2614 struct variable *v = var_decl(config2context(config), $1.txt);
2621 ###### print binode cases
2623 do_indent(indent, "program");
2624 for (b2 = cast(binode, b->left); b2; b2 = cast(binode, b2->right)) {
2626 print_exec(b2->left, 0, 0);
2632 print_exec(b->right, indent+1, bracket);
2634 do_indent(indent, "}\n");
2637 ###### propagate binode cases
2638 case Program: abort();
2640 ###### core functions
2642 static int analyse_prog(struct exec *prog, struct parse_context *c)
2644 struct binode *b = cast(binode, prog);
2649 propagate_types(b->right, c, &ok, Vnone, 0);
2654 for (b = cast(binode, b->left); b; b = cast(binode, b->right)) {
2655 struct var *v = cast(var, b->left);
2656 if (v->var->val.vtype == Vunknown)
2657 val_init(&v->var->val, Vstr);
2659 b = cast(binode, prog);
2662 propagate_types(b->right, c, &ok, Vnone, 0);
2667 /* Make sure everything is still consistent */
2668 propagate_types(b->right, c, &ok, Vnone, 0);
2672 static void interp_prog(struct exec *prog, char **argv)
2674 struct binode *p = cast(binode, prog);
2675 struct binode *al = cast(binode, p->left);
2679 struct var *v = cast(var, al->left);
2680 struct value *vl = &v->var->val;
2682 if (argv[0] == NULL) {
2683 printf("Not enough args\n");
2686 al = cast(binode, al->right);
2688 if (!parse_value(vl, argv[0]))
2692 v = interp_exec(p->right);
2696 ###### interp binode cases
2697 case Program: abort();
2699 ## And now to test it out.
2701 Having a language requires having a "hello world" program. I'll
2702 provide a little more than that: a program that prints "Hello world",
2703 finds the GCD of two numbers, prints the first few elements of
2704 Fibonacci, and performs a binary search for a number.
2706 ###### File: oceani.mk
2709 @echo "===== TEST ====="
2710 ./oceani --section "test: hello" oceani.mdc 55 33
2715 print "Hello World, what lovely oceans you have!"
2716 /* When a variable is defined in both branches of an 'if',
2717 * and used afterwards, the variables are merged.
2723 print "Is", A, "bigger than", B,"? ", bigger
2724 /* If a variable is not used after the 'if', no
2725 * merge happens, so types can be different
2729 print A, "is more than twice", B, "?", double
2732 print "double", A, "is only", double
2741 print "GCD of", A, "and", B,"is", a
2743 print a, "is not positive, cannot calculate GCD"
2745 print b, "is not positive, cannot calculate GCD"
2750 print "Fibonacci:", f1,f2,
2751 then togo = togo - 1
2759 /* Binary search... */
2764 mid := (lo + hi) / 2
2776 print "Yay, I found", target
2778 print "Closest I found was", mid