1 # Ocean Interpreter - Stoney Creek version
3 Ocean is intended to be an compiled language, so this interpreter is
4 not targeted at being the final product. It is, rather, an intermediate
5 stage, and fills that role in two distinct ways.
7 Firstly, it exists as a platform to experiment with the early language
8 design. An interpreter is easy to write and easy to get working, so
9 the barrier for entry is lower if I aim to start with an interpreter.
11 Secondly, the plan for the Ocean compiler is to write it in the
12 [Ocean language](http://ocean-lang.org). To achieve this we naturally
13 need some sort of boot-strap process and this interpreter - written in
14 portable C - will fill that role. It will be used to bootstrap the
17 Two features that are not needed to fill either of these roles are
18 performance and completeness. The interpreter only needs to be fast
19 enough to run small test programs and occasionally to run the compiler
20 on itself. It only needs to be complete enough to test aspects of the
21 design which are developed before the compiler is working, and to run
22 the compiler on itself. Any features not used by the compiler when
23 compiling itself are superfluous. They may be included anyway, but
26 Nonetheless, the interpreter should end up being reasonably complete,
27 and any performance bottlenecks which appear and are easily fixed, will
32 This second version of the interpreter exists to test out the
33 structured statement providing conditions and iteration, and simple
34 variable scoping. Clearly we need some minimal other functionality so
35 that values can be tested and instructions iterated over. All that
36 functionality is clearly not normative at this stage (not that
37 anything is **really** normative yet) and will change, so early test
38 code will certainly break in later versions.
40 The under-test parts of the language are:
42 - conditional/looping structured statements
43 - the `use` statement which is needed for that
44 - Variable binding using ":=" and "::=", and assignment using "=".
46 Elements which are present to make a usable language are:
48 - "blocks" of multiple statements.
49 - `pass`: a statement which does nothing.
50 - expressions: `+`, `-`, `*`, `/` can apply to numbers and `++` can
51 catenate strings. `and`, `or`, `not` manipulate Booleans, and
52 normal comparison operators can work on all three types.
53 - `print`: will print the values in a list of expressions.
54 - `program`: is given a list of identifiers to initialize from
59 Versions of the interpreter which obviously do not support a complete
60 language will be named after creeks and streams. This one is Stoney
63 Once we have something reasonably resembling a complete language, the
64 names of rivers will be used.
65 Early versions of the compiler will be named after seas. Major
66 releases of the compiler will be named after oceans. Hopefully I will
67 be finished once I get to the Pacific Ocean release.
71 As well as parsing and executing a program, the interpreter can print
72 out the program from the parsed internal structure. This is useful
73 for validating the parsing.
74 So the main requirements of the interpreter are:
76 - Parse the program, possible with tracing
77 - Analyse the parsed program to ensure consistency
81 This is all performed by a single C program extracted with
84 There will be two formats for printing the program: a default and one
85 that uses bracketing. So a `--bracket` command line option is needed
86 for that. Normally the first code section found is used, however an
87 alternate section can be requested so that a file (such as this one)
88 can contain multiple programs This is effected with the `--section`
91 ###### File: oceani.mk
93 myCFLAGS := -Wall -g -fplan9-extensions
94 CFLAGS := $(filter-out $(myCFLAGS),$(CFLAGS)) $(myCFLAGS)
95 myLDLIBS:= libparser.o libscanner.o libmdcode.o -licuuc
96 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
98 all :: $(LDLIBS) oceani
99 oceani.c oceani.h : oceani.mdc parsergen
100 ./parsergen -o oceani --LALR --tag Parser oceani.mdc
101 oceani.mk: oceani.mdc md2c
104 oceani: oceani.o $(LDLIBS)
105 $(CC) $(CFLAGS) -o oceani oceani.o $(LDLIBS)
107 ###### Parser: header
110 struct parse_context {
111 struct token_config config;
118 #define container_of(ptr, type, member) ({ \
119 const typeof( ((type *)0)->member ) *__mptr = (ptr); \
120 (type *)( (char *)__mptr - offsetof(type,member) );})
122 #define config2context(_conf) container_of(_conf, struct parse_context, \
131 #include <sys/mman.h>
150 static char Usage[] = "Usage: oceani --trace --print --noexec --brackets"
151 "--section=SectionName prog.ocn\n";
152 static const struct option long_options[] = {
153 {"trace", 0, NULL, 't'},
154 {"print", 0, NULL, 'p'},
155 {"noexec", 0, NULL, 'n'},
156 {"brackets", 0, NULL, 'b'},
157 {"section", 1, NULL, 's'},
160 const char *options = "tpnbs";
161 int main(int argc, char *argv[])
167 char *section = NULL;
168 struct parse_context context = {
170 .ignored = (1 << TK_line_comment)
171 | (1 << TK_block_comment),
172 .number_chars = ".,_+-",
177 int doprint=0, dotrace=0, doexec=1, brackets=0;
180 while ((opt = getopt_long(argc, argv, options, long_options, NULL))
183 case 't': dotrace=1; break;
184 case 'p': doprint=1; break;
185 case 'n': doexec=0; break;
186 case 'b': brackets=1; break;
187 case 's': section = optarg; break;
188 default: fprintf(stderr, Usage);
192 if (optind >= argc) {
193 fprintf(stderr, "oceani: no input file given\n");
196 fd = open(argv[optind], O_RDONLY);
198 fprintf(stderr, "oceani: cannot open %s\n", argv[optind]);
201 context.file_name = argv[optind];
202 len = lseek(fd, 0, 2);
203 file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
204 s = code_extract(file, file+len, NULL);
206 fprintf(stderr, "oceani: could not find any code in %s\n",
212 for (ss = s; ss; ss = ss->next) {
213 struct text sec = ss->section;
214 if (sec.len == strlen(section) &&
215 strncmp(sec.txt, section, sec.len) == 0)
219 prog = parse_oceani(ss->code, &context.config,
220 dotrace ? stderr : NULL);
222 fprintf(stderr, "oceani: cannot find section %s\n",
227 prog = parse_oceani(s->code, &context.config,
228 dotrace ? stderr : NULL);
230 print_exec(*prog, 0, brackets);
231 if (prog && doexec) {
232 if (!analyse_prog(*prog, &context)) {
233 fprintf(stderr, "oceani: type error in program - not running.\n");
236 interp_prog(*prog, argv+optind+1);
243 struct section *t = s->next;
254 These four requirements of parse, analyse, print, interpret apply to
255 each language element individually so that is how most of the code
258 Three of the four are fairly self explanatory. The one that requires
259 a little explanation is the analysis step.
261 The current language design does not require (or even allow) the types
262 of variables to be declared, but they must still have a single type.
263 Different operations impose different requirements on the variables,
264 for example addition requires both arguments to be numeric, and
265 assignment requires the variable on the left to have the same type as
266 the expression on the right.
268 Analysis involves propagating these type requirements around and
269 consequently setting the type of each variable. If any requirements
270 are violated (e.g. a string is compared with a number) or if a
271 variable needs to have two different types, then an error is raised
272 and the program will not run.
274 If the same variable is declared in both branchs of an 'if/else', or
275 in all cases of a 'switch' then the multiple instances may be merged
276 into just one variable if the variable is references after the
277 conditional statement. When this happens, the types must naturally be
278 consistent across all the branches. When the variable is not used
279 outside the if, the variables in the different branches are distinct
280 and can be of different types.
282 Determining the types of all variables early is important for
283 processing command line arguments. These can be assigned to any type
284 of variable, but we must first know the correct type so any required
285 conversion can happen. If a variable is associated with a command
286 line argument but no type can be interpreted (e.g. the variable is
287 only ever used in a `print` statement), then the type is set to
290 Undeclared names may only appear in "use" statements and "case" expressions.
291 These names are given a type of "label" and a unique value.
292 This allows them to fill the role of a name in an enumerated type, which
293 is useful for testing the `switch` statement.
295 As we will see, the condition part of a `while` statement can return
296 either a Boolean or some other type. This requires that the expect
297 type that gets passed around comprises a type (`enum vtype`) and a
298 flag to indicate that `Vbool` is also permitted.
300 As there are, as yet, no distinct types that are compatible, there
301 isn't much subtlety in the analysis. When we have distinct number
302 types, this will become more interesting.
306 When analysis discovers an inconsistency it needs to report an error;
307 just refusing to run the code esure that the error doesn't cascade,
308 but by itself it isn't very useful. A clear understand of the sort of
309 error message that are useful will help guide the process of analysis.
311 At a simplistic level, the only sort of error that type analysis can
312 report is that the type of some construct doesn't match a contextual
313 requirement. For example, in `4 + "hello"` the addition provides a
314 contextual requirement for numbers, but `"hello"` is not a number. In
315 this particular example no further information is needed as the types
316 are obvious from local information. When a variable is involved that
317 isn't the case. It may be helpful to explain why the variable has a
318 particular type, by indicating the location where the type was set,
319 whether by declaration or usage.
321 Using a recursive-descent analysis we can easily detect a problem at
322 multiple locations. In "`hello:= "there"; 4 + hello`" the addition
323 will detect that one argument is not a number and the usage of `hello`
324 will detect that a number was wanted, but not provided. In this
325 (early) version of the language, we will generate error reports at
326 multiple locations, to the use of `hello` will report an error and
327 explain were the value was set, and the addition will report an error
328 and say why numbers are needed. To be able to report locations for
329 errors, each language element will need to record a file location
330 (line and column) and each variable will need to record the language
331 element where its type was set. For now we will assume that each line
332 of an error message indicates one location in the file, and up to 2
333 types. So we provide a `printf`-like function which takes a format, a
334 language (a `struct exec` which has not yet been introduced), and 2
335 types. "`$1`" reports the first type, "`$2`" reports the second. We
336 will need a function to print the location, once we know how that is
341 static void fput_loc(struct exec *loc, FILE *f);
343 ###### core functions
345 static void type_err(struct parse_context *c,
346 char *fmt, struct exec *loc,
347 enum vtype t1, enum vtype t2)
349 fprintf(stderr, "%s:", c->file_name);
350 fput_loc(loc, stderr);
351 for (; *fmt ; fmt++) {
358 case '%': fputc(*fmt, stderr); break;
359 default: fputc('?', stderr); break;
361 fputs(vtype_names[t1], stderr);
364 fputs(vtype_names[t2], stderr);
374 One last introductory step before detailing the language elements and
375 providing their four requirements is to establish the data structures
376 to store these elements.
378 There are two key objects that we need to work with: executable
379 elements which comprise the program, and values which the program
380 works with. Between these are the variables in their various scopes
381 which hold the values.
385 Values can be numbers, which we represent as multi-precision
386 fractions, strings, Booleans and labels. When analysing the program
387 we also need to allow for places where no value is meaningful
388 (`Vnone`) and where we don't know what type to expect yet (`Vunknown`
389 which can be anything and `Vnolabel` which can be anything except a
390 label). A 2 character 'tail' is included in each value as the scanner
391 wants to parse that from the end of numbers and we need somewhere to
392 put it. It is currently ignored but one day might allow for
393 e.g. "imaginary" numbers.
395 Values are never shared, they are always copied when used, and freed
396 when no longer needed.
398 When propagating type information around the program, we need to
399 determine if two types are compatible, where `Vunknown` is compatible
400 which anything, and `Vnolabel` is compatible with anything except a
401 label. A separate funtion to encode this rule will simplify some code
404 When assigning command line arguments to variable, we need to be able
405 to parse each type from a string.
413 myLDLIBS := libnumber.o libstring.o -lgmp
414 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
418 enum vtype {Vnolabel, Vunknown, Vnone, Vstr, Vnum, Vbool, Vlabel} vtype;
428 char *vtype_names[] = {"nolabel", "unknown", "none", "string",
429 "number", "Boolean", "label"};
432 static void free_value(struct value v)
437 case Vunknown: break;
438 case Vstr: free(v.str.txt); break;
439 case Vnum: mpq_clear(v.num); break;
445 static int vtype_compat(enum vtype require, enum vtype have, int bool_permitted)
447 if (bool_permitted && have == Vbool)
451 return have != Vlabel;
455 return have == Vunknown || require == have;
459 ###### value functions
461 static void val_init(struct value *val, enum vtype type)
467 case Vunknown: break;
469 mpq_init(val->num); break;
471 val->str.txt = malloc(1);
483 static struct value dup_value(struct value v)
490 case Vunknown: break;
499 mpq_set(rv.num, v.num);
502 rv.str.len = v.str.len;
503 rv.str.txt = malloc(rv.str.len);
504 memcpy(rv.str.txt, v.str.txt, v.str.len);
510 static int value_cmp(struct value left, struct value right)
513 if (left.vtype != right.vtype)
514 return left.vtype - right.vtype;
515 switch (left.vtype) {
516 case Vlabel: cmp = left.label == right.label ? 0 : 1; break;
517 case Vnum: cmp = mpq_cmp(left.num, right.num); break;
518 case Vstr: cmp = text_cmp(left.str, right.str); break;
519 case Vbool: cmp = left.bool - right.bool; break;
522 case Vunknown: cmp = 0;
527 static struct text text_join(struct text a, struct text b)
530 rv.len = a.len + b.len;
531 rv.txt = malloc(rv.len);
532 memcpy(rv.txt, a.txt, a.len);
533 memcpy(rv.txt+a.len, b.txt, b.len);
537 static void print_value(struct value v)
541 printf("*Unknown*"); break;
544 printf("*no-value*"); break;
546 printf("*label-%p*", v.label); break;
548 printf("%.*s", v.str.len, v.str.txt); break;
550 printf("%s", v.bool ? "True":"False"); break;
555 mpf_set_q(fl, v.num);
556 gmp_printf("%Fg", fl);
563 static int parse_value(struct value *vl, char *arg)
574 vl->str.len = strlen(arg);
575 vl->str.txt = malloc(vl->str.len);
576 memcpy(vl->str.txt, arg, vl->str.len);
583 tx.txt = arg; tx.len = strlen(tx.txt);
584 if (number_parse(vl->num, vl->tail, tx) == 0)
587 mpq_neg(vl->num, vl->num);
590 if (strcasecmp(arg, "true") == 0 ||
591 strcmp(arg, "1") == 0)
593 else if (strcasecmp(arg, "false") == 0 ||
594 strcmp(arg, "0") == 0)
597 printf("Bad bool: %s\n", arg);
607 Variables are scoped named values. We store the names in a linked
608 list of "bindings" sorted lexically, and use sequential search and
615 struct binding *next; // in lexical order
619 This linked list is stored in the parse context so that "reduce"
620 functions can find or add variables, and so the analysis phase can
621 ensure that every variable gets a type.
625 struct binding *varlist; // In lexical order
629 static struct binding *find_binding(struct parse_context *c, struct text s)
631 struct binding **l = &c->varlist;
636 (cmp = text_cmp((*l)->name, s)) < 0)
640 n = calloc(1, sizeof(*n));
647 Each name can be linked to multiple variables defined in different
648 scopes. Each scope starts where the name is declared and continues
649 until the end of the containing code block. Scopes of a given name
650 cannot nest, so a declaration while a name is in-scope is an error.
652 ###### binding fields
653 struct variable *var;
657 struct variable *previous;
659 struct binding *name;
660 struct exec *where_set; // where type was set
664 While the naming seems strange, we include local constants in the
665 definition of variables. A name declared `var := value` can
666 subsequently be changed, but a name declared `var ::= value` cannot -
669 ###### variable fields
672 Scopes in parallel branches can be partially merged. More
673 specifically, if a given name is declared in both branches of an
674 if/else then it's scope is a candidate for merging. Similarly if
675 every branch of an exhaustive switch (e.g. has an "else" clause)
676 declares a given name, then the scopes from the branches are
677 candidates for merging.
679 Note that names declared inside a loop (which is only parallel to
680 itself) are never visible after the loop. Similarly names defined in
681 scopes which are not parallel, such as those started by `for` and
682 `switch`, are never visible after the scope. Only variable defined in
683 both `then` and `else` (including the implicit then after an `if`, and
684 excluding `then` used with `for`) and in all `case`s and `else` of a
685 `switch` or `while` can be visible beyond the `if`/`switch`/`while`.
687 Labels, which are a bit like variables, follow different rules.
688 Labels are not explicitly declared, but if an undeclared name appears
689 in a context where a label is legal, that effectively declares the
690 name as a label. The declaration remains in force (or in scope) at
691 least to the end of the immediately containing block and conditionally
692 in any larger containing block which does not declare the name in some
693 other way. Importantly, the conditional scope extension happens even
694 if the label is only used in parallel branch of a conditional -- when
695 used in one branch it is treated as having been declared in all
698 Merge candidates are tentatively visible beyond the end of the
699 branching statement which creates them. If the name is used, the
700 merge is affirmed and they become a single variable visible at the
701 outer layer. If not - if it is redeclared first - the merge lapses.
703 To track scopes we have an extra stack, implemented as a linked list,
704 which roughly parallels the parse stack and which is used exclusively
705 for scoping. When a new scope is opened, a new frame is pushed and
706 the child-count of the parent frame is incremented. This child-count
707 is used to distinguish between the first of a set of parallel scopes,
708 in which declared variables must not be in scope, and subsequent
709 branches, whether they must already be conditionally scoped.
711 To push a new frame *before* any code in the frame is parsed, we need a
712 grammar reduction. This is most easily achieved with a grammar
713 element which derives the empty string, and created the new scope when
714 it is recognized. This can be placed, for example, between a keyword
715 like "if" and the code following it.
719 struct scope *parent;
725 struct scope *scope_stack;
728 static void scope_pop(struct parse_context *c)
730 struct scope *s = c->scope_stack;
732 c->scope_stack = s->parent;
737 static void scope_push(struct parse_context *c)
739 struct scope *s = calloc(1, sizeof(*s));
741 c->scope_stack->child_count += 1;
742 s->parent = c->scope_stack;
750 OpenScope -> ${ scope_push(config2context(config)); }$
753 Each variable records a scope depth and is in one of four states:
755 - "in scope". This is the case between the declaration of the
756 variable and the end of the containing block, and also between
757 the usage with affirms a merge and the end of the block.
759 The scope depth is not greater than the current parse context scope
760 nest depth. When the block of that depth closes, the state will
761 change. To achieve this, all "in scope" variables are linked
762 together as a stack in nesting order.
764 - "pending". The "in scope" block has closed, but other parallel
765 scopes are still being processed. So far, every parallel block at
766 the same level that has closed has declared the name.
768 The scope depth is the depth of the last parallel block that
769 enclosed the declaration, and that has closed.
771 - "conditionally in scope". The "in scope" block and all parallel
772 scopes have closed, and no further mention of the name has been
773 seen. This state includes a secondary nest depth which records the
774 outermost scope seen since the variable became conditionally in
775 scope. If a use of the name is found, the variable becomes "in
776 scope" and that secondary depth becomes the recorded scope depth.
777 If the name is declared as a new variable, the old variable becomes
778 "out of scope" and the recorded scope depth stays unchanged.
780 - "out of scope". The variable is neither in scope nor conditionally
781 in scope. It is permanently out of scope now and can be removed from
782 the "in scope" stack.
785 ###### variable fields
786 int depth, min_depth;
787 enum { OutScope, PendingScope, CondScope, InScope } scope;
788 struct variable *in_scope;
792 struct variable *in_scope;
794 All variables with the same name are linked together using the
795 'previous' link. Those variable that have
796 been affirmatively merged all have a 'merged' pointer that points to
797 one primary variable - the most recently declared instance. When
798 merging variables, we need to also adjust the 'merged' pointer on any
799 other variables that had previously been merged with the one that will
800 no longer be primary.
802 ###### variable fields
803 struct variable *merged;
807 static void variable_merge(struct variable *primary, struct variable *secondary)
813 primary = primary->merged;
815 for (v = primary->previous; v; v=v->previous)
816 if (v == secondary || v == secondary->merged ||
817 v->merged == secondary ||
818 (v->merged && v->merged == secondary->merged)) {
826 while (context.varlist) {
827 struct binding *b = context.varlist;
828 struct variable *v = b->var;
829 context.varlist = b->next;
832 struct variable *t = v;
840 #### Manipulating Bindings
842 When a name is conditionally visible, a new declaration discards the
843 old binding - the condition lapses. Conversely a usage of the name
844 affirms the visibility and extends it to the end of the containing
845 block - i.e. the block that contains both the original declaration and
846 the latest usage. This is determined from `min_depth`. When a
847 conditionally visible variable gets affirmed like this, it is also
848 merged with other conditionally visible variables with the same name.
850 When we parse a variable declaration we either signal an error if the
851 name is currently bound, or create a new variable at the current nest
852 depth if the name is unbound or bound to a conditionally scoped or
853 pending-scope variable. If the previous variable was conditionally
854 scoped, it and its homonyms becomes out-of-scope.
856 When we parse a variable reference (including non-declarative
857 assignment) we signal an error if the name is not bound or is bound to
858 a pending-scope variable; update the scope if the name is bound to a
859 conditionally scoped variable; or just proceed normally if the named
860 variable is in scope.
862 When we exit a scope, any variables bound at this level are either
863 marked out of scope or pending-scoped, depending on whether the
864 scope was sequential or parallel.
866 When exiting a parallel scope we check if there are any variables that
867 were previously pending and are still visible. If there are, then
868 there weren't redeclared in the most recent scope, so they cannot be
869 merged and must become out-of-scope. If it is not the first of
870 parallel scopes (based on `child_count`), we check that there was a
871 previous binding that is still pending-scope. If there isn't, the new
872 variable must now be out-of-scope.
874 When exiting a sequential scope that immediately enclosed parallel
875 scopes, we need to resolve any pending-scope variables. If there was
876 no `else` clause, and we cannot determine that the `switch` was exhaustive,
877 we need to mark all pending-scope variable as out-of-scope. Otherwise
878 all pending-scope variables become conditionally scoped.
881 enum closetype { CloseSequential, CloseParallel, CloseElse };
885 static struct variable *var_decl(struct parse_context *c, struct text s)
887 struct binding *b = find_binding(c, s);
888 struct variable *v = b->var;
890 switch (v ? v->scope : OutScope) {
892 /* Signal error ... once I build error signalling support */
896 v && v->scope == CondScope;
902 v = calloc(1, sizeof(*v));
903 v->previous = b->var;
906 v->min_depth = v->depth = c->scope_depth;
908 v->in_scope = c->in_scope;
910 val_init(&v->val, Vunknown);
914 static struct variable *var_ref(struct parse_context *c, struct text s)
916 struct binding *b = find_binding(c, s);
917 struct variable *v = b->var;
920 switch (v ? v->scope : OutScope) {
923 /* Signal an error - once that is possible */
926 /* All CondScope variables of this name need to be merged
929 v->depth = v->min_depth;
931 for (v2 = v->previous;
932 v2 && v2->scope == CondScope;
934 variable_merge(v, v2);
942 static void var_block_close(struct parse_context *c, enum closetype ct)
944 /* close of all variables that are in_scope */
945 struct variable *v, **vp, *v2;
948 for (vp = &c->in_scope;
949 v = *vp, v && v->depth > c->scope_depth && v->min_depth > c->scope_depth;
953 case CloseParallel: /* handle PendingScope */
957 if (c->scope_stack->child_count == 1)
958 v->scope = PendingScope;
959 else if (v->previous &&
960 v->previous->scope == PendingScope)
961 v->scope = PendingScope;
962 else if (v->val.vtype == Vlabel)
963 v->scope = PendingScope;
964 else if (v->name->var == v)
966 if (ct == CloseElse) {
967 /* All Pending variables with this name
968 * are now Conditional */
970 v2 && v2->scope == PendingScope;
972 v2->scope = CondScope;
977 v2 && v2->scope == PendingScope;
979 if (v2->val.vtype != Vlabel)
980 v2->scope = OutScope;
982 case OutScope: break;
985 case CloseSequential:
986 if (v->val.vtype == Vlabel)
987 v->scope = PendingScope;
993 /* There was no 'else', so we can only become
994 * conditional if we know the cases were exhaustive,
995 * and that doesn't mean anything yet.
996 * So only labels become conditional..
999 v2 && v2->scope == PendingScope;
1001 if (v2->val.vtype == Vlabel) {
1002 v2->scope = CondScope;
1003 v2->min_depth = c->scope_depth;
1005 v2->scope = OutScope;
1008 case OutScope: break;
1012 if (v->scope == OutScope)
1021 Executables can be lots of different things. In many cases an
1022 executable is just an operation combined with one or two other
1023 executables. This allows for expressions and lists etc. Other times
1024 an executable is something quite specific like a constant or variable
1025 name. So we define a `struct exec` to be a general executable with a
1026 type, and a `struct binode` which is a subclass of `exec` and forms a
1027 node in a binary tree and holding an operation. There will be other
1028 subclasses, and to access these we need to be able to `cast` the
1029 `exec` into the various other types.
1032 #define cast(structname, pointer) ({ \
1033 const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \
1034 if (__mptr && *__mptr != X##structname) abort(); \
1035 (struct structname *)( (char *)__mptr);})
1037 #define new(structname) ({ \
1038 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1039 __ptr->type = X##structname; \
1040 __ptr->line = -1; __ptr->column = -1; \
1043 #define new_pos(structname, token) ({ \
1044 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1045 __ptr->type = X##structname; \
1046 __ptr->line = token.line; __ptr->column = token.col; \
1055 enum exec_types type;
1063 struct exec *left, *right;
1066 ###### ast functions
1068 static int __fput_loc(struct exec *loc, FILE *f)
1070 if (loc->line >= 0) {
1071 fprintf(f, "%d:%d: ", loc->line, loc->column);
1074 if (loc->type == Xbinode)
1075 return __fput_loc(cast(binode,loc)->left, f) ||
1076 __fput_loc(cast(binode,loc)->right, f);
1079 static void fput_loc(struct exec *loc, FILE *f)
1081 if (!__fput_loc(loc, f))
1082 fprintf(f, "??:??: ");
1085 Each different type of `exec` node needs a number of functions
1086 defined, a bit like methods. We must be able to be able to free it,
1087 print it, analyse it and execute it. Once we have specific `exec`
1088 types we will need to parse them too. Let's take this a bit more
1093 The parser generator requires a `free_foo` function for each struct
1094 that stores attributes and they will be `exec`s of subtypes there-of.
1095 So we need `free_exec` which can handle all the subtypes, and we need
1098 ###### ast functions
1100 static void free_binode(struct binode *b)
1105 free_exec(b->right);
1109 ###### core functions
1110 static void free_exec(struct exec *e)
1119 ###### forward decls
1121 static void free_exec(struct exec *e);
1123 ###### free exec cases
1124 case Xbinode: free_binode(cast(binode, e)); break;
1128 Printing an `exec` requires that we know the current indent level for
1129 printing line-oriented components. As will become clear later, we
1130 also want to know what sort of bracketing to use.
1132 ###### ast functions
1134 static void do_indent(int i, char *str)
1141 ###### core functions
1142 static void print_binode(struct binode *b, int indent, int bracket)
1146 ## print binode cases
1150 static void print_exec(struct exec *e, int indent, int bracket)
1154 print_binode(cast(binode, e), indent, bracket); break;
1159 ###### forward decls
1161 static void print_exec(struct exec *e, int indent, int bracket);
1165 As discussed, analysis involves propagating type requirements around
1166 the program and looking for errors.
1168 So `propagate_types` is passed an expected type (being a `vtype`
1169 together with a `bool_permitted` flag) that the `exec` is expected to
1170 return, and returns the type that it does return, either of which can
1171 be `Vunknown`. An `ok` flag is passed by reference. It is set to `0`
1172 when an error is found, and `2` when any change is made. If it
1173 remains unchanged at `1`, then no more propagation is needed.
1175 ###### core functions
1177 static enum vtype propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1178 enum vtype type, int bool_permitted)
1185 switch (prog->type) {
1188 struct binode *b = cast(binode, prog);
1190 ## propagate binode cases
1194 ## propagate exec cases
1201 Interpreting an `exec` doesn't require anything but the `exec`. State
1202 is stored in variables and each variable will be directly linked from
1203 within the `exec` tree. The exception to this is the whole `program`
1204 which needs to look at command line arguments. The `program` will be
1205 interpreted separately.
1207 Each `exec` can return a value, which may be `Vnone` but shouldn't be `Vunknown`.
1209 ###### core functions
1211 static struct value interp_exec(struct exec *e)
1221 struct binode *b = cast(binode, e);
1222 struct value left, right;
1223 left.vtype = right.vtype = Vnone;
1225 ## interp binode cases
1227 free_value(left); free_value(right);
1230 ## interp exec cases
1235 ## Language elements
1237 Each language element needs to be parsed, printed, analysed,
1238 interpreted, and freed. There are several, so let's just start with
1239 the easy ones and work our way up.
1243 We have already met values as separate objects. When manifest
1244 constants appear in the program text that must result in an executable
1245 which has a constant value. So the `val` structure embeds a value in
1261 $0 = new_pos(val, $1);
1262 $0->val.vtype = Vbool;
1266 $0 = new_pos(val, $1);
1267 $0->val.vtype = Vbool;
1271 $0 = new_pos(val, $1);
1272 $0->val.vtype = Vnum;
1273 if (number_parse($0->val.num, $0->val.tail, $1.txt) == 0)
1274 mpq_init($0->val.num);
1277 $0 = new_pos(val, $1);
1278 $0->val.vtype = Vstr;
1279 string_parse(&$1, '\\', &$0->val.str, $0->val.tail);
1282 $0 = new_pos(val, $1);
1283 $0->val.vtype = Vstr;
1284 string_parse(&$1, '\\', &$0->val.str, $0->val.tail);
1287 ###### print exec cases
1290 struct val *v = cast(val, e);
1291 if (v->val.vtype == Vstr)
1293 print_value(v->val);
1294 if (v->val.vtype == Vstr)
1299 ###### propagate exec cases
1302 struct val *val = cast(val, prog);
1303 if (!vtype_compat(type, val->val.vtype, bool_permitted)) {
1304 type_err(c, "error: expected %1 found %2",
1305 prog, type, val->val.vtype);
1308 return val->val.vtype;
1311 ###### interp exec cases
1313 return dup_value(cast(val, e)->val);
1315 ###### ast functions
1316 static void free_val(struct val *v)
1324 ###### free exec cases
1325 case Xval: free_val(cast(val, e)); break;
1327 ###### ast functions
1328 // Move all nodes from 'b' to 'rv', reversing the order.
1329 // In 'b' 'left' is a list, and 'right' is the last node.
1330 // In 'rv', left' is the first node and 'right' is a list.
1331 static struct binode *reorder_bilist(struct binode *b)
1333 struct binode *rv = NULL;
1336 struct exec *t = b->right;
1340 b = cast(binode, b->left);
1350 Just as we used as `val` to wrap a value into an `exec`, we similarly
1351 need a `var` to wrap a `variable` into an exec. While each `val`
1352 contained a copy of the value, each `var` hold a link to the variable
1353 because it really is the same variable no matter where it appears.
1354 When a variable is used, we need to remember to follow the `->merged`
1355 link to find the primary instance.
1363 struct variable *var;
1369 VariableDecl -> IDENTIFIER := ${ {
1370 struct variable *v = var_decl(config2context(config), $1.txt);
1371 $0 = new_pos(var, $1);
1374 | IDENTIFIER ::= ${ {
1375 struct variable *v = var_decl(config2context(config), $1.txt);
1377 $0 = new_pos(var, $1);
1381 Variable -> IDENTIFIER ${ {
1382 struct variable *v = var_ref(config2context(config), $1.txt);
1383 $0 = new_pos(var, $1);
1385 /* This might be a label - allocate a var just in case */
1386 v = var_decl(config2context(config), $1.txt);
1388 val_init(&v->val, Vlabel);
1395 ###### print exec cases
1398 struct var *v = cast(var, e);
1400 struct binding *b = v->var->name;
1401 printf("%.*s", b->name.len, b->name.txt);
1408 if (loc->type == Xvar) {
1409 struct var *v = cast(var, loc);
1411 struct binding *b = v->var->name;
1412 fprintf(stderr, "%.*s", b->name.len, b->name.txt);
1414 fputs("???", stderr);
1416 fputs("NOTVAR", stderr);
1419 ###### propagate exec cases
1423 struct var *var = cast(var, prog);
1424 struct variable *v = var->var;
1426 type_err(c, "%d:BUG: no variable!!", prog, Vnone, Vnone);
1432 if (v->val.vtype == Vunknown) {
1433 if (type > Vunknown && *ok != 0) {
1434 val_init(&v->val, type);
1435 v->where_set = prog;
1440 if (!vtype_compat(type, v->val.vtype, bool_permitted)) {
1441 type_err(c, "error: expected %1 but variable %v is %2", prog,
1442 type, v->val.vtype);
1443 type_err(c, "info: this is where %v was set to %1", v->where_set,
1444 v->val.vtype, Vnone);
1447 if (type <= Vunknown)
1448 return v->val.vtype;
1452 ###### interp exec cases
1455 struct var *var = cast(var, e);
1456 struct variable *v = var->var;
1460 return dup_value(v->val);
1463 ###### ast functions
1465 static void free_var(struct var *v)
1470 ###### free exec cases
1471 case Xvar: free_var(cast(var, e)); break;
1473 ### Expressions: Boolean
1475 Our first user of the `binode` will be expressions, and particularly
1476 Boolean expressions. As I haven't implemented precedence in the
1477 parser generator yet, we need different names from each precedence
1478 level used by expressions. The outer most or lowest level precedence
1479 are Boolean `or` `and`, and `not` which form an `Expression` out of `BTerm`s
1490 Expression -> Expression or BTerm ${ {
1491 struct binode *b = new(binode);
1497 | BTerm ${ $0 = $<1; }$
1499 BTerm -> BTerm and BFact ${ {
1500 struct binode *b = new(binode);
1506 | BFact ${ $0 = $<1; }$
1508 BFact -> not BFact ${ {
1509 struct binode *b = new(binode);
1516 ###### print binode cases
1518 print_exec(b->left, -1, 0);
1520 print_exec(b->right, -1, 0);
1523 print_exec(b->left, -1, 0);
1525 print_exec(b->right, -1, 0);
1529 print_exec(b->right, -1, 0);
1532 ###### propagate binode cases
1536 /* both must be Vbool, result is Vbool */
1537 propagate_types(b->left, c, ok, Vbool, 0);
1538 propagate_types(b->right, c, ok, Vbool, 0);
1539 if (type != Vbool && type > Vunknown) {
1540 type_err(c, "error: %1 operation found where %2 expected", prog,
1546 ###### interp binode cases
1548 rv = interp_exec(b->left);
1549 right = interp_exec(b->right);
1550 rv.bool = rv.bool && right.bool;
1553 rv = interp_exec(b->left);
1554 right = interp_exec(b->right);
1555 rv.bool = rv.bool || right.bool;
1558 rv = interp_exec(b->right);
1562 ### Expressions: Comparison
1564 Of slightly higher precedence that Boolean expressions are
1566 A comparison takes arguments of any type, but the two types must be
1569 To simplify the parsing we introduce an `eop` which can return an
1570 expression operator.
1577 ###### ast functions
1578 static void free_eop(struct eop *e)
1593 | Expr CMPop Expr ${ {
1594 struct binode *b = new(binode);
1600 | Expr ${ $0 = $<1; }$
1605 CMPop -> < ${ $0.op = Less; }$
1606 | > ${ $0.op = Gtr; }$
1607 | <= ${ $0.op = LessEq; }$
1608 | >= ${ $0.op = GtrEq; }$
1609 | == ${ $0.op = Eql; }$
1610 | != ${ $0.op = NEql; }$
1612 ###### print binode cases
1620 print_exec(b->left, -1, 0);
1622 case Less: printf(" < "); break;
1623 case LessEq: printf(" <= "); break;
1624 case Gtr: printf(" > "); break;
1625 case GtrEq: printf(" >= "); break;
1626 case Eql: printf(" == "); break;
1627 case NEql: printf(" != "); break;
1630 print_exec(b->right, -1, 0);
1633 ###### propagate binode cases
1640 /* Both must match but not labels, result is Vbool */
1641 t = propagate_types(b->left, c, ok, Vnolabel, 0);
1643 propagate_types(b->right, c, ok, t, 0);
1645 t = propagate_types(b->right, c, ok, Vnolabel, 0);
1647 t = propagate_types(b->left, c, ok, t, 0);
1649 if (!vtype_compat(type, Vbool, 0)) {
1650 type_err(c, "error: Comparison returns %1 but %2 expected", prog,
1656 ###### interp binode cases
1665 left = interp_exec(b->left);
1666 right = interp_exec(b->right);
1667 cmp = value_cmp(left, right);
1670 case Less: rv.bool = cmp < 0; break;
1671 case LessEq: rv.bool = cmp <= 0; break;
1672 case Gtr: rv.bool = cmp > 0; break;
1673 case GtrEq: rv.bool = cmp >= 0; break;
1674 case Eql: rv.bool = cmp == 0; break;
1675 case NEql: rv.bool = cmp != 0; break;
1676 default: rv.bool = 0; break;
1681 ### Expressions: The rest
1683 The remaining expressions with the highest precedence are arithmetic
1684 and string concatenation. There are `Expr`, `Term`, and `Factor`.
1685 The `Factor` is where the `Value` and `Variable` that we already have
1688 `+` and `-` are both infix and prefix operations (where they are
1689 absolute value and negation). These have different operator names.
1691 We also have a 'Bracket' operator which records where parentheses were
1692 found. This make it easy to reproduce these when printing. Once
1693 precedence is handled better I might be able to discard this.
1705 Expr -> Expr Eop Term ${ {
1706 struct binode *b = new(binode);
1712 | Term ${ $0 = $<1; }$
1714 Term -> Term Top Factor ${ {
1715 struct binode *b = new(binode);
1721 | Factor ${ $0 = $<1; }$
1723 Factor -> ( Expression ) ${ {
1724 struct binode *b = new_pos(binode, $1);
1730 struct binode *b = new(binode);
1735 | Value ${ $0 = $<1; }$
1736 | Variable ${ $0 = $<1; }$
1739 Eop -> + ${ $0.op = Plus; }$
1740 | - ${ $0.op = Minus; }$
1742 Uop -> + ${ $0.op = Absolute; }$
1743 | - ${ $0.op = Negate; }$
1745 Top -> * ${ $0.op = Times; }$
1746 | / ${ $0.op = Divide; }$
1747 | ++ ${ $0.op = Concat; }$
1749 ###### print binode cases
1755 print_exec(b->left, indent, 0);
1757 case Plus: printf(" + "); break;
1758 case Minus: printf(" - "); break;
1759 case Times: printf(" * "); break;
1760 case Divide: printf(" / "); break;
1761 case Concat: printf(" ++ "); break;
1764 print_exec(b->right, indent, 0);
1768 print_exec(b->right, indent, 0);
1772 print_exec(b->right, indent, 0);
1776 print_exec(b->right, indent, 0);
1780 ###### propagate binode cases
1785 /* both must be numbers, result is Vnum */
1788 /* as propagate_types ignores a NULL,
1789 * unary ops fit here too */
1790 propagate_types(b->left, c, ok, Vnum, 0);
1791 propagate_types(b->right, c, ok, Vnum, 0);
1792 if (!vtype_compat(type, Vnum, 0)) {
1793 type_err(c, "error: Arithmetic returns %1 but %2 expected", prog,
1800 /* both must be Vstr, result is Vstr */
1801 propagate_types(b->left, c, ok, Vstr, 0);
1802 propagate_types(b->right, c, ok, Vstr, 0);
1803 if (!vtype_compat(type, Vstr, 0)) {
1804 type_err(c, "error: Concat returns %1 but %2 expected", prog,
1811 return propagate_types(b->right, c, ok, type, 0);
1813 ###### interp binode cases
1816 rv = interp_exec(b->left);
1817 right = interp_exec(b->right);
1818 mpq_add(rv.num, rv.num, right.num);
1821 rv = interp_exec(b->left);
1822 right = interp_exec(b->right);
1823 mpq_sub(rv.num, rv.num, right.num);
1826 rv = interp_exec(b->left);
1827 right = interp_exec(b->right);
1828 mpq_mul(rv.num, rv.num, right.num);
1831 rv = interp_exec(b->left);
1832 right = interp_exec(b->right);
1833 mpq_div(rv.num, rv.num, right.num);
1836 rv = interp_exec(b->right);
1837 mpq_neg(rv.num, rv.num);
1840 rv = interp_exec(b->right);
1841 mpq_abs(rv.num, rv.num);
1844 rv = interp_exec(b->right);
1847 left = interp_exec(b->left);
1848 right = interp_exec(b->right);
1850 rv.str = text_join(left.str, right.str);
1853 ### Blocks, Statements, and Statement lists.
1855 Now that we have expressions out of the way we need to turn to
1856 statements. There are simple statements and more complex statements.
1857 Simple statements do not contain newlines, complex statements do.
1859 Statements often come in sequences and we have corresponding simple
1860 statement lists and complex statement lists.
1861 The former comprise only simple statements separated by semicolons.
1862 The later comprise complex statements and simple statement lists. They are
1863 separated by newlines. Thus the semicolon is only used to separate
1864 simple statements on the one line. This may be overly restrictive,
1865 but I'm not sure I every want a complex statement to share a line with
1868 Note that a simple statement list can still use multiple lines if
1869 subsequent lines are indented, so
1871 ###### Example: wrapped simple statement list
1876 is a single simple statement list. This might allow room for
1877 confusion, so I'm not set on it yet.
1879 A simple statement list needs no extra syntax. A complex statement
1880 list has two syntactic forms. It can be enclosed in braces (much like
1881 C blocks), or it can be introduced by a colon and continue until an
1882 unindented newline (much like Python blocks). With this extra syntax
1883 it is referred to as a block.
1885 Note that a block does not have to include any newlines if it only
1886 contains simple statements. So both of:
1888 if condition: a=b; d=f
1890 if condition { a=b; print f }
1894 In either case the list is constructed from a `binode` list with
1895 `Block` as the operator. When parsing the list it is most convenient
1896 to append to the end, so a list is a list and a statement. When using
1897 the list it is more convenient to consider a list to be a statement
1898 and a list. So we need a function to re-order a list.
1899 `reorder_bilist` serves this purpose.
1901 The only stand-alone statement we introduce at this stage is `pass`
1902 which does nothing and is represented as a `NULL` pointer in a `Block`
1922 Block -> Open Statementlist Close ${ $0 = $<2; }$
1923 | Open Newlines Statementlist Close ${ $0 = $<3; }$
1924 | Open SimpleStatements } ${ $0 = reorder_bilist($<2); }$
1925 | Open Newlines SimpleStatements } ${ $0 = reorder_bilist($<3); }$
1926 | : Statementlist ${ $0 = $<2; }$
1927 | : SimpleStatements ${ $0 = reorder_bilist($<2); }$
1929 Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<1); }$
1931 ComplexStatements -> ComplexStatements ComplexStatement ${
1937 | ComplexStatements NEWLINE ${ $0 = $<1; }$
1938 | ComplexStatement ${
1946 ComplexStatement -> SimpleStatements NEWLINE ${
1947 $0 = reorder_bilist($<1);
1949 ## ComplexStatement Grammar
1952 SimpleStatements -> SimpleStatements ; SimpleStatement ${
1958 | SimpleStatement ${
1964 | SimpleStatements ; ${ $0 = $<1; }$
1966 SimpleStatement -> pass ${ $0 = NULL; }$
1967 ## SimpleStatement Grammar
1969 ###### print binode cases
1973 if (b->left == NULL)
1976 print_exec(b->left, indent, 0);
1979 print_exec(b->right, indent, 0);
1982 // block, one per line
1983 if (b->left == NULL)
1984 do_indent(indent, "pass\n");
1986 print_exec(b->left, indent, bracket);
1988 print_exec(b->right, indent, bracket);
1992 ###### propagate binode cases
1995 /* If any statement returns something other then Vnone
1996 * or Vbool then all such must return same type.
1997 * As each statement may be Vnone or something else,
1998 * we must always pass Vunknown down, otherwise an incorrect
1999 * error might occur. We never return Vnone unless it is
2004 for (e = b; e; e = cast(binode, e->right)) {
2005 t = propagate_types(e->left, c, ok, Vunknown, bool_permitted);
2006 if (bool_permitted && t == Vbool)
2008 if (t != Vunknown && t != Vnone && t != Vbool) {
2009 if (type == Vunknown)
2011 else if (t != type) {
2012 type_err(c, "error: expected %1, found %2",
2021 ###### interp binode cases
2023 while (rv.vtype == Vnone &&
2026 rv = interp_exec(b->left);
2027 b = cast(binode, b->right);
2031 ### The Print statement
2033 `print` is a simple statement that takes a comma-separated list of
2034 expressions and prints the values separated by spaces and terminated
2035 by a newline. No control of formatting is possible.
2037 `print` faces the same list-ordering issue as blocks, and uses the
2043 ###### SimpleStatement Grammar
2045 | print ExpressionList ${
2046 $0 = reorder_bilist($<2);
2048 | print ExpressionList , ${
2053 $0 = reorder_bilist($0);
2064 ExpressionList -> ExpressionList , Expression ${
2077 ###### print binode cases
2080 do_indent(indent, "print");
2084 print_exec(b->left, -1, 0);
2088 b = cast(binode, b->right);
2094 ###### propagate binode cases
2097 /* don't care but all must be consistent */
2098 propagate_types(b->left, c, ok, Vnolabel, 0);
2099 propagate_types(b->right, c, ok, Vnolabel, 0);
2102 ###### interp binode cases
2108 for ( ; b; b = cast(binode, b->right))
2112 left = interp_exec(b->left);
2125 ###### Assignment statement
2127 An assignment will assign a value to a variable, providing it hasn't
2128 be declared as a constant. The analysis phase ensures that the type
2129 will be correct so the interpreter just needs to perform the
2130 calculation. There is a form of assignment which declares a new
2131 variable as well as assigning a value. If a name is assigned before
2132 it is declared, and error will be raised as the name is created as
2133 `Vlabel` and it is illegal to assign to such names.
2139 ###### SimpleStatement Grammar
2140 | Variable = Expression ${ {
2141 struct var *v = cast(var, $1);
2147 if (v->var && !v->var->constant) {
2151 | VariableDecl Expression ${
2158 ###### print binode cases
2161 do_indent(indent, "");
2162 print_exec(b->left, indent, 0);
2164 print_exec(b->right, indent, 0);
2170 do_indent(indent, "");
2171 print_exec(b->left, indent, 0);
2172 if (cast(var, b->left)->var->constant)
2176 print_exec(b->right, indent, 0);
2181 ###### propagate binode cases
2185 /* Both must match and not be labels, result is Vnone */
2186 t = propagate_types(b->left, c, ok, Vnolabel, 0);
2188 if (propagate_types(b->right, c, ok, t, 0) != t)
2189 if (b->left->type == Xvar)
2190 type_err(c, "info: variable %v was set as %1 here.",
2191 cast(var, b->left)->var->where_set, t, Vnone);
2193 t = propagate_types(b->right, c, ok, Vnolabel, 0);
2195 propagate_types(b->left, c, ok, t, 0);
2201 ###### interp binode cases
2206 struct variable *v = cast(var, b->left)->var;
2209 right = interp_exec(b->right);
2212 right.vtype = Vunknown;
2216 ### The `use` statement
2218 The `use` statement is the last "simple" statement. It is needed when
2219 the condition in a conditional statement is a block. `use` works much
2220 like `return` in C, but only completes the `condition`, not the whole
2226 ###### SimpleStatement Grammar
2228 $0 = new_pos(binode, $1);
2233 ###### print binode cases
2236 do_indent(indent, "use ");
2237 print_exec(b->right, -1, 0);
2242 ###### propagate binode cases
2245 /* result matches value */
2246 return propagate_types(b->right, c, ok, type, 0);
2248 ###### interp binode cases
2251 rv = interp_exec(b->right);
2254 ### The Conditional Statement
2256 This is the biggy and currently the only complex statement. This
2257 subsumes `if`, `while`, `do/while`, `switch`, and some parts of `for`.
2258 It is comprised of a number of parts, all of which are optional though
2259 set combinations apply. Each part is (usually) a key word (`then` is
2260 sometimes optional) followed by either an expression of a code block,
2261 except the `casepart` which is a "key word and an expression" followed
2262 by a code block. The code-block option is valid for all parts and,
2263 where an expression is also allowed, the code block can use the `use`
2264 statement to report a value. If the code block does no report a value
2265 the effect is similar to reporting `False`.
2267 The `else` and `case` parts, as well as `then` when combined with
2268 `if`, can contain a `use` statement which will apply to some
2269 containing conditional statement. `for` parts, `do` parts and `then`
2270 parts used with `for` can never contain a `use`, except in some
2271 subordinate conditional statement.
2273 If there is a `forpart`, it is executed first, only once.
2274 If there is a `dopart`, then it is executed repeatedly providing
2275 always that the `condpart` or `cond`, if present, does not return a non-True
2276 value. `condpart` can fail to return any value if it simply executes
2277 to completion. This is treated the same as returning `True`.
2279 If there is a `thenpart` it will be executed whenever the `condpart`
2280 or `cond` returns True (or does not return any value), but this will happen
2281 *after* `dopart` (when present).
2283 If `elsepart` is present it will be executed at most once when the
2284 condition returns `False` or some value that isn't `True` and isn't
2285 matched by any `casepart`. If there are any `casepart`s, they will be
2286 executed when the condition returns a matching value.
2288 The particular sorts of values allowed in case parts has not yet been
2289 determined in the language design, so nothing is prohibited.
2291 The various blocks in this complex statement potentially provide scope
2292 for variables as described earlier. Each such block must include the
2293 "OpenScope" nonterminal before parsing the block, and must call
2294 `var_block_close()` when closing the block.
2296 The code following "`if`", "`switch`" and "`for`" does not get its own
2297 scope, but is in a scope covering the whole statement, so names
2298 declared there cannot be redeclared elsewhere. Similarly the
2299 condition following "`while`" is in a scope the covers the body
2300 ("`do`" part) of the loop, and which does not allow conditional scope
2301 extension. Code following "`then`" (both looping and non-looping),
2302 "`else`" and "`case`" each get their own local scope.
2304 The type requirements on the code block in a `whilepart` are quite
2305 unusal. It is allowed to return a value of some identifiable type, in
2306 which case the loop abort and an appropriate `casepart` is run, or it
2307 can return a Boolean, in which case the loop either continues to the
2308 `dopart` (on `True`) or aborts and runs the `elsepart` (on `False`).
2309 This is different both from the `ifpart` code block which is expected to
2310 return a Boolean, or the `switchpart` code block which is expected to
2311 return the same type as the casepart values. The correct analysis of
2312 the type of the `whilepart` code block is the reason for the
2313 `bool_permitted` flag which is passed to `propagate_types()`.
2315 The `cond_statement` cannot fit into a `binode` so a new `exec` is
2324 struct exec *action;
2325 struct casepart *next;
2327 struct cond_statement {
2329 struct exec *forpart, *condpart, *dopart, *thenpart, *elsepart;
2330 struct casepart *casepart;
2333 ###### ast functions
2335 static void free_casepart(struct casepart *cp)
2339 free_exec(cp->value);
2340 free_exec(cp->action);
2347 static void free_cond_statement(struct cond_statement *s)
2351 free_exec(s->forpart);
2352 free_exec(s->condpart);
2353 free_exec(s->dopart);
2354 free_exec(s->thenpart);
2355 free_exec(s->elsepart);
2356 free_casepart(s->casepart);
2360 ###### free exec cases
2361 case Xcond_statement: free_cond_statement(cast(cond_statement, e)); break;
2363 ###### ComplexStatement Grammar
2364 | CondStatement ${ $0 = $<1; }$
2369 // both ForThen and Whilepart open scopes, and CondSuffix only
2370 // closes one - so in the first branch here we have another to close.
2371 CondStatement -> ForThen WhilePart CondSuffix ${
2373 $0->forpart = $1.forpart; $1.forpart = NULL;
2374 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
2375 $0->condpart = $2.condpart; $2.condpart = NULL;
2376 $0->dopart = $2.dopart; $2.dopart = NULL;
2377 var_block_close(config2context(config), CloseSequential);
2379 | WhilePart CondSuffix ${
2381 $0->condpart = $1.condpart; $1.condpart = NULL;
2382 $0->dopart = $1.dopart; $1.dopart = NULL;
2384 | SwitchPart CondSuffix ${
2388 | IfPart IfSuffix ${
2390 $0->condpart = $1.condpart; $1.condpart = NULL;
2391 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
2392 // This is where we close an "if" statement
2393 var_block_close(config2context(config), CloseSequential);
2396 CondSuffix -> IfSuffix ${
2398 // This is where we close scope of the whole
2399 // "for" or "while" statement
2400 var_block_close(config2context(config), CloseSequential);
2402 | CasePart CondSuffix ${
2404 $1->next = $0->casepart;
2409 CasePart -> Newlines case Expression OpenScope Block ${
2410 $0 = calloc(1,sizeof(struct casepart));
2413 var_block_close(config2context(config), CloseParallel);
2415 | case Expression OpenScope Block ${
2416 $0 = calloc(1,sizeof(struct casepart));
2419 var_block_close(config2context(config), CloseParallel);
2423 IfSuffix -> Newlines ${ $0 = new(cond_statement); }$
2424 | Newlines else OpenScope Block ${
2425 $0 = new(cond_statement);
2427 var_block_close(config2context(config), CloseElse);
2429 | else OpenScope Block ${
2430 $0 = new(cond_statement);
2432 var_block_close(config2context(config), CloseElse);
2434 | Newlines else OpenScope CondStatement ${
2435 $0 = new(cond_statement);
2437 var_block_close(config2context(config), CloseElse);
2439 | else OpenScope CondStatement ${
2440 $0 = new(cond_statement);
2442 var_block_close(config2context(config), CloseElse);
2447 // These scopes are closed in CondSuffix
2448 ForPart -> for OpenScope SimpleStatements ${
2449 $0 = reorder_bilist($<3);
2451 | for OpenScope Block ${
2455 ThenPart -> then OpenScope SimpleStatements ${
2456 $0 = reorder_bilist($<3);
2457 var_block_close(config2context(config), CloseSequential);
2459 | then OpenScope Block ${
2461 var_block_close(config2context(config), CloseSequential);
2464 ThenPartNL -> ThenPart OptNL ${
2468 // This scope is closed in CondSuffix
2469 WhileHead -> while OpenScope Block ${
2474 ForThen -> ForPart OptNL ThenPartNL ${
2482 // This scope is closed in CondSuffix
2483 WhilePart -> while OpenScope Expression Block ${
2484 $0.type = Xcond_statement;
2488 | WhileHead OptNL do Block ${
2489 $0.type = Xcond_statement;
2494 IfPart -> if OpenScope Expression OpenScope Block ${
2495 $0.type = Xcond_statement;
2498 var_block_close(config2context(config), CloseParallel);
2500 | if OpenScope Block OptNL then OpenScope Block ${
2501 $0.type = Xcond_statement;
2504 var_block_close(config2context(config), CloseParallel);
2508 // This scope is closed in CondSuffix
2509 SwitchPart -> switch OpenScope Expression ${
2512 | switch OpenScope Block ${
2516 ###### print exec cases
2518 case Xcond_statement:
2520 struct cond_statement *cs = cast(cond_statement, e);
2521 struct casepart *cp;
2523 do_indent(indent, "for");
2524 if (bracket) printf(" {\n"); else printf(":\n");
2525 print_exec(cs->forpart, indent+1, bracket);
2528 do_indent(indent, "} then {\n");
2530 do_indent(indent, "then:\n");
2531 print_exec(cs->thenpart, indent+1, bracket);
2533 if (bracket) do_indent(indent, "}\n");
2537 if (cs->condpart && cs->condpart->type == Xbinode &&
2538 cast(binode, cs->condpart)->op == Block) {
2540 do_indent(indent, "while {\n");
2542 do_indent(indent, "while:\n");
2543 print_exec(cs->condpart, indent+1, bracket);
2545 do_indent(indent, "} do {\n");
2547 do_indent(indent, "do:\n");
2548 print_exec(cs->dopart, indent+1, bracket);
2550 do_indent(indent, "}\n");
2552 do_indent(indent, "while ");
2553 print_exec(cs->condpart, 0, bracket);
2558 print_exec(cs->dopart, indent+1, bracket);
2560 do_indent(indent, "}\n");
2565 do_indent(indent, "switch");
2567 do_indent(indent, "if");
2568 if (cs->condpart && cs->condpart->type == Xbinode &&
2569 cast(binode, cs->condpart)->op == Block) {
2574 print_exec(cs->condpart, indent+1, bracket);
2576 do_indent(indent, "}\n");
2578 do_indent(indent, "then:\n");
2579 print_exec(cs->thenpart, indent+1, bracket);
2583 print_exec(cs->condpart, 0, bracket);
2589 print_exec(cs->thenpart, indent+1, bracket);
2591 do_indent(indent, "}\n");
2596 for (cp = cs->casepart; cp; cp = cp->next) {
2597 do_indent(indent, "case ");
2598 print_exec(cp->value, -1, 0);
2603 print_exec(cp->action, indent+1, bracket);
2605 do_indent(indent, "}\n");
2608 do_indent(indent, "else");
2613 print_exec(cs->elsepart, indent+1, bracket);
2615 do_indent(indent, "}\n");
2620 ###### propagate exec cases
2621 case Xcond_statement:
2623 // forpart and dopart must return Vnone
2624 // thenpart must return Vnone if there is a dopart,
2625 // otherwise it is like elsepart.
2627 // be bool if there is not casepart
2628 // match casepart->values if there is a switchpart
2629 // either be bool or match casepart->value if there
2631 // elsepart, casepart->action must match there return type
2632 // expected of this statement.
2633 struct cond_statement *cs = cast(cond_statement, prog);
2634 struct casepart *cp;
2636 t = propagate_types(cs->forpart, c, ok, Vnone, 0);
2637 if (!vtype_compat(Vnone, t, 0))
2639 t = propagate_types(cs->dopart, c, ok, Vnone, 0);
2640 if (!vtype_compat(Vnone, t, 0))
2643 t = propagate_types(cs->thenpart, c, ok, Vnone, 0);
2644 if (!vtype_compat(Vnone, t, 0))
2647 if (cs->casepart == NULL)
2648 propagate_types(cs->condpart, c, ok, Vbool, 0);
2650 /* Condpart must match case values, with bool permitted */
2652 for (cp = cs->casepart;
2653 cp && (t == Vunknown); cp = cp->next)
2654 t = propagate_types(cp->value, c, ok, Vunknown, 0);
2655 if (t == Vunknown && cs->condpart)
2656 t = propagate_types(cs->condpart, c, ok, Vunknown, 1);
2657 // Now we have a type (I hope) push it down
2658 if (t != Vunknown) {
2659 for (cp = cs->casepart; cp; cp = cp->next)
2660 propagate_types(cp->value, c, ok, t, 0);
2661 propagate_types(cs->condpart, c, ok, t, 1);
2664 // (if)then, else, and case parts must return expected type.
2665 if (!cs->dopart && type == Vunknown)
2666 type = propagate_types(cs->thenpart, c, ok, Vunknown, bool_permitted);
2667 if (type == Vunknown)
2668 type = propagate_types(cs->elsepart, c, ok, Vunknown, bool_permitted);
2669 for (cp = cs->casepart;
2670 cp && type == Vunknown;
2672 type = propagate_types(cp->action, c, ok, Vunknown, bool_permitted);
2673 if (type > Vunknown) {
2675 propagate_types(cs->thenpart, c, ok, type, bool_permitted);
2676 propagate_types(cs->elsepart, c, ok, type, bool_permitted);
2677 for (cp = cs->casepart; cp ; cp = cp->next)
2678 propagate_types(cp->action, c, ok, type, bool_permitted);
2684 ###### interp exec cases
2685 case Xcond_statement:
2687 struct value v, cnd;
2688 struct casepart *cp;
2689 struct cond_statement *c = cast(cond_statement, e);
2691 interp_exec(c->forpart);
2694 cnd = interp_exec(c->condpart);
2697 if (!(cnd.vtype == Vnone ||
2698 (cnd.vtype == Vbool && cnd.bool != 0)))
2702 interp_exec(c->dopart);
2705 v = interp_exec(c->thenpart);
2706 if (v.vtype != Vnone || !c->dopart)
2710 } while (c->dopart);
2712 for (cp = c->casepart; cp; cp = cp->next) {
2713 v = interp_exec(cp->value);
2714 if (value_cmp(v, cnd) == 0) {
2717 return interp_exec(cp->action);
2723 return interp_exec(c->elsepart);
2728 ### Finally the whole program.
2730 Somewhat reminiscent of Pascal a (current) Ocean program starts with
2731 the keyword "program" and a list of variable names which are assigned
2732 values from command line arguments. Following this is a `block` which
2733 is the code to execute.
2735 As this is the top level, several things are handled a bit
2737 The whole program is not interpreted by `interp_exec` as that isn't
2738 passed the argument list which the program requires. Similarly type
2739 analysis is a bit more interesting at this level.
2744 ###### Parser: grammar
2747 Program -> program OpenScope Varlist Block OptNL ${
2750 $0->left = reorder_bilist($<3);
2752 var_block_close(config2context(config), CloseSequential);
2753 if (config2context(config)->scope_stack) abort();
2756 Varlist -> Varlist ArgDecl ${
2765 ArgDecl -> IDENTIFIER ${ {
2766 struct variable *v = var_decl(config2context(config), $1.txt);
2773 ###### print binode cases
2775 do_indent(indent, "program");
2776 for (b2 = cast(binode, b->left); b2; b2 = cast(binode, b2->right)) {
2778 print_exec(b2->left, 0, 0);
2784 print_exec(b->right, indent+1, bracket);
2786 do_indent(indent, "}\n");
2789 ###### propagate binode cases
2790 case Program: abort();
2792 ###### core functions
2794 static int analyse_prog(struct exec *prog, struct parse_context *c)
2796 struct binode *b = cast(binode, prog);
2801 propagate_types(b->right, c, &ok, Vnone, 0);
2806 for (b = cast(binode, b->left); b; b = cast(binode, b->right)) {
2807 struct var *v = cast(var, b->left);
2808 if (v->var->val.vtype == Vunknown) {
2809 v->var->where_set = b;
2810 val_init(&v->var->val, Vstr);
2813 b = cast(binode, prog);
2816 propagate_types(b->right, c, &ok, Vnone, 0);
2821 /* Make sure everything is still consistent */
2822 propagate_types(b->right, c, &ok, Vnone, 0);
2826 static void interp_prog(struct exec *prog, char **argv)
2828 struct binode *p = cast(binode, prog);
2829 struct binode *al = cast(binode, p->left);
2833 struct var *v = cast(var, al->left);
2834 struct value *vl = &v->var->val;
2836 if (argv[0] == NULL) {
2837 printf("Not enough args\n");
2840 al = cast(binode, al->right);
2842 if (!parse_value(vl, argv[0]))
2846 v = interp_exec(p->right);
2850 ###### interp binode cases
2851 case Program: abort();
2853 ## And now to test it out.
2855 Having a language requires having a "hello world" program. I'll
2856 provide a little more than that: a program that prints "Hello world"
2857 finds the GCD of two numbers, prints the first few elements of
2858 Fibonacci, and performs a binary search for a number.
2860 ###### File: oceani.mk
2863 @echo "===== TEST ====="
2864 ./oceani --section "test: hello" oceani.mdc 55 33
2869 print "Hello World, what lovely oceans you have!"
2870 /* When a variable is defined in both branches of an 'if',
2871 * and used afterwards, the variables are merged.
2877 print "Is", A, "bigger than", B,"? ", bigger
2878 /* If a variable is not used after the 'if', no
2879 * merge happens, so types can be different
2883 print A, "is more than twice", B, "?", double
2886 print "double", A, "is only", double
2895 print "GCD of", A, "and", B,"is", a
2897 print a, "is not positive, cannot calculate GCD"
2899 print b, "is not positive, cannot calculate GCD"
2904 print "Fibonacci:", f1,f2,
2905 then togo = togo - 1
2913 /* Binary search... */
2918 mid := (lo + hi) / 2
2930 print "Yay, I found", target
2932 print "Closest I found was", mid