1 # Ocean Interpreter - Stoney Creek version
3 Ocean is intended to be a compiled language, so this interpreter is
4 not targeted at being the final product. It is, rather, an intermediate
5 stage, and fills that role in two distinct ways.
7 Firstly, it exists as a platform to experiment with the early language
8 design. An interpreter is easy to write and easy to get working, so
9 the barrier for entry is lower if I aim to start with an interpreter.
11 Secondly, the plan for the Ocean compiler is to write it in the
12 [Ocean language](http://ocean-lang.org). To achieve this we naturally
13 need some sort of boot-strap process and this interpreter - written in
14 portable C - will fill that role. It will be used to bootstrap the
17 Two features that are not needed to fill either of these roles are
18 performance and completeness. The interpreter only needs to be fast
19 enough to run small test programs and occasionally to run the compiler
20 on itself. It only needs to be complete enough to test aspects of the
21 design which are developed before the compiler is working, and to run
22 the compiler on itself. Any features not used by the compiler when
23 compiling itself are superfluous. They may be included anyway, but
26 Nonetheless, the interpreter should end up being reasonably complete,
27 and any performance bottlenecks which appear and are easily fixed, will
32 This second version of the interpreter exists to test out the
33 structured statement providing conditions and iteration, and simple
34 variable scoping. Clearly we need some minimal other functionality so
35 that values can be tested and instructions iterated over. All that
36 functionality is clearly not normative at this stage (not that
37 anything is **really** normative yet) and will change, so early test
38 code will certainly break in later versions.
40 The under-test parts of the language are:
42 - conditional/looping structured statements
43 - the `use` statement which is needed for that
44 - Variable binding using ":=" and "::=", and assignment using "=".
46 Elements which are present to make a usable language are:
48 - "blocks" of multiple statements.
49 - `pass`: a statement which does nothing.
50 - expressions: `+`, `-`, `*`, `/` can apply to numbers and `++` can
51 catenate strings. `and`, `or`, `not` manipulate Booleans, and
52 normal comparison operators can work on all three types.
53 - `print`: will print the values in a list of expressions.
54 - `program`: is given a list of identifiers to initialize from
59 Versions of the interpreter which obviously do not support a complete
60 language will be named after creeks and streams. This one is Stoney
63 Once we have something reasonably resembling a complete language, the
64 names of rivers will be used.
65 Early versions of the compiler will be named after seas. Major
66 releases of the compiler will be named after oceans. Hopefully I will
67 be finished once I get to the Pacific Ocean release.
71 As well as parsing and executing a program, the interpreter can print
72 out the program from the parsed internal structure. This is useful
73 for validating the parsing.
74 So the main requirements of the interpreter are:
76 - Parse the program, possibly with tracing
77 - Analyse the parsed program to ensure consistency
81 This is all performed by a single C program extracted with
84 There will be two formats for printing the program: a default and one
85 that uses bracketing. So a `--bracket` command line option is needed
86 for that. Normally the first code section found is used, however an
87 alternate section can be requested so that a file (such as this one)
88 can contain multiple programs. This is effected with the `--section`
91 ###### File: oceani.mk
93 myCFLAGS := -Wall -g -fplan9-extensions
94 CFLAGS := $(filter-out $(myCFLAGS),$(CFLAGS)) $(myCFLAGS)
95 myLDLIBS:= libparser.o libscanner.o libmdcode.o -licuuc
96 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
98 all :: $(LDLIBS) oceani
99 oceani.c oceani.h : oceani.mdc parsergen
100 ./parsergen -o oceani --LALR --tag Parser oceani.mdc
101 oceani.mk: oceani.mdc md2c
104 oceani: oceani.o $(LDLIBS)
105 $(CC) $(CFLAGS) -o oceani oceani.o $(LDLIBS)
107 ###### Parser: header
110 struct parse_context {
111 struct token_config config;
/*
 * container_of(ptr, type, member): given 'ptr', the address of the field
 * 'member' inside an object of type 'type', yield a pointer to that
 * enclosing object by subtracting offsetof(type, member) from 'ptr'.
 * The typed temporary __mptr makes the compiler check that 'ptr' really
 * has the member's type.  Uses the GNU statement-expression and typeof
 * extensions.
 */
119 #define container_of(ptr, type, member) ({ \
120 const typeof( ((type *)0)->member ) *__mptr = (ptr); \
121 (type *)( (char *)__mptr - offsetof(type,member) );})
123 #define config2context(_conf) container_of(_conf, struct parse_context, \
132 #include <sys/mman.h>
/*
 * Usage text written to stderr when an unrecognised option is seen.
 * The two literals are concatenated by the compiler, so the first must
 * end with a space to keep "--brackets" and "--section" apart.
 */
static char Usage[] = "Usage: oceani --trace --print --noexec --brackets "
		      "--section=SectionName prog.ocn\n";
153 static const struct option long_options[] = {
154 {"trace", 0, NULL, 't'},
155 {"print", 0, NULL, 'p'},
156 {"noexec", 0, NULL, 'n'},
157 {"brackets", 0, NULL, 'b'},
158 {"section", 1, NULL, 's'},
/*
 * Short-option string handed to getopt_long().  's' carries a trailing
 * ':' because the matching long option ("section") is declared with
 * has_arg = 1; without it "-s NAME" would leave optarg NULL.
 */
const char *options = "tpnbs:";
162 int main(int argc, char *argv[])
168 char *section = NULL;
169 struct parse_context context = {
171 .ignored = (1 << TK_line_comment)
172 | (1 << TK_block_comment),
173 .number_chars = ".,_+-",
178 int doprint=0, dotrace=0, doexec=1, brackets=0;
181 while ((opt = getopt_long(argc, argv, options, long_options, NULL))
184 case 't': dotrace=1; break;
185 case 'p': doprint=1; break;
186 case 'n': doexec=0; break;
187 case 'b': brackets=1; break;
188 case 's': section = optarg; break;
189 default: fprintf(stderr, Usage);
193 if (optind >= argc) {
194 fprintf(stderr, "oceani: no input file given\n");
197 fd = open(argv[optind], O_RDONLY);
199 fprintf(stderr, "oceani: cannot open %s\n", argv[optind]);
202 context.file_name = argv[optind];
203 len = lseek(fd, 0, 2);
204 file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
205 s = code_extract(file, file+len, NULL);
207 fprintf(stderr, "oceani: could not find any code in %s\n",
213 for (ss = s; ss; ss = ss->next) {
214 struct text sec = ss->section;
215 if (sec.len == strlen(section) &&
216 strncmp(sec.txt, section, sec.len) == 0)
220 prog = parse_oceani(ss->code, &context.config,
221 dotrace ? stderr : NULL);
223 fprintf(stderr, "oceani: cannot find section %s\n",
228 prog = parse_oceani(s->code, &context.config,
229 dotrace ? stderr : NULL);
231 fprintf(stderr, "oceani: fatal parser error.\n");
232 context.parse_error = 1;
235 print_exec(*prog, 0, brackets);
236 if (prog && doexec && !context.parse_error) {
237 if (!analyse_prog(*prog, &context)) {
238 fprintf(stderr, "oceani: type error in program - not running.\n");
241 interp_prog(*prog, argv+optind+1);
248 struct section *t = s->next;
254 exit(context.parse_error ? 1 : 0);
259 These four requirements of parse, analyse, print, interpret apply to
260 each language element individually so that is how most of the code
263 Three of the four are fairly self explanatory. The one that requires
264 a little explanation is the analysis step.
266 The current language design does not require (or even allow) the types
267 of variables to be declared, but they must still have a single type.
268 Different operations impose different requirements on the variables,
269 for example addition requires both arguments to be numeric, and
270 assignment requires the variable on the left to have the same type as
271 the expression on the right.
273 Analysis involves propagating these type requirements around and
274 consequently setting the type of each variable. If any requirements
275 are violated (e.g. a string is compared with a number) or if a
276 variable needs to have two different types, then an error is raised
277 and the program will not run.
279 If the same variable is declared in both branches of an 'if/else', or
280 in all cases of a 'switch' then the multiple instances may be merged
281 into just one variable if the variable is referenced after the
282 conditional statement. When this happens, the types must naturally be
283 consistent across all the branches. When the variable is not used
284 outside the if, the variables in the different branches are distinct
285 and can be of different types.
287 Determining the types of all variables early is important for
288 processing command line arguments. These can be assigned to any type
289 of variable, but we must first know the correct type so any required
290 conversion can happen. If a variable is associated with a command
291 line argument but no type can be interpreted (e.g. the variable is
292 only ever used in a `print` statement), then the type is set to
295 Undeclared names may only appear in "use" statements and "case" expressions.
296 These names are given a type of "label" and a unique value.
297 This allows them to fill the role of a name in an enumerated type, which
298 is useful for testing the `switch` statement.
300 As we will see, the condition part of a `while` statement can return
301 either a Boolean or some other type. This requires that the expected
302 type that gets passed around comprises a type (`enum vtype`) and a
303 flag to indicate that `Vbool` is also permitted.
305 As there are, as yet, no distinct types that are compatible, there
306 isn't much subtlety in the analysis. When we have distinct number
307 types, this will become more interesting.
311 When analysis discovers an inconsistency it needs to report an error;
312 just refusing to run the code ensures that the error doesn't cascade,
313 but by itself it isn't very useful. A clear understanding of the sort of
314 error messages that are useful will help guide the process of analysis.
316 At a simplistic level, the only sort of error that type analysis can
317 report is that the type of some construct doesn't match a contextual
318 requirement. For example, in `4 + "hello"` the addition provides a
319 contextual requirement for numbers, but `"hello"` is not a number. In
320 this particular example no further information is needed as the types
321 are obvious from local information. When a variable is involved that
322 isn't the case. It may be helpful to explain why the variable has a
323 particular type, by indicating the location where the type was set,
324 whether by declaration or usage.
326 Using a recursive-descent analysis we can easily detect a problem at
327 multiple locations. In "`hello:= "there"; 4 + hello`" the addition
328 will detect that one argument is not a number and the usage of `hello`
329 will detect that a number was wanted, but not provided. In this
330 (early) version of the language, we will generate error reports at
331 multiple locations, so the use of `hello` will report an error and
332 explain where the value was set, and the addition will report an error
333 and say why numbers are needed. To be able to report locations for
334 errors, each language element will need to record a file location
335 (line and column) and each variable will need to record the language
336 element where its type was set. For now we will assume that each line
337 of an error message indicates one location in the file, and up to 2
338 types. So we provide a `printf`-like function which takes a format, a
339 language element (a `struct exec` which has not yet been introduced), and 2
340 types. "`$1`" reports the first type, "`$2`" reports the second. We
341 will need a function to print the location, once we know how that is
346 static void fput_loc(struct exec *loc, FILE *f);
348 ###### core functions
350 static void type_err(struct parse_context *c,
351 char *fmt, struct exec *loc,
352 enum vtype t1, enum vtype t2)
354 fprintf(stderr, "%s:", c->file_name);
355 fput_loc(loc, stderr);
356 for (; *fmt ; fmt++) {
363 case '%': fputc(*fmt, stderr); break;
364 default: fputc('?', stderr); break;
366 fputs(vtype_names[t1], stderr);
369 fputs(vtype_names[t2], stderr);
380 One last introductory step before detailing the language elements and
381 providing their four requirements is to establish the data structures
382 to store these elements.
384 There are two key objects that we need to work with: executable
385 elements which comprise the program, and values which the program
386 works with. Between these are the variables in their various scopes
387 which hold the values.
391 Values can be numbers, which we represent as multi-precision
392 fractions, strings, Booleans and labels. When analysing the program
393 we also need to allow for places where no value is meaningful
394 (`Vnone`) and where we don't know what type to expect yet (`Vunknown`
395 which can be anything and `Vnolabel` which can be anything except a
396 label). A 2 character 'tail' is included in each value as the scanner
397 wants to parse that from the end of numbers and we need somewhere to
398 put it. It is currently ignored but one day might allow for
399 e.g. "imaginary" numbers.
401 Values are never shared, they are always copied when used, and freed
402 when no longer needed.
404 When propagating type information around the program, we need to
405 determine if two types are compatible, where `Vunknown` is compatible
406 with anything, and `Vnolabel` is compatible with anything except a
407 label. A separate function to encode this rule will simplify some code
410 When assigning command line arguments to variables, we need to be able
411 to parse each type from a string.
419 myLDLIBS := libnumber.o libstring.o -lgmp
420 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
424 enum vtype {Vnolabel, Vunknown, Vnone, Vstr, Vnum, Vbool, Vlabel} vtype;
/*
 * Printable name for each 'enum vtype' value, indexed by the enumerator.
 * The order here must stay in step with the enum declaration
 * (Vnolabel, Vunknown, Vnone, Vstr, Vnum, Vbool, Vlabel); type_err()
 * indexes this table directly with the types it is given.
 */
434 char *vtype_names[] = {"nolabel", "unknown", "none", "string",
435 "number", "Boolean", "label"};
438 static void free_value(struct value v)
443 case Vunknown: break;
444 case Vstr: free(v.str.txt); break;
445 case Vnum: mpq_clear(v.num); break;
451 static int vtype_compat(enum vtype require, enum vtype have, int bool_permitted)
453 if (bool_permitted && have == Vbool)
457 return have != Vlabel;
461 return have == Vunknown || require == have;
465 ###### value functions
467 static void val_init(struct value *val, enum vtype type)
473 case Vunknown: break;
475 mpq_init(val->num); break;
477 val->str.txt = malloc(1);
489 static struct value dup_value(struct value v)
496 case Vunknown: break;
505 mpq_set(rv.num, v.num);
508 rv.str.len = v.str.len;
509 rv.str.txt = malloc(rv.str.len);
510 memcpy(rv.str.txt, v.str.txt, v.str.len);
516 static int value_cmp(struct value left, struct value right)
519 if (left.vtype != right.vtype)
520 return left.vtype - right.vtype;
521 switch (left.vtype) {
522 case Vlabel: cmp = left.label == right.label ? 0 : 1; break;
523 case Vnum: cmp = mpq_cmp(left.num, right.num); break;
524 case Vstr: cmp = text_cmp(left.str, right.str); break;
525 case Vbool: cmp = left.bool - right.bool; break;
528 case Vunknown: cmp = 0;
533 static struct text text_join(struct text a, struct text b)
536 rv.len = a.len + b.len;
537 rv.txt = malloc(rv.len);
538 memcpy(rv.txt, a.txt, a.len);
539 memcpy(rv.txt+a.len, b.txt, b.len);
543 static void print_value(struct value v)
547 printf("*Unknown*"); break;
550 printf("*no-value*"); break;
552 printf("*label-%p*", v.label); break;
554 printf("%.*s", v.str.len, v.str.txt); break;
556 printf("%s", v.bool ? "True":"False"); break;
561 mpf_set_q(fl, v.num);
562 gmp_printf("%Fg", fl);
569 static int parse_value(struct value *vl, char *arg)
580 vl->str.len = strlen(arg);
581 vl->str.txt = malloc(vl->str.len);
582 memcpy(vl->str.txt, arg, vl->str.len);
589 tx.txt = arg; tx.len = strlen(tx.txt);
590 if (number_parse(vl->num, vl->tail, tx) == 0)
593 mpq_neg(vl->num, vl->num);
596 if (strcasecmp(arg, "true") == 0 ||
597 strcmp(arg, "1") == 0)
599 else if (strcasecmp(arg, "false") == 0 ||
600 strcmp(arg, "0") == 0)
603 printf("Bad bool: %s\n", arg);
613 Variables are scoped named values. We store the names in a linked
614 list of "bindings" sorted lexically, and use sequential search and
621 struct binding *next; // in lexical order
625 This linked list is stored in the parse context so that "reduce"
626 functions can find or add variables, and so the analysis phase can
627 ensure that every variable gets a type.
631 struct binding *varlist; // In lexical order
635 static struct binding *find_binding(struct parse_context *c, struct text s)
637 struct binding **l = &c->varlist;
642 (cmp = text_cmp((*l)->name, s)) < 0)
646 n = calloc(1, sizeof(*n));
653 Each name can be linked to multiple variables defined in different
654 scopes. Each scope starts where the name is declared and continues
655 until the end of the containing code block. Scopes of a given name
656 cannot nest, so a declaration while a name is in-scope is an error.
658 ###### binding fields
659 struct variable *var;
663 struct variable *previous;
665 struct binding *name;
666 struct exec *where_decl;// where name was declared
667 struct exec *where_set; // where type was set
671 While the naming seems strange, we include local constants in the
672 definition of variables. A name declared `var := value` can
673 subsequently be changed, but a name declared `var ::= value` cannot -
676 ###### variable fields
679 Scopes in parallel branches can be partially merged. More
680 specifically, if a given name is declared in both branches of an
681 if/else then its scope is a candidate for merging. Similarly if
682 every branch of an exhaustive switch (e.g. has an "else" clause)
683 declares a given name, then the scopes from the branches are
684 candidates for merging.
686 Note that names declared inside a loop (which is only parallel to
687 itself) are never visible after the loop. Similarly names defined in
688 scopes which are not parallel, such as those started by `for` and
689 `switch`, are never visible after the scope. Only variables defined in
690 both `then` and `else` (including the implicit then after an `if`, and
691 excluding `then` used with `for`) and in all `case`s and `else` of a
692 `switch` or `while` can be visible beyond the `if`/`switch`/`while`.
694 Labels, which are a bit like variables, follow different rules.
695 Labels are not explicitly declared, but if an undeclared name appears
696 in a context where a label is legal, that effectively declares the
697 name as a label. The declaration remains in force (or in scope) at
698 least to the end of the immediately containing block and conditionally
699 in any larger containing block which does not declare the name in some
700 other way. Importantly, the conditional scope extension happens even
701 if the label is only used in one parallel branch of a conditional -- when
702 used in one branch it is treated as having been declared in all
705 Merge candidates are tentatively visible beyond the end of the
706 branching statement which creates them. If the name is used, the
707 merge is affirmed and they become a single variable visible at the
708 outer layer. If not - if it is redeclared first - the merge lapses.
710 To track scopes we have an extra stack, implemented as a linked list,
711 which roughly parallels the parse stack and which is used exclusively
712 for scoping. When a new scope is opened, a new frame is pushed and
713 the child-count of the parent frame is incremented. This child-count
714 is used to distinguish between the first of a set of parallel scopes,
715 in which declared variables must not be in scope, and subsequent
716 branches, where they must already be conditionally scoped.
718 To push a new frame *before* any code in the frame is parsed, we need a
719 grammar reduction. This is most easily achieved with a grammar
720 element which derives the empty string, and creates the new scope when
721 it is recognized. This can be placed, for example, between a keyword
722 like "if" and the code following it.
726 struct scope *parent;
732 struct scope *scope_stack;
735 static void scope_pop(struct parse_context *c)
737 struct scope *s = c->scope_stack;
739 c->scope_stack = s->parent;
744 static void scope_push(struct parse_context *c)
746 struct scope *s = calloc(1, sizeof(*s));
748 c->scope_stack->child_count += 1;
749 s->parent = c->scope_stack;
757 OpenScope -> ${ scope_push(config2context(config)); }$
760 Each variable records a scope depth and is in one of four states:
762 - "in scope". This is the case between the declaration of the
763 variable and the end of the containing block, and also between
764 the usage which affirms a merge and the end of the block.
766 The scope depth is not greater than the current parse context scope
767 nest depth. When the block of that depth closes, the state will
768 change. To achieve this, all "in scope" variables are linked
769 together as a stack in nesting order.
771 - "pending". The "in scope" block has closed, but other parallel
772 scopes are still being processed. So far, every parallel block at
773 the same level that has closed has declared the name.
775 The scope depth is the depth of the last parallel block that
776 enclosed the declaration, and that has closed.
778 - "conditionally in scope". The "in scope" block and all parallel
779 scopes have closed, and no further mention of the name has been
780 seen. This state includes a secondary nest depth which records the
781 outermost scope seen since the variable became conditionally in
782 scope. If a use of the name is found, the variable becomes "in
783 scope" and that secondary depth becomes the recorded scope depth.
784 If the name is declared as a new variable, the old variable becomes
785 "out of scope" and the recorded scope depth stays unchanged.
787 - "out of scope". The variable is neither in scope nor conditionally
788 in scope. It is permanently out of scope now and can be removed from
789 the "in scope" stack.
792 ###### variable fields
793 int depth, min_depth;
794 enum { OutScope, PendingScope, CondScope, InScope } scope;
795 struct variable *in_scope;
799 struct variable *in_scope;
801 All variables with the same name are linked together using the
802 'previous' link. Those variables that have
803 been affirmatively merged all have a 'merged' pointer that points to
804 one primary variable - the most recently declared instance. When
805 merging variables, we need to also adjust the 'merged' pointer on any
806 other variables that had previously been merged with the one that will
807 no longer be primary.
809 ###### variable fields
810 struct variable *merged;
814 static void variable_merge(struct variable *primary, struct variable *secondary)
820 primary = primary->merged;
822 for (v = primary->previous; v; v=v->previous)
823 if (v == secondary || v == secondary->merged ||
824 v->merged == secondary ||
825 (v->merged && v->merged == secondary->merged)) {
833 while (context.varlist) {
834 struct binding *b = context.varlist;
835 struct variable *v = b->var;
836 context.varlist = b->next;
839 struct variable *t = v;
847 #### Manipulating Bindings
849 When a name is conditionally visible, a new declaration discards the
850 old binding - the condition lapses. Conversely a usage of the name
851 affirms the visibility and extends it to the end of the containing
852 block - i.e. the block that contains both the original declaration and
853 the latest usage. This is determined from `min_depth`. When a
854 conditionally visible variable gets affirmed like this, it is also
855 merged with other conditionally visible variables with the same name.
857 When we parse a variable declaration we either signal an error if the
858 name is currently bound, or create a new variable at the current nest
859 depth if the name is unbound or bound to a conditionally scoped or
860 pending-scope variable. If the previous variable was conditionally
861 scoped, it and its homonyms become out-of-scope.
863 When we parse a variable reference (including non-declarative
864 assignment) we signal an error if the name is not bound or is bound to
865 a pending-scope variable; update the scope if the name is bound to a
866 conditionally scoped variable; or just proceed normally if the named
867 variable is in scope.
869 When we exit a scope, any variables bound at this level are either
870 marked out of scope or pending-scoped, depending on whether the
871 scope was sequential or parallel.
873 When exiting a parallel scope we check if there are any variables that
874 were previously pending and are still visible. If there are, then
875 they weren't redeclared in the most recent scope, so they cannot be
876 merged and must become out-of-scope. If it is not the first of
877 parallel scopes (based on `child_count`), we check that there was a
878 previous binding that is still pending-scope. If there isn't, the new
879 variable must now be out-of-scope.
881 When exiting a sequential scope that immediately enclosed parallel
882 scopes, we need to resolve any pending-scope variables. If there was
883 no `else` clause, and we cannot determine that the `switch` was exhaustive,
884 we need to mark all pending-scope variables as out-of-scope. Otherwise
885 all pending-scope variables become conditionally scoped.
/*
 * How a block is being closed; passed to var_block_close() as 'ct'.
 * That function switches on this value to decide what scope state the
 * variables declared in the closing block move to:
 *   CloseSequential - the block was an ordinary sequential scope.
 *   CloseParallel   - the block was one of a set of parallel branches.
 *   CloseElse       - handled together with CloseParallel, with the extra
 *                     step that pending variables of the same name are
 *                     made conditionally scoped.
 */
888 enum closetype { CloseSequential, CloseParallel, CloseElse };
892 static struct variable *var_decl(struct parse_context *c, struct text s)
894 struct binding *b = find_binding(c, s);
895 struct variable *v = b->var;
897 switch (v ? v->scope : OutScope) {
899 /* Caller will report the error */
903 v && v->scope == CondScope;
909 v = calloc(1, sizeof(*v));
910 v->previous = b->var;
913 v->min_depth = v->depth = c->scope_depth;
915 v->in_scope = c->in_scope;
917 val_init(&v->val, Vunknown);
921 static struct variable *var_ref(struct parse_context *c, struct text s)
923 struct binding *b = find_binding(c, s);
924 struct variable *v = b->var;
927 switch (v ? v->scope : OutScope) {
930 /* Signal an error - once that is possible */
933 /* All CondScope variables of this name need to be merged
936 v->depth = v->min_depth;
938 for (v2 = v->previous;
939 v2 && v2->scope == CondScope;
941 variable_merge(v, v2);
949 static void var_block_close(struct parse_context *c, enum closetype ct)
951 /* close of all variables that are in_scope */
952 struct variable *v, **vp, *v2;
955 for (vp = &c->in_scope;
956 v = *vp, v && v->depth > c->scope_depth && v->min_depth > c->scope_depth;
960 case CloseParallel: /* handle PendingScope */
964 if (c->scope_stack->child_count == 1)
965 v->scope = PendingScope;
966 else if (v->previous &&
967 v->previous->scope == PendingScope)
968 v->scope = PendingScope;
969 else if (v->val.vtype == Vlabel)
970 v->scope = PendingScope;
971 else if (v->name->var == v)
973 if (ct == CloseElse) {
974 /* All Pending variables with this name
975 * are now Conditional */
977 v2 && v2->scope == PendingScope;
979 v2->scope = CondScope;
984 v2 && v2->scope == PendingScope;
986 if (v2->val.vtype != Vlabel)
987 v2->scope = OutScope;
989 case OutScope: break;
992 case CloseSequential:
993 if (v->val.vtype == Vlabel)
994 v->scope = PendingScope;
1000 /* There was no 'else', so we can only become
1001 * conditional if we know the cases were exhaustive,
1002 * and that doesn't mean anything yet.
1003 * So only labels become conditional..
1006 v2 && v2->scope == PendingScope;
1008 if (v2->val.vtype == Vlabel) {
1009 v2->scope = CondScope;
1010 v2->min_depth = c->scope_depth;
1012 v2->scope = OutScope;
1015 case OutScope: break;
1019 if (v->scope == OutScope)
1028 Executables can be lots of different things. In many cases an
1029 executable is just an operation combined with one or two other
1030 executables. This allows for expressions and lists etc. Other times
1031 an executable is something quite specific like a constant or variable
1032 name. So we define a `struct exec` to be a general executable with a
1033 type, and a `struct binode` which is a subclass of `exec` and forms a
1034 node in a binary tree and holds an operation. There will be other
1035 subclasses, and to access these we need to be able to `cast` the
1036 `exec` into the various other types.
/*
 * cast(structname, pointer): checked conversion of 'pointer' to
 * 'struct structname *'.
 * Takes the address of pointer->type and, when that address is non-NULL,
 * calls abort() unless the stored tag equals X<structname>.  The result
 * is that same address reinterpreted as 'struct structname *'.
 * NOTE(review): this presumably relies on 'type' being the first member
 * of every such struct, so its address equals the struct's - confirm.
 * Uses the GNU statement-expression and typeof extensions.
 */
1039 #define cast(structname, pointer) ({ \
1040 const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \
1041 if (__mptr && *__mptr != X##structname) abort(); \
1042 (struct structname *)( (char *)__mptr);})
1044 #define new(structname) ({ \
1045 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1046 __ptr->type = X##structname; \
1047 __ptr->line = -1; __ptr->column = -1; \
1050 #define new_pos(structname, token) ({ \
1051 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1052 __ptr->type = X##structname; \
1053 __ptr->line = token.line; __ptr->column = token.col; \
1062 enum exec_types type;
1070 struct exec *left, *right;
1073 ###### ast functions
1075 static int __fput_loc(struct exec *loc, FILE *f)
1077 if (loc->line >= 0) {
1078 fprintf(f, "%d:%d: ", loc->line, loc->column);
1081 if (loc->type == Xbinode)
1082 return __fput_loc(cast(binode,loc)->left, f) ||
1083 __fput_loc(cast(binode,loc)->right, f);
1086 static void fput_loc(struct exec *loc, FILE *f)
1088 if (!__fput_loc(loc, f))
1089 fprintf(f, "??:??: ");
1092 Each different type of `exec` node needs a number of functions
1093 defined, a bit like methods. We must be able to free it,
1094 print it, analyse it and execute it. Once we have specific `exec`
1095 types we will need to parse them too. Let's take this a bit more
1100 The parser generator requires a `free_foo` function for each struct
1101 that stores attributes and they will be `exec`s or subtypes thereof.
1102 So we need `free_exec` which can handle all the subtypes, and we need
1105 ###### ast functions
1107 static void free_binode(struct binode *b)
1112 free_exec(b->right);
1116 ###### core functions
1117 static void free_exec(struct exec *e)
1126 ###### forward decls
1128 static void free_exec(struct exec *e);
1130 ###### free exec cases
1131 case Xbinode: free_binode(cast(binode, e)); break;
1135 Printing an `exec` requires that we know the current indent level for
1136 printing line-oriented components. As will become clear later, we
1137 also want to know what sort of bracketing to use.
1139 ###### ast functions
1141 static void do_indent(int i, char *str)
1148 ###### core functions
1149 static void print_binode(struct binode *b, int indent, int bracket)
1153 ## print binode cases
1157 static void print_exec(struct exec *e, int indent, int bracket)
1163 print_binode(cast(binode, e), indent, bracket); break;
1168 ###### forward decls
1170 static void print_exec(struct exec *e, int indent, int bracket);
1174 As discussed, analysis involves propagating type requirements around
1175 the program and looking for errors.
1177 So `propagate_types` is passed an expected type (being a `vtype`
1178 together with a `bool_permitted` flag) that the `exec` is expected to
1179 return, and returns the type that it does return, either of which can
1180 be `Vunknown`. An `ok` flag is passed by reference. It is set to `0`
1181 when an error is found, and `2` when any change is made. If it
1182 remains unchanged at `1`, then no more propagation is needed.
1184 ###### core functions
1186 static enum vtype propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1187 enum vtype type, int bool_permitted)
1194 switch (prog->type) {
1197 struct binode *b = cast(binode, prog);
1199 ## propagate binode cases
1203 ## propagate exec cases
1210 Interpreting an `exec` doesn't require anything but the `exec`. State
1211 is stored in variables and each variable will be directly linked from
1212 within the `exec` tree. The exception to this is the whole `program`
1213 which needs to look at command line arguments. The `program` will be
1214 interpreted separately.
1216 Each `exec` can return a value, which may be `Vnone` but shouldn't be `Vunknown`.
1218 ###### core functions
1220 static struct value interp_exec(struct exec *e)
1230 struct binode *b = cast(binode, e);
1231 struct value left, right;
1232 left.vtype = right.vtype = Vnone;
1234 ## interp binode cases
1236 free_value(left); free_value(right);
1239 ## interp exec cases
1244 ## Language elements
1246 Each language element needs to be parsed, printed, analysed,
1247 interpreted, and freed. There are several, so let's just start with
1248 the easy ones and work our way up.
1252 We have already met values as separate objects. When manifest
1253 constants appear in the program text that must result in an executable
1254 which has a constant value. So the `val` structure embeds a value in
1270 $0 = new_pos(val, $1);
1271 $0->val.vtype = Vbool;
1275 $0 = new_pos(val, $1);
1276 $0->val.vtype = Vbool;
1280 $0 = new_pos(val, $1);
1281 $0->val.vtype = Vnum;
1282 if (number_parse($0->val.num, $0->val.tail, $1.txt) == 0)
1283 mpq_init($0->val.num);
1286 $0 = new_pos(val, $1);
1287 $0->val.vtype = Vstr;
1288 string_parse(&$1, '\\', &$0->val.str, $0->val.tail);
1291 $0 = new_pos(val, $1);
1292 $0->val.vtype = Vstr;
1293 string_parse(&$1, '\\', &$0->val.str, $0->val.tail);
1296 ###### print exec cases
1299 struct val *v = cast(val, e);
1300 if (v->val.vtype == Vstr)
1302 print_value(v->val);
1303 if (v->val.vtype == Vstr)
1308 ###### propagate exec cases
1311 struct val *val = cast(val, prog);
1312 if (!vtype_compat(type, val->val.vtype, bool_permitted)) {
1313 type_err(c, "error: expected %1 found %2",
1314 prog, type, val->val.vtype);
1317 return val->val.vtype;
1320 ###### interp exec cases
1322 return dup_value(cast(val, e)->val);
1324 ###### ast functions
1325 static void free_val(struct val *v)
1333 ###### free exec cases
1334 case Xval: free_val(cast(val, e)); break;
1336 ###### ast functions
1337 // Move all nodes from 'b' to 'rv', reversing the order.
1338 // In 'b' 'left' is a list, and 'right' is the last node.
1339 // In 'rv', 'left' is the first node and 'right' is a list.
1340 static struct binode *reorder_bilist(struct binode *b)
1342 struct binode *rv = NULL;
1345 struct exec *t = b->right;
1349 b = cast(binode, b->left);
1359 Just as we used a `val` to wrap a value into an `exec`, we similarly
1360 need a `var` to wrap a `variable` into an exec. While each `val`
1361 contained a copy of the value, each `var` holds a link to the variable
1362 because it really is the same variable no matter where it appears.
1363 When a variable is used, we need to remember to follow the `->merged`
1364 link to find the primary instance.
1372 struct variable *var;
1378 VariableDecl -> IDENTIFIER := ${ {
1379 struct variable *v = var_decl(config2context(config), $1.txt);
1380 $0 = new_pos(var, $1);
1385 v = var_ref(config2context(config), $1.txt);
1387 type_err(config2context(config), "error: variable '%v' redeclared",
1389 type_err(config2context(config), "info: this is where '%v' was first declared",
1390 v->where_decl, Vnone, Vnone);
1393 | IDENTIFIER ::= ${ {
1394 struct variable *v = var_decl(config2context(config), $1.txt);
1395 $0 = new_pos(var, $1);
1401 v = var_ref(config2context(config), $1.txt);
1403 type_err(config2context(config), "error: variable '%v' redeclared",
1405 type_err(config2context(config), "info: this is where '%v' was first declared",
1406 v->where_decl, Vnone, Vnone);
1410 Variable -> IDENTIFIER ${ {
1411 struct variable *v = var_ref(config2context(config), $1.txt);
1412 $0 = new_pos(var, $1);
1414 /* This might be a label - allocate a var just in case */
1415 v = var_decl(config2context(config), $1.txt);
1417 val_init(&v->val, Vlabel);
1424 ###### print exec cases
1427 struct var *v = cast(var, e);
1429 struct binding *b = v->var->name;
1430 printf("%.*s", b->name.len, b->name.txt);
1437 if (loc->type == Xvar) {
1438 struct var *v = cast(var, loc);
1440 struct binding *b = v->var->name;
1441 fprintf(stderr, "%.*s", b->name.len, b->name.txt);
1443 fputs("???", stderr);
1445 fputs("NOTVAR", stderr);
1448 ###### propagate exec cases
1452 struct var *var = cast(var, prog);
1453 struct variable *v = var->var;
1455 type_err(c, "%d:BUG: no variable!!", prog, Vnone, Vnone);
1461 if (v->val.vtype == Vunknown) {
1462 if (type > Vunknown && *ok != 0) {
1463 val_init(&v->val, type);
1464 v->where_set = prog;
1469 if (!vtype_compat(type, v->val.vtype, bool_permitted)) {
1470 type_err(c, "error: expected %1 but variable '%v' is %2", prog,
1471 type, v->val.vtype);
1472 type_err(c, "info: this is where '%v' was set to %1", v->where_set,
1473 v->val.vtype, Vnone);
1476 if (type <= Vunknown)
1477 return v->val.vtype;
1481 ###### interp exec cases
1484 struct var *var = cast(var, e);
1485 struct variable *v = var->var;
1489 return dup_value(v->val);
1492 ###### ast functions
1494 static void free_var(struct var *v)
1499 ###### free exec cases
1500 case Xvar: free_var(cast(var, e)); break;
1502 ### Expressions: Boolean
1504 Our first user of the `binode` will be expressions, and particularly
1505 Boolean expressions. As I haven't implemented precedence in the
1506 parser generator yet, we need different names for each precedence
1507 level used by expressions. The outermost or lowest level precedence
1508 are Boolean `or` `and`, and `not` which form an `Expression` out of `BTerm`s
1519 Expression -> Expression or BTerm ${ {
1520 struct binode *b = new(binode);
1526 | BTerm ${ $0 = $<1; }$
1528 BTerm -> BTerm and BFact ${ {
1529 struct binode *b = new(binode);
1535 | BFact ${ $0 = $<1; }$
1537 BFact -> not BFact ${ {
1538 struct binode *b = new(binode);
1545 ###### print binode cases
1547 print_exec(b->left, -1, 0);
1549 print_exec(b->right, -1, 0);
1552 print_exec(b->left, -1, 0);
1554 print_exec(b->right, -1, 0);
1558 print_exec(b->right, -1, 0);
1561 ###### propagate binode cases
1565 /* both must be Vbool, result is Vbool */
1566 propagate_types(b->left, c, ok, Vbool, 0);
1567 propagate_types(b->right, c, ok, Vbool, 0);
1568 if (type != Vbool && type > Vunknown) {
1569 type_err(c, "error: %1 operation found where %2 expected", prog,
1575 ###### interp binode cases
1577 rv = interp_exec(b->left);
1578 right = interp_exec(b->right);
1579 rv.bool = rv.bool && right.bool;
1582 rv = interp_exec(b->left);
1583 right = interp_exec(b->right);
1584 rv.bool = rv.bool || right.bool;
1587 rv = interp_exec(b->right);
1591 ### Expressions: Comparison
1593 Of slightly higher precedence than Boolean expressions are
1595 A comparison takes arguments of any type, but the two types must be
1598 To simplify the parsing we introduce an `eop` which can return an
1599 expression operator.
1606 ###### ast functions
1607 static void free_eop(struct eop *e)
1622 | Expr CMPop Expr ${ {
1623 struct binode *b = new(binode);
1629 | Expr ${ $0 = $<1; }$
1634 CMPop -> < ${ $0.op = Less; }$
1635 | > ${ $0.op = Gtr; }$
1636 | <= ${ $0.op = LessEq; }$
1637 | >= ${ $0.op = GtrEq; }$
1638 | == ${ $0.op = Eql; }$
1639 | != ${ $0.op = NEql; }$
1641 ###### print binode cases
1649 print_exec(b->left, -1, 0);
1651 case Less: printf(" < "); break;
1652 case LessEq: printf(" <= "); break;
1653 case Gtr: printf(" > "); break;
1654 case GtrEq: printf(" >= "); break;
1655 case Eql: printf(" == "); break;
1656 case NEql: printf(" != "); break;
1659 print_exec(b->right, -1, 0);
1662 ###### propagate binode cases
1669 /* Both must match but not labels, result is Vbool */
1670 t = propagate_types(b->left, c, ok, Vnolabel, 0);
1672 propagate_types(b->right, c, ok, t, 0);
1674 t = propagate_types(b->right, c, ok, Vnolabel, 0);
1676 t = propagate_types(b->left, c, ok, t, 0);
1678 if (!vtype_compat(type, Vbool, 0)) {
1679 type_err(c, "error: Comparison returns %1 but %2 expected", prog,
1685 ###### interp binode cases
1694 left = interp_exec(b->left);
1695 right = interp_exec(b->right);
1696 cmp = value_cmp(left, right);
1699 case Less: rv.bool = cmp < 0; break;
1700 case LessEq: rv.bool = cmp <= 0; break;
1701 case Gtr: rv.bool = cmp > 0; break;
1702 case GtrEq: rv.bool = cmp >= 0; break;
1703 case Eql: rv.bool = cmp == 0; break;
1704 case NEql: rv.bool = cmp != 0; break;
1705 default: rv.bool = 0; break;
1710 ### Expressions: The rest
1712 The remaining expressions with the highest precedence are arithmetic
1713 and string concatenation. There are `Expr`, `Term`, and `Factor`.
1714 The `Factor` is where the `Value` and `Variable` that we already have
1717 `+` and `-` are both infix and prefix operations (where they are
1718 absolute value and negation). These have different operator names.
1720 We also have a 'Bracket' operator which records where parentheses were
1721 found. This makes it easy to reproduce these when printing. Once
1722 precedence is handled better I might be able to discard this.
1734 Expr -> Expr Eop Term ${ {
1735 struct binode *b = new(binode);
1741 | Term ${ $0 = $<1; }$
1743 Term -> Term Top Factor ${ {
1744 struct binode *b = new(binode);
1750 | Factor ${ $0 = $<1; }$
1752 Factor -> ( Expression ) ${ {
1753 struct binode *b = new_pos(binode, $1);
1759 struct binode *b = new(binode);
1764 | Value ${ $0 = $<1; }$
1765 | Variable ${ $0 = $<1; }$
1768 Eop -> + ${ $0.op = Plus; }$
1769 | - ${ $0.op = Minus; }$
1771 Uop -> + ${ $0.op = Absolute; }$
1772 | - ${ $0.op = Negate; }$
1774 Top -> * ${ $0.op = Times; }$
1775 | / ${ $0.op = Divide; }$
1776 | ++ ${ $0.op = Concat; }$
1778 ###### print binode cases
1784 print_exec(b->left, indent, 0);
1786 case Plus: printf(" + "); break;
1787 case Minus: printf(" - "); break;
1788 case Times: printf(" * "); break;
1789 case Divide: printf(" / "); break;
1790 case Concat: printf(" ++ "); break;
1793 print_exec(b->right, indent, 0);
1797 print_exec(b->right, indent, 0);
1801 print_exec(b->right, indent, 0);
1805 print_exec(b->right, indent, 0);
1809 ###### propagate binode cases
1814 /* both must be numbers, result is Vnum */
1817 /* as propagate_types ignores a NULL,
1818 * unary ops fit here too */
1819 propagate_types(b->left, c, ok, Vnum, 0);
1820 propagate_types(b->right, c, ok, Vnum, 0);
1821 if (!vtype_compat(type, Vnum, 0)) {
1822 type_err(c, "error: Arithmetic returns %1 but %2 expected", prog,
1829 /* both must be Vstr, result is Vstr */
1830 propagate_types(b->left, c, ok, Vstr, 0);
1831 propagate_types(b->right, c, ok, Vstr, 0);
1832 if (!vtype_compat(type, Vstr, 0)) {
1833 type_err(c, "error: Concat returns %1 but %2 expected", prog,
1840 return propagate_types(b->right, c, ok, type, 0);
1842 ###### interp binode cases
1845 rv = interp_exec(b->left);
1846 right = interp_exec(b->right);
1847 mpq_add(rv.num, rv.num, right.num);
1850 rv = interp_exec(b->left);
1851 right = interp_exec(b->right);
1852 mpq_sub(rv.num, rv.num, right.num);
1855 rv = interp_exec(b->left);
1856 right = interp_exec(b->right);
1857 mpq_mul(rv.num, rv.num, right.num);
1860 rv = interp_exec(b->left);
1861 right = interp_exec(b->right);
1862 mpq_div(rv.num, rv.num, right.num);
1865 rv = interp_exec(b->right);
1866 mpq_neg(rv.num, rv.num);
1869 rv = interp_exec(b->right);
1870 mpq_abs(rv.num, rv.num);
1873 rv = interp_exec(b->right);
1876 left = interp_exec(b->left);
1877 right = interp_exec(b->right);
1879 rv.str = text_join(left.str, right.str);
1882 ### Blocks, Statements, and Statement lists.
1884 Now that we have expressions out of the way we need to turn to
1885 statements. There are simple statements and more complex statements.
1886 Simple statements do not contain newlines, complex statements do.
1888 Statements often come in sequences and we have corresponding simple
1889 statement lists and complex statement lists.
1890 The former comprise only simple statements separated by semicolons.
1891 The latter comprise complex statements and simple statement lists. They are
1892 separated by newlines. Thus the semicolon is only used to separate
1893 simple statements on the one line. This may be overly restrictive,
1894 but I'm not sure I ever want a complex statement to share a line with
1897 Note that a simple statement list can still use multiple lines if
1898 subsequent lines are indented, so
1900 ###### Example: wrapped simple statement list
1905 is a single simple statement list. This might allow room for
1906 confusion, so I'm not set on it yet.
1908 A simple statement list needs no extra syntax. A complex statement
1909 list has two syntactic forms. It can be enclosed in braces (much like
1910 C blocks), or it can be introduced by a colon and continue until an
1911 unindented newline (much like Python blocks). With this extra syntax
1912 it is referred to as a block.
1914 Note that a block does not have to include any newlines if it only
1915 contains simple statements. So both of:
1917 if condition: a=b; d=f
1919 if condition { a=b; print f }
1923 In either case the list is constructed from a `binode` list with
1924 `Block` as the operator. When parsing the list it is most convenient
1925 to append to the end, so a list is a list and a statement. When using
1926 the list it is more convenient to consider a list to be a statement
1927 and a list. So we need a function to re-order a list.
1928 `reorder_bilist` serves this purpose.
1930 The only stand-alone statement we introduce at this stage is `pass`
1931 which does nothing and is represented as a `NULL` pointer in a `Block`
1951 Block -> Open Statementlist Close ${ $0 = $<2; }$
1952 | Open Newlines Statementlist Close ${ $0 = $<3; }$
1953 | Open SimpleStatements } ${ $0 = reorder_bilist($<2); }$
1954 | Open Newlines SimpleStatements } ${ $0 = reorder_bilist($<3); }$
1955 | : Statementlist ${ $0 = $<2; }$
1956 | : SimpleStatements ${ $0 = reorder_bilist($<2); }$
1958 Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<1); }$
1960 ComplexStatements -> ComplexStatements ComplexStatement ${
1966 | ComplexStatements NEWLINE ${ $0 = $<1; }$
1967 | ComplexStatement ${
1975 ComplexStatement -> SimpleStatements NEWLINE ${
1976 $0 = reorder_bilist($<1);
1978 ## ComplexStatement Grammar
1981 SimpleStatements -> SimpleStatements ; SimpleStatement ${
1987 | SimpleStatement ${
1993 | SimpleStatements ; ${ $0 = $<1; }$
1995 SimpleStatement -> pass ${ $0 = NULL; }$
1996 ## SimpleStatement Grammar
1998 ###### print binode cases
2002 if (b->left == NULL)
2005 print_exec(b->left, indent, 0);
2008 print_exec(b->right, indent, 0);
2011 // block, one per line
2012 if (b->left == NULL)
2013 do_indent(indent, "pass\n");
2015 print_exec(b->left, indent, bracket);
2017 print_exec(b->right, indent, bracket);
2021 ###### propagate binode cases
2024 /* If any statement returns something other than Vnone
2025 * or Vbool then all such must return same type.
2026 * As each statement may be Vnone or something else,
2027 * we must always pass Vunknown down, otherwise an incorrect
2028 * error might occur. We never return Vnone unless it is
2033 for (e = b; e; e = cast(binode, e->right)) {
2034 t = propagate_types(e->left, c, ok, Vunknown, bool_permitted);
2035 if (bool_permitted && t == Vbool)
2037 if (t != Vunknown && t != Vnone && t != Vbool) {
2038 if (type == Vunknown)
2040 else if (t != type) {
2041 type_err(c, "error: expected %1, found %2",
2050 ###### interp binode cases
2052 while (rv.vtype == Vnone &&
2055 rv = interp_exec(b->left);
2056 b = cast(binode, b->right);
2060 ### The Print statement
2062 `print` is a simple statement that takes a comma-separated list of
2063 expressions and prints the values separated by spaces and terminated
2064 by a newline. No control of formatting is possible.
2066 `print` faces the same list-ordering issue as blocks, and uses the
2072 ###### SimpleStatement Grammar
2074 | print ExpressionList ${
2075 $0 = reorder_bilist($<2);
2077 | print ExpressionList , ${
2082 $0 = reorder_bilist($0);
2093 ExpressionList -> ExpressionList , Expression ${
2106 ###### print binode cases
2109 do_indent(indent, "print");
2113 print_exec(b->left, -1, 0);
2117 b = cast(binode, b->right);
2123 ###### propagate binode cases
2126 /* don't care but all must be consistent */
2127 propagate_types(b->left, c, ok, Vnolabel, 0);
2128 propagate_types(b->right, c, ok, Vnolabel, 0);
2131 ###### interp binode cases
2137 for ( ; b; b = cast(binode, b->right))
2141 left = interp_exec(b->left);
2154 ###### Assignment statement
2156 An assignment will assign a value to a variable, providing it hasn't
2157 been declared as a constant. The analysis phase ensures that the type
2158 will be correct so the interpreter just needs to perform the
2159 calculation. There is a form of assignment which declares a new
2160 variable as well as assigning a value. If a name is assigned before
2161 it is declared, an error will be raised as the name is created as
2162 `Vlabel` and it is illegal to assign to such names.
2168 ###### SimpleStatement Grammar
2169 | Variable = Expression ${ {
2170 struct var *v = cast(var, $1);
2176 if (v->var && !v->var->constant) {
2180 | VariableDecl Expression ${
2187 ###### print binode cases
2190 do_indent(indent, "");
2191 print_exec(b->left, indent, 0);
2193 print_exec(b->right, indent, 0);
2199 do_indent(indent, "");
2200 print_exec(b->left, indent, 0);
2201 if (cast(var, b->left)->var->constant)
2205 print_exec(b->right, indent, 0);
2210 ###### propagate binode cases
2214 /* Both must match and not be labels, result is Vnone */
2215 t = propagate_types(b->left, c, ok, Vnolabel, 0);
2217 if (propagate_types(b->right, c, ok, t, 0) != t)
2218 if (b->left->type == Xvar)
2219 type_err(c, "info: variable '%v' was set as %1 here.",
2220 cast(var, b->left)->var->where_set, t, Vnone);
2222 t = propagate_types(b->right, c, ok, Vnolabel, 0);
2224 propagate_types(b->left, c, ok, t, 0);
2230 ###### interp binode cases
2235 struct variable *v = cast(var, b->left)->var;
2238 right = interp_exec(b->right);
2241 right.vtype = Vunknown;
2245 ### The `use` statement
2247 The `use` statement is the last "simple" statement. It is needed when
2248 the condition in a conditional statement is a block. `use` works much
2249 like `return` in C, but only completes the `condition`, not the whole
2255 ###### SimpleStatement Grammar
2257 $0 = new_pos(binode, $1);
2262 ###### print binode cases
2265 do_indent(indent, "use ");
2266 print_exec(b->right, -1, 0);
2271 ###### propagate binode cases
2274 /* result matches value */
2275 return propagate_types(b->right, c, ok, type, 0);
2277 ###### interp binode cases
2280 rv = interp_exec(b->right);
2283 ### The Conditional Statement
2285 This is the biggy and currently the only complex statement. This
2286 subsumes `if`, `while`, `do/while`, `switch`, and some parts of `for`.
2287 It is comprised of a number of parts, all of which are optional though
2288 set combinations apply. Each part is (usually) a key word (`then` is
2289 sometimes optional) followed by either an expression or a code block,
2290 except the `casepart` which is a "key word and an expression" followed
2291 by a code block. The code-block option is valid for all parts and,
2292 where an expression is also allowed, the code block can use the `use`
2293 statement to report a value. If the code block does not report a value
2294 the effect is similar to reporting `False`.
2296 The `else` and `case` parts, as well as `then` when combined with
2297 `if`, can contain a `use` statement which will apply to some
2298 containing conditional statement. `for` parts, `do` parts and `then`
2299 parts used with `for` can never contain a `use`, except in some
2300 subordinate conditional statement.
2302 If there is a `forpart`, it is executed first, only once.
2303 If there is a `dopart`, then it is executed repeatedly providing
2304 always that the `condpart` or `cond`, if present, does not return a non-True
2305 value. `condpart` can fail to return any value if it simply executes
2306 to completion. This is treated the same as returning `True`.
2308 If there is a `thenpart` it will be executed whenever the `condpart`
2309 or `cond` returns True (or does not return any value), but this will happen
2310 *after* `dopart` (when present).
2312 If `elsepart` is present it will be executed at most once when the
2313 condition returns `False` or some value that isn't `True` and isn't
2314 matched by any `casepart`. If there are any `casepart`s, they will be
2315 executed when the condition returns a matching value.
2317 The particular sorts of values allowed in case parts has not yet been
2318 determined in the language design, so nothing is prohibited.
2320 The various blocks in this complex statement potentially provide scope
2321 for variables as described earlier. Each such block must include the
2322 "OpenScope" nonterminal before parsing the block, and must call
2323 `var_block_close()` when closing the block.
2325 The code following "`if`", "`switch`" and "`for`" does not get its own
2326 scope, but is in a scope covering the whole statement, so names
2327 declared there cannot be redeclared elsewhere. Similarly the
2328 condition following "`while`" is in a scope that covers the body
2329 ("`do`" part) of the loop, and which does not allow conditional scope
2330 extension. Code following "`then`" (both looping and non-looping),
2331 "`else`" and "`case`" each get their own local scope.
2333 The type requirements on the code block in a `whilepart` are quite
2334 unusual. It is allowed to return a value of some identifiable type, in
2335 which case the loop aborts and an appropriate `casepart` is run, or it
2336 can return a Boolean, in which case the loop either continues to the
2337 `dopart` (on `True`) or aborts and runs the `elsepart` (on `False`).
2338 This is different both from the `ifpart` code block which is expected to
2339 return a Boolean, or the `switchpart` code block which is expected to
2340 return the same type as the casepart values. The correct analysis of
2341 the type of the `whilepart` code block is the reason for the
2342 `bool_permitted` flag which is passed to `propagate_types()`.
2344 The `cond_statement` cannot fit into a `binode` so a new `exec` is
2353 struct exec *action;
2354 struct casepart *next;
2356 struct cond_statement {
2358 struct exec *forpart, *condpart, *dopart, *thenpart, *elsepart;
2359 struct casepart *casepart;
2362 ###### ast functions
2364 static void free_casepart(struct casepart *cp)
2368 free_exec(cp->value);
2369 free_exec(cp->action);
2376 static void free_cond_statement(struct cond_statement *s)
2380 free_exec(s->forpart);
2381 free_exec(s->condpart);
2382 free_exec(s->dopart);
2383 free_exec(s->thenpart);
2384 free_exec(s->elsepart);
2385 free_casepart(s->casepart);
2389 ###### free exec cases
2390 case Xcond_statement: free_cond_statement(cast(cond_statement, e)); break;
2392 ###### ComplexStatement Grammar
2393 | CondStatement ${ $0 = $<1; }$
2398 // both ForThen and Whilepart open scopes, and CondSuffix only
2399 // closes one - so in the first branch here we have another to close.
2400 CondStatement -> ForThen WhilePart CondSuffix ${
2402 $0->forpart = $1.forpart; $1.forpart = NULL;
2403 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
2404 $0->condpart = $2.condpart; $2.condpart = NULL;
2405 $0->dopart = $2.dopart; $2.dopart = NULL;
2406 var_block_close(config2context(config), CloseSequential);
2408 | WhilePart CondSuffix ${
2410 $0->condpart = $1.condpart; $1.condpart = NULL;
2411 $0->dopart = $1.dopart; $1.dopart = NULL;
2413 | SwitchPart CondSuffix ${
2417 | IfPart IfSuffix ${
2419 $0->condpart = $1.condpart; $1.condpart = NULL;
2420 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
2421 // This is where we close an "if" statement
2422 var_block_close(config2context(config), CloseSequential);
2425 CondSuffix -> IfSuffix ${
2427 // This is where we close scope of the whole
2428 // "for" or "while" statement
2429 var_block_close(config2context(config), CloseSequential);
2431 | CasePart CondSuffix ${
2433 $1->next = $0->casepart;
2438 CasePart -> Newlines case Expression OpenScope Block ${
2439 $0 = calloc(1,sizeof(struct casepart));
2442 var_block_close(config2context(config), CloseParallel);
2444 | case Expression OpenScope Block ${
2445 $0 = calloc(1,sizeof(struct casepart));
2448 var_block_close(config2context(config), CloseParallel);
2452 IfSuffix -> Newlines ${ $0 = new(cond_statement); }$
2453 | Newlines else OpenScope Block ${
2454 $0 = new(cond_statement);
2456 var_block_close(config2context(config), CloseElse);
2458 | else OpenScope Block ${
2459 $0 = new(cond_statement);
2461 var_block_close(config2context(config), CloseElse);
2463 | Newlines else OpenScope CondStatement ${
2464 $0 = new(cond_statement);
2466 var_block_close(config2context(config), CloseElse);
2468 | else OpenScope CondStatement ${
2469 $0 = new(cond_statement);
2471 var_block_close(config2context(config), CloseElse);
2476 // These scopes are closed in CondSuffix
2477 ForPart -> for OpenScope SimpleStatements ${
2478 $0 = reorder_bilist($<3);
2480 | for OpenScope Block ${
2484 ThenPart -> then OpenScope SimpleStatements ${
2485 $0 = reorder_bilist($<3);
2486 var_block_close(config2context(config), CloseSequential);
2488 | then OpenScope Block ${
2490 var_block_close(config2context(config), CloseSequential);
2493 ThenPartNL -> ThenPart OptNL ${
2497 // This scope is closed in CondSuffix
2498 WhileHead -> while OpenScope Block ${
2503 ForThen -> ForPart OptNL ThenPartNL ${
2511 // This scope is closed in CondSuffix
2512 WhilePart -> while OpenScope Expression Block ${
2513 $0.type = Xcond_statement;
2517 | WhileHead OptNL do Block ${
2518 $0.type = Xcond_statement;
2523 IfPart -> if OpenScope Expression OpenScope Block ${
2524 $0.type = Xcond_statement;
2527 var_block_close(config2context(config), CloseParallel);
2529 | if OpenScope Block OptNL then OpenScope Block ${
2530 $0.type = Xcond_statement;
2533 var_block_close(config2context(config), CloseParallel);
2537 // This scope is closed in CondSuffix
2538 SwitchPart -> switch OpenScope Expression ${
2541 | switch OpenScope Block ${
2545 ###### print exec cases
2547 case Xcond_statement:
2549 struct cond_statement *cs = cast(cond_statement, e);
2550 struct casepart *cp;
2552 do_indent(indent, "for");
2553 if (bracket) printf(" {\n"); else printf(":\n");
2554 print_exec(cs->forpart, indent+1, bracket);
2557 do_indent(indent, "} then {\n");
2559 do_indent(indent, "then:\n");
2560 print_exec(cs->thenpart, indent+1, bracket);
2562 if (bracket) do_indent(indent, "}\n");
2566 if (cs->condpart && cs->condpart->type == Xbinode &&
2567 cast(binode, cs->condpart)->op == Block) {
2569 do_indent(indent, "while {\n");
2571 do_indent(indent, "while:\n");
2572 print_exec(cs->condpart, indent+1, bracket);
2574 do_indent(indent, "} do {\n");
2576 do_indent(indent, "do:\n");
2577 print_exec(cs->dopart, indent+1, bracket);
2579 do_indent(indent, "}\n");
2581 do_indent(indent, "while ");
2582 print_exec(cs->condpart, 0, bracket);
2587 print_exec(cs->dopart, indent+1, bracket);
2589 do_indent(indent, "}\n");
2594 do_indent(indent, "switch");
2596 do_indent(indent, "if");
2597 if (cs->condpart && cs->condpart->type == Xbinode &&
2598 cast(binode, cs->condpart)->op == Block) {
2603 print_exec(cs->condpart, indent+1, bracket);
2605 do_indent(indent, "}\n");
2607 do_indent(indent, "then:\n");
2608 print_exec(cs->thenpart, indent+1, bracket);
2612 print_exec(cs->condpart, 0, bracket);
2618 print_exec(cs->thenpart, indent+1, bracket);
2620 do_indent(indent, "}\n");
2625 for (cp = cs->casepart; cp; cp = cp->next) {
2626 do_indent(indent, "case ");
2627 print_exec(cp->value, -1, 0);
2632 print_exec(cp->action, indent+1, bracket);
2634 do_indent(indent, "}\n");
2637 do_indent(indent, "else");
2642 print_exec(cs->elsepart, indent+1, bracket);
2644 do_indent(indent, "}\n");
2649 ###### propagate exec cases
2650 case Xcond_statement:
2652 // forpart and dopart must return Vnone
2653 // thenpart must return Vnone if there is a dopart,
2654 // otherwise it is like elsepart.
2656 // be bool if there is no casepart
2657 // match casepart->values if there is a switchpart
2658 // either be bool or match casepart->value if there
2660 // elsepart, casepart->action must match the return type
2661 // expected of this statement.
2662 struct cond_statement *cs = cast(cond_statement, prog);
2663 struct casepart *cp;
2665 t = propagate_types(cs->forpart, c, ok, Vnone, 0);
2666 if (!vtype_compat(Vnone, t, 0))
2668 t = propagate_types(cs->dopart, c, ok, Vnone, 0);
2669 if (!vtype_compat(Vnone, t, 0))
2672 t = propagate_types(cs->thenpart, c, ok, Vnone, 0);
2673 if (!vtype_compat(Vnone, t, 0))
2676 if (cs->casepart == NULL)
2677 propagate_types(cs->condpart, c, ok, Vbool, 0);
2679 /* Condpart must match case values, with bool permitted */
2681 for (cp = cs->casepart;
2682 cp && (t == Vunknown); cp = cp->next)
2683 t = propagate_types(cp->value, c, ok, Vunknown, 0);
2684 if (t == Vunknown && cs->condpart)
2685 t = propagate_types(cs->condpart, c, ok, Vunknown, 1);
2686 // Now we have a type (I hope) push it down
2687 if (t != Vunknown) {
2688 for (cp = cs->casepart; cp; cp = cp->next)
2689 propagate_types(cp->value, c, ok, t, 0);
2690 propagate_types(cs->condpart, c, ok, t, 1);
2693 // (if)then, else, and case parts must return expected type.
2694 if (!cs->dopart && type == Vunknown)
2695 type = propagate_types(cs->thenpart, c, ok, Vunknown, bool_permitted);
2696 if (type == Vunknown)
2697 type = propagate_types(cs->elsepart, c, ok, Vunknown, bool_permitted);
2698 for (cp = cs->casepart;
2699 cp && type == Vunknown;
2701 type = propagate_types(cp->action, c, ok, Vunknown, bool_permitted);
2702 if (type > Vunknown) {
2704 propagate_types(cs->thenpart, c, ok, type, bool_permitted);
2705 propagate_types(cs->elsepart, c, ok, type, bool_permitted);
2706 for (cp = cs->casepart; cp ; cp = cp->next)
2707 propagate_types(cp->action, c, ok, type, bool_permitted);
2713 ###### interp exec cases
2714 case Xcond_statement:
2716 struct value v, cnd;
2717 struct casepart *cp;
2718 struct cond_statement *c = cast(cond_statement, e);
2720 interp_exec(c->forpart);
2723 cnd = interp_exec(c->condpart);
2726 if (!(cnd.vtype == Vnone ||
2727 (cnd.vtype == Vbool && cnd.bool != 0)))
2731 interp_exec(c->dopart);
2734 v = interp_exec(c->thenpart);
2735 if (v.vtype != Vnone || !c->dopart)
2739 } while (c->dopart);
2741 for (cp = c->casepart; cp; cp = cp->next) {
2742 v = interp_exec(cp->value);
2743 if (value_cmp(v, cnd) == 0) {
2746 return interp_exec(cp->action);
2752 return interp_exec(c->elsepart);
2757 ### Finally the whole program.
2759 Somewhat reminiscent of Pascal, a (current) Ocean program starts with
2760 the keyword "program" and a list of variable names which are assigned
2761 values from command line arguments. Following this is a `block` which
2762 is the code to execute.
2764 As this is the top level, several things are handled a bit
2766 The whole program is not interpreted by `interp_exec` as that isn't
2767 passed the argument list which the program requires. Similarly type
2768 analysis is a bit more interesting at this level.
2773 ###### Parser: grammar
2776 Program -> program OpenScope Varlist Block OptNL ${
2779 $0->left = reorder_bilist($<3);
2781 var_block_close(config2context(config), CloseSequential);
2782 if (config2context(config)->scope_stack) abort();
2785 fprintf(stderr, "%s:%d:%d: error: unhandled parse error.\n",
2786 config2context(config)->file_name, $1.line, $1.col);
2787 config2context(config)->parse_error = 1;
2790 Varlist -> Varlist ArgDecl ${
2799 ArgDecl -> IDENTIFIER ${ {
2800 struct variable *v = var_decl(config2context(config), $1.txt);
2807 ###### print binode cases
2809 do_indent(indent, "program");
2810 for (b2 = cast(binode, b->left); b2; b2 = cast(binode, b2->right)) {
2812 print_exec(b2->left, 0, 0);
2818 print_exec(b->right, indent+1, bracket);
2820 do_indent(indent, "}\n");
2823 ###### propagate binode cases
2824 case Program: abort();
2826 ###### core functions
2828 static int analyse_prog(struct exec *prog, struct parse_context *c)
2830 struct binode *b = cast(binode, prog);
2837 propagate_types(b->right, c, &ok, Vnone, 0);
2842 for (b = cast(binode, b->left); b; b = cast(binode, b->right)) {
2843 struct var *v = cast(var, b->left);
2844 if (v->var->val.vtype == Vunknown) {
2845 v->var->where_set = b;
2846 val_init(&v->var->val, Vstr);
2849 b = cast(binode, prog);
2852 propagate_types(b->right, c, &ok, Vnone, 0);
2857 /* Make sure everything is still consistent */
2858 propagate_types(b->right, c, &ok, Vnone, 0);
2862 static void interp_prog(struct exec *prog, char **argv)
2864 struct binode *p = cast(binode, prog);
2870 al = cast(binode, p->left);
2872 struct var *v = cast(var, al->left);
2873 struct value *vl = &v->var->val;
2875 if (argv[0] == NULL) {
2876 printf("Not enough args\n");
2879 al = cast(binode, al->right);
2881 if (!parse_value(vl, argv[0]))
2885 v = interp_exec(p->right);
2889 ###### interp binode cases
2890 case Program: abort();
2892 ## And now to test it out.
2894 Having a language requires having a "hello world" program. I'll
2895 provide a little more than that: a program that prints "Hello world",
2896 finds the GCD of two numbers, prints the first few elements of
2897 Fibonacci, and performs a binary search for a number.
2899 ###### File: oceani.mk
2902 @echo "===== TEST ====="
2903 ./oceani --section "test: hello" oceani.mdc 55 33
2908 print "Hello World, what lovely oceans you have!"
2909 /* When a variable is defined in both branches of an 'if',
2910 * and used afterwards, the variables are merged.
2916 print "Is", A, "bigger than", B,"? ", bigger
2917 /* If a variable is not used after the 'if', no
2918 * merge happens, so types can be different
2922 print A, "is more than twice", B, "?", double
2925 print "double", A, "is only", double
2934 print "GCD of", A, "and", B,"is", a
2936 print a, "is not positive, cannot calculate GCD"
2938 print b, "is not positive, cannot calculate GCD"
2943 print "Fibonacci:", f1,f2,
2944 then togo = togo - 1
2952 /* Binary search... */
2957 mid := (lo + hi) / 2
2969 print "Yay, I found", target
2971 print "Closest I found was", mid