1 # Ocean Interpreter - Falls Creek version
3 Ocean is intended to be an compiled language, so this interpreter is
4 not targeted at being the final product. It is very much an intermediate
5 stage, and fills that role in two distinct ways.
7 Firstly, it exists as a platform to experiment with the early language
8 design. An interpreter is easy to write and easy to get working, so
9 the barrier for entry is lower if I aim to start with an interpreter.
11 Secondly, the plan for the Ocean compiler is to write it in the
12 [Ocean language](http://ocean-lang.org). To achieve this we naturally
13 need some sort of boot-strap process and this interpreter - written in
14 portable C - will fill that role. It will be used to bootstrap the
17 Two features that are not needed to fill either of these roles are
18 performance and completeness. The interpreter only needs to be fast
19 enough to run small test programs and occasionally to run the compiler
20 on itself. It only needs to be complete enough to test aspects of the
21 design which are developed before the compiler is working, and to run
22 the compiler on itself. Any features not used by the compiler when
23 compiling itself are superfluous. They may be included anyway, but
26 Nonetheless, the interpreter should end up being reasonably complete,
27 and any performance bottlenecks which appear and are easily fixed, will
32 This initial version of the interpreter exists to test out the
33 structured statement providing conditions and iteration. Clearly we
34 need some minimal other functionality so that values can be tested and
35 instructions iterated over. All that functionality is clearly not
36 normative at this stage (not that anything is **really** normative
37 yet) and will change, so early test code will certainly break.
39 Beyond the structured statement and the `use` statement which is
40 intimately related to it we have:
42 - "blocks" of multiple statements.
43 - `pass`: a statement which does nothing.
44 - variables: any identifier is assumed to store a number, string,
46 - expressions: `+`, `-`, `*`, `/` can apply to integers and `++` can
47 catenate strings. `and`, `or`, `not` manipulate Booleans, and
48 normal comparison operators can work on all three types.
49 - assignments: can assign the value of an expression to a variable.
50 - `print`: will print the values in a list of expressions.
51 - `program`: is given a list of identifiers to initialize from
56 Versions of the interpreter which obviously do not support a complete
57 language will be named after creeks and streams. This one is Falls
60 Once we have something reasonably resembling a complete language, the
61 names of rivers will be used.
62 Early versions of the compiler will be named after seas. Major
63 releases of the compiler will be named after oceans. Hopefully I will
64 be finished once I get to the Pacific Ocean release.
68 As well as parsing and executing a program, the interpreter can print
69 out the program from the parsed internal structure. This is useful
70 for validating the parsing.
71 So the main requirements of the interpreter are:
74 - Analyse the parsed program to ensure consistency
78 This is all performed by a single C program extracted with
81 There will be two formats for printing the program a default and one
82 that uses bracketing. So an extra command line option is needed for
85 ###### File: oceani.mk
87 myCFLAGS := -Wall -g -fplan9-extensions
88 CFLAGS := $(filter-out $(myCFLAGS),$(CFLAGS)) $(myCFLAGS)
89 myLDLIBS:= libparser.o libscanner.o libmdcode.o -licuuc
90 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
93 oceani.c oceani.h : oceani.mdc parsergen
94 ./parsergen -o oceani --LALR --tag Parser oceani.mdc
95 oceani.mk: oceani.mdc md2c
100 ###### Parser: header
103 struct parse_context {
104 struct token_config config;
114 #include <sys/mman.h>
133 static char Usage[] = "Usage: oceani --trace --print --noexec prog.ocn\n";
134 static const struct option long_options[] = {
135 {"trace", 0, NULL, 't'},
136 {"print", 0, NULL, 'p'},
137 {"noexec", 0, NULL, 'n'},
138 {"brackets", 0, NULL, 'b'},
141 const char *options = "tpnb";
142 int main(int argc, char *argv[])
148 struct parse_context context = {
150 .ignored = (1 << TK_line_comment)
151 | (1 << TK_block_comment),
152 .number_chars = ".,_+-",
157 int doprint=0, dotrace=0, doexec=1, brackets=0;
160 while ((opt = getopt_long(argc, argv, options, long_options, NULL))
163 case 't': dotrace=1; break;
164 case 'p': doprint=1; break;
165 case 'n': doexec=0; break;
166 case 'b': brackets=1; break;
167 default: fprintf(stderr, Usage);
171 if (optind >= argc) {
172 fprintf(stderr, "oceani: no input file given\n");
175 fd = open(argv[optind], O_RDONLY);
177 fprintf(stderr, "oceani: cannot open %s\n", argv[optind]);
180 len = lseek(fd, 0, 2);
181 file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
182 s = code_extract(file, file+len, NULL);
184 fprintf(stderr, "oceani: could not find any code in %s\n",
188 prog = parse_oceani(s->code, &context.config,
189 dotrace ? stderr : NULL);
191 print_exec(*prog, 0, brackets);
192 if (prog && doexec) {
193 if (!analyse_prog(*prog, &context)) {
194 fprintf(stderr, "oceani: type error in program\n");
197 interp_prog(*prog, argv+optind+1);
204 struct section *t = s->next;
215 These four requirements of parse, analyse, print, interpret apply to
216 each language element individually so that is how most of the code
219 Three of the four are fairly self explanatory. The one that requires
220 a little explanation is the analysis step.
222 The current language design does not require variables to be declared,
223 but they must have a single type. Different operations impose
224 different requirements on the variables, for example addition requires
225 both arguments to be numeric, and assignment requires the variable on
226 the left to have the same type as the expression on the right.
228 Analysis involves propagating these type requirements around
229 consequently setting the type of each variable. If any requirements
230 are violated (e.g. a string is compared with a number) or if a
231 variable needs to have two different types, then an error is raised
232 and the program will not run.
234 Determining the types of all variables early is important for
235 processing command line arguments. These can be assigned to any type
236 of variable, but we must first know the correct type so any required
237 conversion can happen. If a variable is associated with a command
238 line argument but no type can be interpreted (e.g. the variable is
239 only ever used in a `print` statement), then the type is set to
242 If the type of a variable cannot be determined at all, then it is set
243 to be a number and given a unique value. This allows it to fill the
244 role of a name in an enumerated type, which is useful for testing the
249 One last introductory step before detailing the language elements and
250 providing their four requirements is to establish the data structures
251 to store these elements.
253 There are two key objects that we need to work with: executable
254 elements which comprise the program, and values which the program
255 works with. Between these is the set of variables which hold the
260 Values can be numbers, which we represent as multi-precision
261 fractions, strings and Booleans. When analysing the program we also
262 need to allow for places where no value is meaningful (`Vnone`) and
263 where we don't know what type to expect yet (`Vunknown`).
264 A 2 character 'tail' is included in each value as the scanner wants
265 to parse that from the end of numbers and we need somewhere to put
266 it. It is currently ignored but one day might allow for
267 e.g. "imaginary" numbers.
269 Values are never shared, they are always copied when used, and freed
270 when no longer needed.
278 myLDLIBS := libnumber.o libstring.o -lgmp
279 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
283 enum vtype {Vunknown, Vnone, Vstr, Vnum, Vbool} vtype;
293 void free_value(struct value v)
297 case Vunknown: break;
298 case Vstr: free(v.str.txt); break;
299 case Vnum: mpq_clear(v.num); break;
304 ###### value functions
306 static void val_init(struct value *val, enum vtype type)
311 case Vunknown: break;
313 mpq_init(val->num); break;
315 val->str.txt = malloc(1);
324 static struct value dup_value(struct value v)
330 case Vunknown: break;
336 mpq_set(rv.num, v.num);
339 rv.str.len = v.str.len;
340 rv.str.txt = malloc(rv.str.len);
341 memcpy(rv.str.txt, v.str.txt, v.str.len);
347 static int value_cmp(struct value left, struct value right)
350 if (left.vtype != right.vtype)
351 return left.vtype - right.vtype;
352 switch (left.vtype) {
353 case Vnum: cmp = mpq_cmp(left.num, right.num); break;
354 case Vstr: cmp = text_cmp(left.str, right.str); break;
355 case Vbool: cmp = left.bool - right.bool; break;
357 case Vunknown: cmp = 0;
362 static struct text text_join(struct text a, struct text b)
365 rv.len = a.len + b.len;
366 rv.txt = malloc(rv.len);
367 memcpy(rv.txt, a.txt, a.len);
368 memcpy(rv.txt+a.len, b.txt, b.len);
372 static void print_value(struct value v)
376 printf("*Unknown*"); break;
378 printf("*no-value*"); break;
380 printf("%.*s", v.str.len, v.str.txt); break;
382 printf("%s", v.bool ? "True":"False"); break;
387 mpf_set_q(fl, v.num);
388 gmp_printf("%Fg", fl);
395 static int parse_value(struct value *vl, char *arg)
404 vl->str.len = strlen(arg);
405 vl->str.txt = malloc(vl->str.len);
406 memcpy(vl->str.txt, arg, vl->str.len);
413 tx.txt = arg; tx.len = strlen(tx.txt);
414 if (number_parse(vl->num, vl->tail, tx) == 0)
417 mpq_neg(vl->num, vl->num);
420 if (strcasecmp(arg, "true") == 0 ||
421 strcmp(arg, "1") == 0)
423 else if (strcasecmp(arg, "false") == 0 ||
424 strcmp(arg, "0") == 0)
427 printf("Bad bool: %s\n", arg);
437 Variables are simply named values. We store them in a linked list
438 sorted by name and use sequential search and insertion sort.
440 This linked list is stored in the parse context so that reduce
441 functions can find or add variables, and so the analysis phase can
442 ensure that every variable gets a type.
448 struct variable *next;
454 #define container_of(ptr, type, member) ({ \
455 const typeof( ((type *)0)->member ) *__mptr = (ptr); \
456 (type *)( (char *)__mptr - offsetof(type,member) );})
460 struct variable *varlist;
463 while (context.varlist) {
464 struct variable *v = context.varlist;
465 context.varlist = v->next;
472 static struct variable *find_variable(struct token_config *conf, struct text s)
474 struct variable **l = &container_of(conf, struct parse_context,
480 (cmp = text_cmp((*l)->name, s)) < 0)
484 n = calloc(1, sizeof(*n));
486 n->val.vtype = Vunknown;
494 Executables can be lots of different things. In many cases an
495 executable is just an operation combined with one or two other
496 executables. This allows for expressions and lists etc. Other times
497 an executable is something quite specific like a constant or variable
498 name. So we define a `struct exec` to be a general executable with a
499 type, and a `struct binode` which is a subclass of `exec` and forms a
500 node in a binary tree and holding an operation. There will be other
501 subclasses, and to access these we need to be able to `cast` the
502 `exec` into the various other types.
505 #define cast(structname, pointer) ({ \
506 const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \
507 if (__mptr && *__mptr != X##structname) abort(); \
508 (struct structname *)( (char *)__mptr);})
510 #define new(structname) ({ \
511 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
512 __ptr->type = X##structname; \
521 enum exec_types type;
528 struct exec *left, *right;
531 Each different type of `exec` node needs a number of functions
532 defined, a bit like methods. We must be able to be able to free it,
533 print it, analyse it and execute it. Once we have specific `exec`
534 types we will need to parse them to. Let's take this a bit more
539 The parser generator requires as `free_foo` function for each struct
540 that stores attributes and they will be `exec`s of subtypes there-of.
541 So we need `free_exec` which can handle all the subtypes, and we need
546 static void free_binode(struct binode *b)
555 ###### core functions
556 static void free_exec(struct exec *e)
567 static void free_exec(struct exec *e);
569 ###### free exec cases
570 case Xbinode: free_binode(cast(binode, e)); break;
574 Printing an `exec` requires that we know the current indent level for
575 printing line-oriented components. As will become clear later, we
576 also want to know what sort of bracketing to use.
580 static void do_indent(int i, char *str)
587 ###### core functions
588 static void print_binode(struct binode *b, int indent, int bracket)
592 ## print binode cases
596 static void print_exec(struct exec *e, int indent, int bracket)
600 print_binode(cast(binode, e), indent, bracket); break;
607 static void print_exec(struct exec *e, int indent, int bracket);
611 As discusses, analysis involves propagating type requirements around
612 the program and looking for errors.
614 So propagate_types is passed a type that the `exec` is expected to return,
615 and returns the type that it does return, either of which can be `Vunknown`.
616 An `ok` flag is passed by reference. It is set to `0` when an error is
617 found, and `2` when any change is made. If it remains unchanged at
618 `1`, then no more propagation is needed.
620 ###### core functions
622 static enum vtype propagate_types(struct exec *prog, enum vtype type,
628 if (type != Vunknown && type != Vnone)
633 switch (prog->type) {
636 struct binode *b = cast(binode, prog);
638 ## propagate binode cases
642 ## propagate exec cases
649 Interpreting an `exec` doesn't require anything but the `exec`. State
650 is stored in variables and each variable will be directly linked from
651 within the `exec` tree. The exception to this is the whole `program`
652 which needs to look at command line arguments. The `program` will be
653 interpreted separately.
655 Each `exec` can return a value, which may be `Vnone` but shouldn't be `Vunknown`.
657 ###### core functions
659 static struct value interp_exec(struct exec *e)
669 struct binode *b = cast(binode, e);
670 struct value left, right;
671 left.vtype = right.vtype = Vnone;
673 ## interp binode cases
675 free_value(left); free_value(right);
685 Each language element needs to be parsed, printed, analysed,
686 interpreted, and freed. There are several, so let's just start with
687 the easy ones and work our way up.
691 We have already met values and separate objects. When manifest
692 constants appear in the program text that must result in an executable
693 which has a constant value. So the `val` structure embeds a value in
710 $0->val.vtype = Vbool;
715 $0->val.vtype = Vbool;
720 $0->val.vtype = Vnum;
721 if (number_parse($0->val.num, $0->val.tail, $1.txt) == 0)
722 mpq_init($0->val.num);
726 $0->val.vtype = Vstr;
727 string_parse(&$1, '\\', &$0->val.str, $0->val.tail);
731 $0->val.vtype = Vstr;
732 string_parse(&$1, '\\', &$0->val.str, $0->val.tail);
735 ###### print exec cases
738 struct val *v = cast(val, e);
739 if (v->val.vtype == Vstr)
742 if (v->val.vtype == Vstr)
747 ###### propagate exec cases
750 struct val *val = cast(val, prog);
751 if (type != Vunknown &&
752 type != val->val.vtype)
754 return val->val.vtype;
757 ###### interp exec cases
759 return dup_value(cast(val, e)->val);
762 void free_val(struct val *v)
770 ###### free exec cases
771 case Xval: free_val(cast(val, e)); break;
774 // Move all nodes from 'b' to 'rv', reversing the order.
775 // In 'b' 'left' is a list, and 'right' is the last node.
776 // In 'rv', left' is the first node and 'right' is a list.
777 struct binode *reorder_bilist(struct binode *b)
779 struct binode *rv = NULL;
782 struct exec *t = b->right;
786 b = cast(binode, b->left);
796 Just as we used as `val` to wrap a value into an `exec`, we similarly
797 need a `var` to wrap a `variable` into an exec. While each `val`
798 contained a copy of the value, each `var` hold a link to the variable
799 because it really is the same variable no matter where it appears.
807 struct variable *var;
812 Variable -> IDENTIFIER ${
814 $0->var = find_variable(config, $1.txt);
817 ###### print exec cases
820 struct var *v = cast(var, e);
821 printf("%.*s", v->var->name.len, v->var->name.txt);
825 ###### propagate exec cases
829 struct var *var = cast(var, prog);
830 if (var->var->val.vtype == Vunknown) {
831 if (type != Vunknown && *ok != 0) {
832 val_init(&var->var->val, type);
837 if (type == Vunknown)
838 return var->var->val.vtype;
839 if (type != var->var->val.vtype)
844 ###### interp exec cases
846 return dup_value(cast(var, e)->var->val);
850 void free_var(struct var *v)
855 ###### free exec cases
856 case Xvar: free_var(cast(var, e)); break;
858 ### Expressions: Boolean
860 Our first user of the `binode` will be expressions, and particularly
861 Boolean expressions. As I haven't implemented precedence in the
862 parser generator yet, we need different names from each precedence
863 level used by expressions. The outer most or lowest level precedence
864 are Boolean `or` `and`, and `not` which form and `Expression` our of `BTerm`s
875 Expression -> Expression or BTerm ${
881 | BTerm ${ $0 = $<1; }$
883 BTerm -> BTerm and BFact ${
889 | BFact ${ $0 = $<1; }$
891 BFact -> not BFact ${
898 ###### print binode cases
900 print_exec(b->left, -1, 0);
902 print_exec(b->right, -1, 0);
905 print_exec(b->left, -1, 0);
907 print_exec(b->right, -1, 0);
911 print_exec(b->right, -1, 0);
914 ###### propagate binode cases
918 /* both must be Vbool, result is Vbool */
919 propagate_types(b->left, Vbool, ok);
920 propagate_types(b->right, Vbool, ok);
921 if (type != Vbool && type != Vunknown)
925 ###### interp binode cases
927 rv = interp_exec(b->left);
928 right = interp_exec(b->right);
929 rv.bool = rv.bool && right.bool;
932 rv = interp_exec(b->left);
933 right = interp_exec(b->right);
934 rv.bool = rv.bool || right.bool;
937 rv = interp_exec(b->right);
941 ### Expressions: Comparison
943 Of slightly higher precedence that Boolean expressions are
945 A comparison takes arguments of any type, but the two types must be
948 To simplify the parsing we introduce an `eop` which can return an
957 static void free_eop(struct eop *e)
978 | Expr ${ $0 = $<1; }$
983 CMPop -> < ${ $0.op = Less; }$
984 | > ${ $0.op = Gtr; }$
985 | <= ${ $0.op = LessEq; }$
986 | >= ${ $0.op = GtrEq; }$
987 | == ${ $0.op = Eql; }$
988 | != ${ $0.op = NEql; }$
990 ###### print binode cases
998 print_exec(b->left, -1, 0);
1000 case Less: printf(" < "); break;
1001 case LessEq: printf(" <= "); break;
1002 case Gtr: printf(" > "); break;
1003 case GtrEq: printf(" >= "); break;
1004 case Eql: printf(" == "); break;
1005 case NEql: printf(" != "); break;
1008 print_exec(b->right, -1, 0);
1011 ###### propagate binode cases
1018 /* Both must match, result is Vbool */
1019 t = propagate_types(b->left, Vunknown, ok);
1021 propagate_types(b->right, t, ok);
1023 t = propagate_types(b->right, Vunknown, ok);
1025 t = propagate_types(b->left, t, ok);
1027 if (type != Vbool && type != Vunknown)
1031 ###### interp binode cases
1040 left = interp_exec(b->left);
1041 right = interp_exec(b->right);
1042 cmp = value_cmp(left, right);
1045 case Less: rv.bool = cmp < 0; break;
1046 case LessEq: rv.bool = cmp <= 0; break;
1047 case Gtr: rv.bool = cmp > 0; break;
1048 case GtrEq: rv.bool = cmp >= 0; break;
1049 case Eql: rv.bool = cmp == 0; break;
1050 case NEql: rv.bool = cmp != 0; break;
1051 default: rv.bool = 0; break;
1056 ### Expressions: The rest
1058 The remaining expressions with the highest precedence are arithmetic
1059 and string concatenation. There are `Expr`, `Term`, and `Factor`.
1060 The `Factor` is where the `Value` and `Variable` that we already have
1063 `+` and `-` are both infix and prefix operations (where they are
1064 absolute value and negation). These have different operator names.
1066 We also have a 'Bracket' operator which records where parentheses were
1067 found. This make it easy to reproduce these when printing. Once
1068 precedence is handled better I might be able to discard this.
1080 Expr -> Expr Eop Term ${
1086 | Term ${ $0 = $<1; }$
1088 Term -> Term Top Factor ${
1094 | Factor ${ $0 = $<1; }$
1096 Factor -> ( Expression ) ${
1106 | Value ${ $0 = (struct binode *)$<1; }$
1107 | Variable ${ $0 = (struct binode *)$<1; }$
1110 Eop -> + ${ $0.op = Plus; }$
1111 | - ${ $0.op = Minus; }$
1113 Uop -> + ${ $0.op = Absolute; }$
1114 | - ${ $0.op = Negate; }$
1116 Top -> * ${ $0.op = Times; }$
1117 | / ${ $0.op = Divide; }$
1118 | ++ ${ $0.op = Concat; }$
1120 ###### print binode cases
1126 print_exec(b->left, indent, 0);
1128 case Plus: printf(" + "); break;
1129 case Minus: printf(" - "); break;
1130 case Times: printf(" * "); break;
1131 case Divide: printf(" / "); break;
1132 case Concat: printf(" ++ "); break;
1135 print_exec(b->right, indent, 0);
1139 print_exec(b->right, indent, 0);
1143 print_exec(b->right, indent, 0);
1147 print_exec(b->right, indent, 0);
1151 ###### propagate binode cases
1156 /* both must be numbers, result is Vnum */
1159 /* as propagate_types ignores a NULL,
1160 * unary ops fit here too */
1161 propagate_types(b->left, Vnum, ok);
1162 propagate_types(b->right, Vnum, ok);
1163 if (type != Vnum && type != Vunknown)
1168 /* both must be Vstr, result is Vstr */
1169 propagate_types(b->left, Vstr, ok);
1170 propagate_types(b->right, Vstr, ok);
1171 if (type != Vstr && type != Vunknown)
1176 return propagate_types(b->right, type, ok);
1178 ###### interp binode cases
1181 rv = interp_exec(b->left);
1182 right = interp_exec(b->right);
1183 mpq_add(rv.num, rv.num, right.num);
1186 rv = interp_exec(b->left);
1187 right = interp_exec(b->right);
1188 mpq_sub(rv.num, rv.num, right.num);
1191 rv = interp_exec(b->left);
1192 right = interp_exec(b->right);
1193 mpq_mul(rv.num, rv.num, right.num);
1196 rv = interp_exec(b->left);
1197 right = interp_exec(b->right);
1198 mpq_div(rv.num, rv.num, right.num);
1201 rv = interp_exec(b->right);
1202 mpq_neg(rv.num, rv.num);
1205 rv = interp_exec(b->right);
1206 mpq_abs(rv.num, rv.num);
1209 rv = interp_exec(b->right);
1212 left = interp_exec(b->left);
1213 right = interp_exec(b->right);
1215 rv.str = text_join(left.str, right.str);
1218 ### Blocks, Statements, and Statement lists.
1220 Now that we have expressions out of the way we need to turn to
1221 statements. There are simple statements and more complex statements.
1222 Simple statements do not contain newlines, complex statements do.
1224 Statements often come in sequences and we have corresponding simple
1225 statement lists and complex statement lists.
1226 The former comprise only simple statements separated by semicolons.
1227 The later comprise complex statements and simple statement lists. They are
1228 separated by newlines. Thus the semicolon is only used to separate
1229 simple statements on the one line. This may be overly restrictive,
1230 but I'm not sure I every want a complex statement to share a line with
1233 Note that a simple statement list can still use multiple lines if
1234 subsequent lines are indented, so
1236 ###### Example: wrapped simple statement list
1241 is a single simple statement list. This might allow room for
1242 confusion, so I'm not set on it yet.
1244 A simple statement list needs no extra syntax. A complex statement
1245 list has two syntactic forms. It can be enclosed in braces (much like
1246 C blocks), or it can be introduced by a colon and continue until an
1247 unindented newline (much like Python blocks). With this extra syntax
1248 it is referred to as a block.
1250 Note that a block does not have to include any newlines if it only
1251 contains simple statements. So both of:
1253 if condition: a=b; d=f
1255 if condition { a=b; print f }
1259 In either case the list is constructed from a `binode` list with
1260 `Block` as the operator. When parsing the list it is most convenient
1261 to append to the end, so a list is a list and a statement. When using
1262 the list it is more convenient to consider a list to be a statement
1263 and a list. So we need a function to re-order a list.
1264 `reorder_bilist` serves this purpose.
1266 The only stand-alone statement we introduce at this stage is `pass`
1267 which does nothing and is represented as a `NULL` pointer in a `Block`
1287 Block -> Open Statementlist Close ${ $0 = $<2; }$
1288 | Open Newlines Statementlist Close ${ $0 = $<3; }$
1289 | Open SimpleStatements } ${ $0 = reorder_bilist($<2); }$
1290 | Open Newlines SimpleStatements } ${ $0 = reorder_bilist($<3); }$
1291 | : Statementlist ${ $0 = $<2; }$
1292 | : SimpleStatements ${ $0 = reorder_bilist($<2); }$
1294 Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<1); }$
1296 ComplexStatements -> ComplexStatements ComplexStatement ${
1302 | ComplexStatements NEWLINE ${ $0 = $<1; }$
1303 | ComplexStatement ${
1311 ComplexStatement -> SimpleStatements NEWLINE ${
1312 $0 = reorder_bilist($<1);
1314 ## ComplexStatement Grammar
1317 SimpleStatements -> SimpleStatements ; SimpleStatement ${
1323 | SimpleStatement ${
1329 | SimpleStatements ; ${ $0 = $<1; }$
1331 SimpleStatement -> pass ${ $0 = NULL; }$
1332 ## SimpleStatement Grammar
1334 ###### print binode cases
1338 if (b->left == NULL)
1341 print_exec(b->left, indent, 0);
1344 print_exec(b->right, indent, 0);
1347 // block, one per line
1348 if (b->left == NULL)
1349 do_indent(indent, "pass\n");
1351 print_exec(b->left, indent, bracket);
1353 print_exec(b->right, indent, bracket);
1357 ###### propagate binode cases
1360 /* If any statement returns something other then Vnone
1361 * then all such must return same type.
1362 * As each statement may be Vnone or something else,
1363 * we must always pass Vunknown down, otherwise an incorrect
1364 * error might occur.
1368 for (e = b; e; e = cast(binode, e->right)) {
1369 t = propagate_types(e->left, Vunknown, ok);
1370 if (t != Vunknown && t != Vnone) {
1371 if (type == Vunknown)
1380 ###### interp binode cases
1382 while (rv.vtype == Vnone &&
1385 rv = interp_exec(b->left);
1386 b = cast(binode, b->right);
1390 ### The Print statement
1392 `print` is a simple statement that takes a comma-separated list of
1393 expressions and prints the values separated by spaces and terminated
1394 by a newline. No control of formatting is possible.
1396 `print` faces the same list-ordering issue as blocks, and uses the
1402 ###### SimpleStatement Grammar
1404 | print ExpressionList ${
1405 $0 = reorder_bilist($<2);
1407 | print ExpressionList , ${
1412 $0 = reorder_bilist($0);
1423 ExpressionList -> ExpressionList , Expression ${
1436 ###### print binode cases
1439 do_indent(indent, "print");
1443 print_exec(b->left, -1, 0);
1447 b = cast(binode, b->right);
1453 ###### propagate binode cases
1456 /* don't care but all must be consistent */
1457 propagate_types(b->left, Vunknown, ok);
1458 propagate_types(b->right, Vunknown, ok);
1461 ###### interp binode cases
1467 for ( ; b; b = cast(binode, b->right))
1471 left = interp_exec(b->left);
1484 ###### Assignment statement
1486 An assignment will assign a value to a variable. The analysis phase
1487 ensures that the type will be correct so the interpreted just needs to
1488 perform the calculation.
1493 ###### SimpleStatement Grammar
1494 | Variable = Expression ${
1501 ###### print binode cases
1504 do_indent(indent, "");
1505 print_exec(b->left, indent, 0);
1507 print_exec(b->right, indent, 0);
1512 ###### propagate binode cases
1515 /* Both must match, result is Vnone */
1516 t = propagate_types(b->left, Vunknown, ok);
1518 propagate_types(b->right, t, ok);
1520 t = propagate_types(b->right, Vunknown, ok);
1522 t = propagate_types(b->left, t, ok);
1526 ###### interp binode cases
1530 struct variable *v = cast(var, b->left)->var;
1531 right = interp_exec(b->right);
1534 right.vtype = Vunknown;
1538 ### The `use` statement
1540 The `use` statement is the last "simple" statement. It is needed when
1541 the condition in a conditional statement is a block. `use` works much
1542 like `return` in C, but only completes the `condition`, not the whole
1548 ###### SimpleStatement Grammar
1555 ###### print binode cases
1558 do_indent(indent, "use ");
1559 print_exec(b->right, -1, 0);
1564 ###### propagate binode cases
1567 /* result matches value */
1568 return propagate_types(b->right, type, ok);
1570 ###### interp binode cases
1573 rv = interp_exec(b->right);
1576 ### The Conditional Statement
1578 This is the biggy and currently the only complex statement.
1579 This subsumes `if`, `while`, `do/while`, `switch`, and some part of
1580 `for`. It is comprised of a number of parts, all of which are
1581 optional though set combinations apply.
1583 If there is a `forpart`, it is executed first, only once.
1584 If there is a `dopart`, then it is executed repeatedly providing
1585 always that the `condpart` or `cond`, if present, does not return a non-True
1586 value. `condpart` can fail to return any value if it simply executes
1587 to completion. This is treated the same as returning True.
1589 If there is a `thenpart` it will be executed whenever the `condpart`
1590 or `cond` returns True (or does not return), but this will happen
1591 *after* `dopart` (when present).
1593 If `elsepart` is present it will be executed at most once when the
1594 condition returns False. If there are any `casepart`s, they will be
1595 executed when the condition returns a matching value.
1597 The particular sorts of values allowed in case parts has not yet been
1598 determined in the language design.
1600 The cond_statement cannot fit into a `binode` so a new `exec` is
1609 struct exec *action;
1610 struct casepart *next;
1612 struct cond_statement {
1614 struct exec *forpart, *condpart, *dopart, *thenpart, *elsepart;
1615 struct casepart *casepart;
1618 ###### ast functions
1620 static void free_casepart(struct casepart *cp)
1624 free_exec(cp->value);
1625 free_exec(cp->action);
1632 void free_cond_statement(struct cond_statement *s)
1636 free_exec(s->forpart);
1637 free_exec(s->condpart);
1638 free_exec(s->dopart);
1639 free_exec(s->thenpart);
1640 free_exec(s->elsepart);
1641 free_casepart(s->casepart);
1645 ###### free exec cases
1646 case Xcond_statement: free_cond_statement(cast(cond_statement, e)); break;
1648 ###### ComplexStatement Grammar
1649 | CondStatement ${ $0 = $<1; }$
1654 CondStatement -> ForThen WhilePart CondSuffix ${
1656 $0->forpart = $1.forpart; $1.forpart = NULL;
1657 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
1658 $0->condpart = $2.condpart; $2.condpart = NULL;
1659 $0->dopart = $2.dopart; $2.dopart = NULL;
1661 | WhilePart CondSuffix ${
1663 $0->condpart = $1.condpart; $1.condpart = NULL;
1664 $0->dopart = $1.dopart; $1.dopart = NULL;
1666 | SwitchPart CondSuffix ${
1670 | IfPart IfSuffix ${
1672 $0->condpart = $1.condpart; $1.condpart = NULL;
1673 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
1676 CondSuffix -> IfSuffix ${ $0 = $<1; }$
1677 | Newlines case Expression Block CondSuffix ${ {
1678 struct casepart *cp = calloc(1, sizeof(*cp));
1682 cp->next = $0->casepart;
1685 | case Expression Block CondSuffix ${ {
1686 struct casepart *cp = calloc(1, sizeof(*cp));
1690 cp->next = $0->casepart;
1694 IfSuffix -> Newlines ${ $0 = new(cond_statement); }$
1695 | Newlines else Block ${
1696 $0 = new(cond_statement);
1700 $0 = new(cond_statement);
1703 | Newlines else CondStatement ${
1704 $0 = new(cond_statement);
1707 | else CondStatement ${
1708 $0 = new(cond_statement);
1714 ForPart -> for SimpleStatements ${
1715 $0 = reorder_bilist($<2);
1721 ThenPart -> then SimpleStatements ${
1722 $0 = reorder_bilist($<2);
1728 ThenPartNL -> ThenPart OptNL ${
1732 WhileHead -> while Block ${
1737 ForThen -> ForPart OptNL ThenPartNL ${
1745 WhilePart -> while Expression Block ${
1746 $0.type = Xcond_statement;
1750 | WhileHead OptNL do Block ${
1751 $0.type = Xcond_statement;
1756 IfPart -> if Expression Block ${
1757 $0.type = Xcond_statement;
1761 | if Block OptNL then Block ${
1762 $0.type = Xcond_statement;
1768 SwitchPart -> switch Expression ${
1775 ###### print exec cases
1777 case Xcond_statement:
1779 struct cond_statement *cs = cast(cond_statement, e);
1780 struct casepart *cp;
1782 do_indent(indent, "for");
1783 if (bracket) printf(" {\n"); else printf(":\n");
1784 print_exec(cs->forpart, indent+1, bracket);
1787 do_indent(indent, "} then {\n");
1789 do_indent(indent, "then:\n");
1790 print_exec(cs->thenpart, indent+1, bracket);
1792 if (bracket) do_indent(indent, "}\n");
1796 if (cs->condpart && cs->condpart->type == Xbinode &&
1797 cast(binode, cs->condpart)->op == Block) {
1799 do_indent(indent, "while {\n");
1801 do_indent(indent, "while:\n");
1802 print_exec(cs->condpart, indent+1, bracket);
1804 do_indent(indent, "} do {\n");
1806 do_indent(indent, "do:\n");
1807 print_exec(cs->dopart, indent+1, bracket);
1809 do_indent(indent, "}\n");
1811 do_indent(indent, "while ");
1812 print_exec(cs->condpart, 0, bracket);
1817 print_exec(cs->dopart, indent+1, bracket);
1819 do_indent(indent, "}\n");
1824 do_indent(indent, "switch");
1826 do_indent(indent, "if");
1827 if (cs->condpart && cs->condpart->type == Xbinode &&
1828 cast(binode, cs->condpart)->op == Block) {
1830 print_exec(cs->condpart, indent+1, bracket);
1832 do_indent(indent, "then:\n");
1833 print_exec(cs->thenpart, indent+1, bracket);
1837 print_exec(cs->condpart, 0, bracket);
1840 print_exec(cs->thenpart, indent+1, bracket);
1845 for (cp = cs->casepart; cp; cp = cp->next) {
1846 do_indent(indent, "case ");
1847 print_exec(cp->value, -1, 0);
1849 print_exec(cp->action, indent+1, bracket);
1852 do_indent(indent, "else:\n");
1853 print_exec(cs->elsepart, indent+1, bracket);
1858 ###### propagate exec cases
1859 case Xcond_statement:
1861 // forpart and dopart must return Vnone
1862 // condpart must be bool or match casepart->values
1863 // thenpart, elsepart, casepart->action must match
1865 struct cond_statement *cs = cast(cond_statement, prog);
1868 t = propagate_types(cs->forpart, Vnone, ok);
1869 if (t != Vunknown && t != Vnone)
1871 t = propagate_types(cs->dopart, Vnone, ok);
1872 if (t != Vunknown && t != Vnone)
1874 if (cs->casepart == NULL)
1875 propagate_types(cs->condpart, Vbool, ok);
1878 for (c = cs->casepart;
1879 c && (t == Vunknown); c = c->next)
1880 t = propagate_types(c->value, Vunknown, ok);
1881 if (t == Vunknown && cs->condpart)
1882 t = propagate_types(cs->condpart, Vunknown, ok);
1883 // Now we have a type (I hope) push it down
1884 if (t != Vunknown) {
1885 for (c = cs->casepart; c; c = c->next)
1886 propagate_types(c->value, t, ok);
1887 propagate_types(cs->condpart, t, ok);
1890 if (type == Vunknown || type == Vnone)
1891 type = propagate_types(cs->thenpart, Vunknown, ok);
1892 if (type == Vunknown || type == Vnone)
1893 type = propagate_types(cs->elsepart, Vunknown, ok);
1894 for (c = cs->casepart;
1895 c && (type == Vunknown || type == Vnone);
1897 type = propagate_types(c->action, Vunknown, ok);
1898 if (type != Vunknown && type != Vnone) {
1899 propagate_types(cs->thenpart, type, ok);
1900 propagate_types(cs->elsepart, type, ok);
1901 for (c = cs->casepart; c ; c = c->next)
1902 propagate_types(c->action, type, ok);
1908 ###### interp exec cases
1909 case Xcond_statement:
1911 struct value v, cnd;
1912 struct casepart *cp;
1913 struct cond_statement *c = cast(cond_statement, e);
1915 interp_exec(c->forpart);
1918 cnd = interp_exec(c->condpart);
1921 if (!(cnd.vtype == Vnone ||
1922 (cnd.vtype == Vbool && cnd.bool != 0)))
1926 interp_exec(c->dopart);
1929 v = interp_exec(c->thenpart);
1930 if (v.vtype != Vnone || !c->dopart)
1934 } while (c->dopart);
1936 for (cp = c->casepart; cp; cp = cp->next) {
1937 v = interp_exec(cp->value);
1938 if (value_cmp(v, cnd) == 0) {
1941 return interp_exec(cp->action);
1947 return interp_exec(c->elsepart);
1952 ### Finally the whole program.
1954 Somewhat reminiscent of Pascal a (current) Ocean program starts with
1955 the keyword "program" and list of variable names which are assigned
1956 values from command line arguments. Following this is a `block` which
1957 is the code to execute.
1959 As this is the top level, several things are handled a bit
1961 The whole program is not interpreted by `interp_exec` as that isn't
1962 passed the argument list which the program requires. Similarly type
1963 analysis is a bit more interesting at this level.
1968 ###### Parser: grammar
1971 Program -> program Varlist Block OptNL ${
1974 $0->left = reorder_bilist($<2);
1978 Varlist -> Varlist Variable ${
1987 ###### print binode cases
1989 do_indent(indent, "program");
1990 for (b2 = cast(binode, b->left); b2; b2 = cast(binode, b2->right)) {
1992 print_exec(b2->left, 0, 0);
1998 print_exec(b->right, indent+1, bracket);
2000 do_indent(indent, "}\n");
2003 ###### propagate binode cases
2004 case Program: abort();
2006 ###### core functions
2008 static int analyse_prog(struct exec *prog, struct parse_context *c)
2010 struct binode *b = cast(binode, prog);
2016 propagate_types(b->right, Vnone, &ok);
2021 for (b = cast(binode, b->left); b; b = cast(binode, b->right)) {
2022 struct var *v = cast(var, b->left);
2023 if (v->var->val.vtype == Vunknown)
2024 val_init(&v->var->val, Vstr);
2026 b = cast(binode, prog);
2029 propagate_types(b->right, Vnone, &ok);
2034 for (v = c->varlist; v; v = v->next)
2035 if (v->val.vtype == Vunknown) {
2036 v->val.vtype = Vnum;
2037 mpq_init(v->val.num);
2038 mpq_set_ui(v->val.num, uniq, 1);
2041 /* Make sure everything is still consistent */
2042 propagate_types(b->right, Vnone, &ok);
2046 static void interp_prog(struct exec *prog, char **argv)
2048 struct binode *p = cast(binode, prog);
2049 struct binode *al = cast(binode, p->left);
2053 struct var *v = cast(var, al->left);
2054 struct value *vl = &v->var->val;
2056 if (argv[0] == NULL) {
2057 printf("Not enough args\n");
2060 al = cast(binode, al->right);
2062 if (!parse_value(vl, argv[0]))
2066 v = interp_exec(p->right);
2070 ###### interp binode cases
2071 case Program: abort();