1 # Ocean Interpreter - Falls Creek version
3 Ocean is intended to be an compiled language, so this interpreter is
4 not targeted at being the final product. It is very much an intermediate
5 stage, and fills that role in two distinct ways.
7 Firstly, it exists as a platform to experiment with the early language
8 design. An interpreter is easy to write and easy to get working, so
9 the barrier for entry is lower if I aim to start with an interpreter.
11 Secondly, the plan for the Ocean compiler is to write it in the
12 [Ocean language](http://ocean-lang.org). To achieve this we naturally
13 need some sort of boot-strap process and this interpreter - written in
14 portable C - will fill that role. It will be used to bootstrap the
17 Two features that are not needed to fill either of these roles are
18 performance and completeness. The interpreter only needs to be fast
19 enough to run small test programs and occasionally to run the compiler
20 on itself. It only needs to be complete enough to test aspects of the
21 design which are developed before the compiler is working, and to run
22 the compiler on itself. Any features not used by the compiler when
23 compiling itself are superfluous. They may be included anyway, but
26 Nonetheless, the interpreter should end up being reasonably complete,
27 and any performance bottlenecks which appear and are easily fixed, will
32 This initial version of the interpreter exists to test out the
33 structured statement providing conditions and iteration. Clearly we
34 need some minimal other functionality so that values can be tested and
35 instructions iterated over. All that functionality is clearly not
36 normative at this stage (not that anything is **really** normative
37 yet) and will change, so early test code will certainly break.
39 Beyond the structured statement and the `use` statement which is
40 intimately related to it we have:
42 - "blocks" of multiple statements.
43 - `pass`: a statement which does nothing.
44 - variables: any identifier is assumed to store a number, string,
46 - expressions: `+`, `-`, `*`, `/` can apply to integers and `++` can
47 catenate strings. `and`, `or`, `not` manipulate Booleans, and
48 normal comparison operators can work on all three types.
49 - assignments: can assign the value of an expression to a variable.
50 - `print`: will print the values in a list of expressions.
51 - `program`: is given a list of identifiers to initialize from
56 Versions of the interpreter which obviously do not support a complete
57 language will be named after creeks and streams. This one is Falls
60 Once we have something reasonably resembling a complete language, the
61 names of rivers will be used.
62 Early versions of the compiler will be named after seas. Major
63 releases of the compiler will be named after oceans. Hopefully I will
64 be finished once I get to the Pacific Ocean release.
68 As well as parsing and executing a program, the interpreter can print
69 out the program from the parsed internal structure. This is useful
70 for validating the parsing.
71 So the main requirements of the interpreter are:
74 - Analyse the parsed program to ensure consistency
78 This is all performed by a single C program extracted with
81 There will be two formats for printing the program a default and one
82 that uses bracketing. So an extra command line option is needed for
85 ###### File: oceani.mk
87 myCFLAGS := -Wall -g -fplan9-extensions
88 CFLAGS := $(filter-out $(myCFLAGS),$(CFLAGS)) $(myCFLAGS)
89 myLDLIBS:= libparser.o libscanner.o libmdcode.o -licuuc
90 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
92 all :: $(LDLIBS) oceani
93 oceani.c oceani.h : oceani.mdc parsergen
94 ./parsergen -o oceani --LALR --tag Parser oceani.mdc
95 oceani.mk: oceani.mdc md2c
100 ###### Parser: header
103 struct parse_context {
104 struct token_config config;
114 #include <sys/mman.h>
133 static char Usage[] = "Usage: oceani --trace --print --noexec prog.ocn\n";
134 static const struct option long_options[] = {
135 {"trace", 0, NULL, 't'},
136 {"print", 0, NULL, 'p'},
137 {"noexec", 0, NULL, 'n'},
138 {"brackets", 0, NULL, 'b'},
141 const char *options = "tpnb";
142 int main(int argc, char *argv[])
148 struct parse_context context = {
150 .ignored = (1 << TK_line_comment)
151 | (1 << TK_block_comment),
152 .number_chars = ".,_+-",
157 int doprint=0, dotrace=0, doexec=1, brackets=0;
160 while ((opt = getopt_long(argc, argv, options, long_options, NULL))
163 case 't': dotrace=1; break;
164 case 'p': doprint=1; break;
165 case 'n': doexec=0; break;
166 case 'b': brackets=1; break;
167 default: fprintf(stderr, Usage);
171 if (optind >= argc) {
172 fprintf(stderr, "oceani: no input file given\n");
175 fd = open(argv[optind], O_RDONLY);
177 fprintf(stderr, "oceani: cannot open %s\n", argv[optind]);
180 len = lseek(fd, 0, 2);
181 file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
182 s = code_extract(file, file+len, NULL);
184 fprintf(stderr, "oceani: could not find any code in %s\n",
188 prog = parse_oceani(s->code, &context.config,
189 dotrace ? stderr : NULL);
191 print_exec(*prog, 0, brackets);
192 if (prog && doexec) {
193 if (!analyse_prog(*prog, &context)) {
194 fprintf(stderr, "oceani: type error in program\n");
197 interp_prog(*prog, argv+optind+1);
204 struct section *t = s->next;
215 These four requirements of parse, analyse, print, interpret apply to
216 each language element individually so that is how most of the code
219 Three of the four are fairly self explanatory. The one that requires
220 a little explanation is the analysis step.
222 The current language design does not require variables to be declared,
223 but they must have a single type. Different operations impose
224 different requirements on the variables, for example addition requires
225 both arguments to be numeric, and assignment requires the variable on
226 the left to have the same type as the expression on the right.
228 Analysis involves propagating these type requirements around
229 consequently setting the type of each variable. If any requirements
230 are violated (e.g. a string is compared with a number) or if a
231 variable needs to have two different types, then an error is raised
232 and the program will not run.
234 Determining the types of all variables early is important for
235 processing command line arguments. These can be assigned to any type
236 of variable, but we must first know the correct type so any required
237 conversion can happen. If a variable is associated with a command
238 line argument but no type can be interpreted (e.g. the variable is
239 only ever used in a `print` statement), then the type is set to
242 If the type of a variable cannot be determined at all, then it is set
243 to be a number and given a unique value. This allows it to fill the
244 role of a name in an enumerated type, which is useful for testing the
249 One last introductory step before detailing the language elements and
250 providing their four requirements is to establish the data structures
251 to store these elements.
253 There are two key objects that we need to work with: executable
254 elements which comprise the program, and values which the program
255 works with. Between these is the set of variables which hold the
260 Values can be numbers, which we represent as multi-precision
261 fractions, strings and Booleans. When analysing the program we also
262 need to allow for places where no value is meaningful (`Vnone`) and
263 where we don't know what type to expect yet (`Vunknown`).
264 A 2 character 'tail' is included in each value as the scanner wants
265 to parse that from the end of numbers and we need somewhere to put
266 it. It is currently ignored but one day might allow for
267 e.g. "imaginary" numbers.
269 Values are never shared, they are always copied when used, and freed
270 when no longer needed.
278 myLDLIBS := libnumber.o libstring.o -lgmp
279 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
283 enum vtype {Vunknown, Vnone, Vstr, Vnum, Vbool} vtype;
293 void free_value(struct value v)
297 case Vunknown: break;
298 case Vstr: free(v.str.txt); break;
299 case Vnum: mpq_clear(v.num); break;
304 ###### value functions
306 static void val_init(struct value *val, enum vtype type)
311 case Vunknown: break;
313 mpq_init(val->num); break;
315 val->str.txt = malloc(1);
324 static struct value dup_value(struct value v)
330 case Vunknown: break;
336 mpq_set(rv.num, v.num);
339 rv.str.len = v.str.len;
340 rv.str.txt = malloc(rv.str.len);
341 memcpy(rv.str.txt, v.str.txt, v.str.len);
347 static int value_cmp(struct value left, struct value right)
350 if (left.vtype != right.vtype)
351 return left.vtype - right.vtype;
352 switch (left.vtype) {
353 case Vnum: cmp = mpq_cmp(left.num, right.num); break;
354 case Vstr: cmp = text_cmp(left.str, right.str); break;
355 case Vbool: cmp = left.bool - right.bool; break;
357 case Vunknown: cmp = 0;
362 static struct text text_join(struct text a, struct text b)
365 rv.len = a.len + b.len;
366 rv.txt = malloc(rv.len);
367 memcpy(rv.txt, a.txt, a.len);
368 memcpy(rv.txt+a.len, b.txt, b.len);
372 static void print_value(struct value v)
376 printf("*Unknown*"); break;
378 printf("*no-value*"); break;
380 printf("%.*s", v.str.len, v.str.txt); break;
382 printf("%s", v.bool ? "True":"False"); break;
387 mpf_set_q(fl, v.num);
388 gmp_printf("%Fg", fl);
395 static int parse_value(struct value *vl, char *arg)
404 vl->str.len = strlen(arg);
405 vl->str.txt = malloc(vl->str.len);
406 memcpy(vl->str.txt, arg, vl->str.len);
413 tx.txt = arg; tx.len = strlen(tx.txt);
414 if (number_parse(vl->num, vl->tail, tx) == 0)
417 mpq_neg(vl->num, vl->num);
420 if (strcasecmp(arg, "true") == 0 ||
421 strcmp(arg, "1") == 0)
423 else if (strcasecmp(arg, "false") == 0 ||
424 strcmp(arg, "0") == 0)
427 printf("Bad bool: %s\n", arg);
437 Variables are simply named values. We store them in a linked list
438 sorted by name and use sequential search and insertion sort.
440 This linked list is stored in the parse context so that reduce
441 functions can find or add variables, and so the analysis phase can
442 ensure that every variable gets a type.
448 struct variable *next;
454 #define container_of(ptr, type, member) ({ \
455 const typeof( ((type *)0)->member ) *__mptr = (ptr); \
456 (type *)( (char *)__mptr - offsetof(type,member) );})
460 struct variable *varlist;
463 while (context.varlist) {
464 struct variable *v = context.varlist;
465 context.varlist = v->next;
472 static struct variable *find_variable(struct token_config *conf, struct text s)
474 struct variable **l = &container_of(conf, struct parse_context,
480 (cmp = text_cmp((*l)->name, s)) < 0)
484 n = calloc(1, sizeof(*n));
486 n->val.vtype = Vunknown;
494 Executables can be lots of different things. In many cases an
495 executable is just an operation combined with one or two other
496 executables. This allows for expressions and lists etc. Other times
497 an executable is something quite specific like a constant or variable
498 name. So we define a `struct exec` to be a general executable with a
499 type, and a `struct binode` which is a subclass of `exec` and forms a
500 node in a binary tree and holding an operation. There will be other
501 subclasses, and to access these we need to be able to `cast` the
502 `exec` into the various other types.
505 #define cast(structname, pointer) ({ \
506 const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \
507 if (__mptr && *__mptr != X##structname) abort(); \
508 (struct structname *)( (char *)__mptr);})
510 #define new(structname) ({ \
511 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
512 __ptr->type = X##structname; \
521 enum exec_types type;
528 struct exec *left, *right;
531 Each different type of `exec` node needs a number of functions
532 defined, a bit like methods. We must be able to be able to free it,
533 print it, analyse it and execute it. Once we have specific `exec`
534 types we will need to parse them too. Let's take this a bit more
539 The parser generator requires a `free_foo` function for each struct
540 that stores attributes and they will be `exec`s of subtypes there-of.
541 So we need `free_exec` which can handle all the subtypes, and we need
546 static void free_binode(struct binode *b)
555 ###### core functions
556 static void free_exec(struct exec *e)
567 static void free_exec(struct exec *e);
569 ###### free exec cases
570 case Xbinode: free_binode(cast(binode, e)); break;
574 Printing an `exec` requires that we know the current indent level for
575 printing line-oriented components. As will become clear later, we
576 also want to know what sort of bracketing to use.
580 static void do_indent(int i, char *str)
587 ###### core functions
588 static void print_binode(struct binode *b, int indent, int bracket)
592 ## print binode cases
596 static void print_exec(struct exec *e, int indent, int bracket)
600 print_binode(cast(binode, e), indent, bracket); break;
607 static void print_exec(struct exec *e, int indent, int bracket);
611 As discussed, analysis involves propagating type requirements around
612 the program and looking for errors.
614 So `propagate_types` is passed a type that the `exec` is expected to return,
615 and returns the type that it does return, either of which can be `Vunknown`.
616 An `ok` flag is passed by reference. It is set to `0` when an error is
617 found, and `2` when any change is made. If it remains unchanged at
618 `1`, then no more propagation is needed.
620 ###### core functions
622 static enum vtype propagate_types(struct exec *prog, enum vtype type,
630 switch (prog->type) {
633 struct binode *b = cast(binode, prog);
635 ## propagate binode cases
639 ## propagate exec cases
646 Interpreting an `exec` doesn't require anything but the `exec`. State
647 is stored in variables and each variable will be directly linked from
648 within the `exec` tree. The exception to this is the whole `program`
649 which needs to look at command line arguments. The `program` will be
650 interpreted separately.
652 Each `exec` can return a value, which may be `Vnone` but shouldn't be `Vunknown`.
654 ###### core functions
656 static struct value interp_exec(struct exec *e)
666 struct binode *b = cast(binode, e);
667 struct value left, right;
668 left.vtype = right.vtype = Vnone;
670 ## interp binode cases
672 free_value(left); free_value(right);
682 Each language element needs to be parsed, printed, analysed,
683 interpreted, and freed. There are several, so let's just start with
684 the easy ones and work our way up.
688 We have already met values and separate objects. When manifest
689 constants appear in the program text that must result in an executable
690 which has a constant value. So the `val` structure embeds a value in
707 $0->val.vtype = Vbool;
712 $0->val.vtype = Vbool;
717 $0->val.vtype = Vnum;
718 if (number_parse($0->val.num, $0->val.tail, $1.txt) == 0)
719 mpq_init($0->val.num);
723 $0->val.vtype = Vstr;
724 string_parse(&$1, '\\', &$0->val.str, $0->val.tail);
728 $0->val.vtype = Vstr;
729 string_parse(&$1, '\\', &$0->val.str, $0->val.tail);
732 ###### print exec cases
735 struct val *v = cast(val, e);
736 if (v->val.vtype == Vstr)
739 if (v->val.vtype == Vstr)
744 ###### propagate exec cases
747 struct val *val = cast(val, prog);
748 if (type != Vunknown &&
749 type != val->val.vtype)
751 return val->val.vtype;
754 ###### interp exec cases
756 return dup_value(cast(val, e)->val);
759 void free_val(struct val *v)
767 ###### free exec cases
768 case Xval: free_val(cast(val, e)); break;
771 // Move all nodes from 'b' to 'rv', reversing the order.
772 // In 'b' 'left' is a list, and 'right' is the last node.
773 // In 'rv', left' is the first node and 'right' is a list.
774 struct binode *reorder_bilist(struct binode *b)
776 struct binode *rv = NULL;
779 struct exec *t = b->right;
783 b = cast(binode, b->left);
793 Just as we used as `val` to wrap a value into an `exec`, we similarly
794 need a `var` to wrap a `variable` into an exec. While each `val`
795 contained a copy of the value, each `var` hold a link to the variable
796 because it really is the same variable no matter where it appears.
804 struct variable *var;
809 Variable -> IDENTIFIER ${
811 $0->var = find_variable(config, $1.txt);
814 ###### print exec cases
817 struct var *v = cast(var, e);
818 printf("%.*s", v->var->name.len, v->var->name.txt);
822 ###### propagate exec cases
826 struct var *var = cast(var, prog);
827 if (var->var->val.vtype == Vunknown) {
828 if (type != Vunknown && *ok != 0) {
829 val_init(&var->var->val, type);
834 if (type == Vunknown)
835 return var->var->val.vtype;
836 if (type != var->var->val.vtype)
841 ###### interp exec cases
843 return dup_value(cast(var, e)->var->val);
847 void free_var(struct var *v)
852 ###### free exec cases
853 case Xvar: free_var(cast(var, e)); break;
855 ### Expressions: Boolean
857 Our first user of the `binode` will be expressions, and particularly
858 Boolean expressions. As I haven't implemented precedence in the
859 parser generator yet, we need different names from each precedence
860 level used by expressions. The outer most or lowest level precedence
861 are Boolean `or` `and`, and `not` which form an `Expression` out of `BTerm`s
872 Expression -> Expression or BTerm ${
878 | BTerm ${ $0 = $<1; }$
880 BTerm -> BTerm and BFact ${
886 | BFact ${ $0 = $<1; }$
888 BFact -> not BFact ${
895 ###### print binode cases
897 print_exec(b->left, -1, 0);
899 print_exec(b->right, -1, 0);
902 print_exec(b->left, -1, 0);
904 print_exec(b->right, -1, 0);
908 print_exec(b->right, -1, 0);
911 ###### propagate binode cases
915 /* both must be Vbool, result is Vbool */
916 propagate_types(b->left, Vbool, ok);
917 propagate_types(b->right, Vbool, ok);
918 if (type != Vbool && type != Vunknown)
922 ###### interp binode cases
924 rv = interp_exec(b->left);
925 right = interp_exec(b->right);
926 rv.bool = rv.bool && right.bool;
929 rv = interp_exec(b->left);
930 right = interp_exec(b->right);
931 rv.bool = rv.bool || right.bool;
934 rv = interp_exec(b->right);
938 ### Expressions: Comparison
940 Of slightly higher precedence that Boolean expressions are
942 A comparison takes arguments of any type, but the two types must be
945 To simplify the parsing we introduce an `eop` which can return an
954 static void free_eop(struct eop *e)
975 | Expr ${ $0 = $<1; }$
980 CMPop -> < ${ $0.op = Less; }$
981 | > ${ $0.op = Gtr; }$
982 | <= ${ $0.op = LessEq; }$
983 | >= ${ $0.op = GtrEq; }$
984 | == ${ $0.op = Eql; }$
985 | != ${ $0.op = NEql; }$
987 ###### print binode cases
995 print_exec(b->left, -1, 0);
997 case Less: printf(" < "); break;
998 case LessEq: printf(" <= "); break;
999 case Gtr: printf(" > "); break;
1000 case GtrEq: printf(" >= "); break;
1001 case Eql: printf(" == "); break;
1002 case NEql: printf(" != "); break;
1005 print_exec(b->right, -1, 0);
1008 ###### propagate binode cases
1015 /* Both must match, result is Vbool */
1016 t = propagate_types(b->left, Vunknown, ok);
1018 propagate_types(b->right, t, ok);
1020 t = propagate_types(b->right, Vunknown, ok);
1022 t = propagate_types(b->left, t, ok);
1024 if (type != Vbool && type != Vunknown)
1028 ###### interp binode cases
1037 left = interp_exec(b->left);
1038 right = interp_exec(b->right);
1039 cmp = value_cmp(left, right);
1042 case Less: rv.bool = cmp < 0; break;
1043 case LessEq: rv.bool = cmp <= 0; break;
1044 case Gtr: rv.bool = cmp > 0; break;
1045 case GtrEq: rv.bool = cmp >= 0; break;
1046 case Eql: rv.bool = cmp == 0; break;
1047 case NEql: rv.bool = cmp != 0; break;
1048 default: rv.bool = 0; break;
1053 ### Expressions: The rest
1055 The remaining expressions with the highest precedence are arithmetic
1056 and string concatenation. There are `Expr`, `Term`, and `Factor`.
1057 The `Factor` is where the `Value` and `Variable` that we already have
1060 `+` and `-` are both infix and prefix operations (where they are
1061 absolute value and negation). These have different operator names.
1063 We also have a 'Bracket' operator which records where parentheses were
1064 found. This make it easy to reproduce these when printing. Once
1065 precedence is handled better I might be able to discard this.
1077 Expr -> Expr Eop Term ${
1083 | Term ${ $0 = $<1; }$
1085 Term -> Term Top Factor ${
1091 | Factor ${ $0 = $<1; }$
1093 Factor -> ( Expression ) ${
1103 | Value ${ $0 = (struct binode *)$<1; }$
1104 | Variable ${ $0 = (struct binode *)$<1; }$
1107 Eop -> + ${ $0.op = Plus; }$
1108 | - ${ $0.op = Minus; }$
1110 Uop -> + ${ $0.op = Absolute; }$
1111 | - ${ $0.op = Negate; }$
1113 Top -> * ${ $0.op = Times; }$
1114 | / ${ $0.op = Divide; }$
1115 | ++ ${ $0.op = Concat; }$
1117 ###### print binode cases
1123 print_exec(b->left, indent, 0);
1125 case Plus: printf(" + "); break;
1126 case Minus: printf(" - "); break;
1127 case Times: printf(" * "); break;
1128 case Divide: printf(" / "); break;
1129 case Concat: printf(" ++ "); break;
1132 print_exec(b->right, indent, 0);
1136 print_exec(b->right, indent, 0);
1140 print_exec(b->right, indent, 0);
1144 print_exec(b->right, indent, 0);
1148 ###### propagate binode cases
1153 /* both must be numbers, result is Vnum */
1156 /* as propagate_types ignores a NULL,
1157 * unary ops fit here too */
1158 propagate_types(b->left, Vnum, ok);
1159 propagate_types(b->right, Vnum, ok);
1160 if (type != Vnum && type != Vunknown)
1165 /* both must be Vstr, result is Vstr */
1166 propagate_types(b->left, Vstr, ok);
1167 propagate_types(b->right, Vstr, ok);
1168 if (type != Vstr && type != Vunknown)
1173 return propagate_types(b->right, type, ok);
1175 ###### interp binode cases
1178 rv = interp_exec(b->left);
1179 right = interp_exec(b->right);
1180 mpq_add(rv.num, rv.num, right.num);
1183 rv = interp_exec(b->left);
1184 right = interp_exec(b->right);
1185 mpq_sub(rv.num, rv.num, right.num);
1188 rv = interp_exec(b->left);
1189 right = interp_exec(b->right);
1190 mpq_mul(rv.num, rv.num, right.num);
1193 rv = interp_exec(b->left);
1194 right = interp_exec(b->right);
1195 mpq_div(rv.num, rv.num, right.num);
1198 rv = interp_exec(b->right);
1199 mpq_neg(rv.num, rv.num);
1202 rv = interp_exec(b->right);
1203 mpq_abs(rv.num, rv.num);
1206 rv = interp_exec(b->right);
1209 left = interp_exec(b->left);
1210 right = interp_exec(b->right);
1212 rv.str = text_join(left.str, right.str);
1215 ### Blocks, Statements, and Statement lists.
1217 Now that we have expressions out of the way we need to turn to
1218 statements. There are simple statements and more complex statements.
1219 Simple statements do not contain newlines, complex statements do.
1221 Statements often come in sequences and we have corresponding simple
1222 statement lists and complex statement lists.
1223 The former comprise only simple statements separated by semicolons.
1224 The later comprise complex statements and simple statement lists. They are
1225 separated by newlines. Thus the semicolon is only used to separate
1226 simple statements on the one line. This may be overly restrictive,
1227 but I'm not sure I every want a complex statement to share a line with
1230 Note that a simple statement list can still use multiple lines if
1231 subsequent lines are indented, so
1233 ###### Example: wrapped simple statement list
1238 is a single simple statement list. This might allow room for
1239 confusion, so I'm not set on it yet.
1241 A simple statement list needs no extra syntax. A complex statement
1242 list has two syntactic forms. It can be enclosed in braces (much like
1243 C blocks), or it can be introduced by a colon and continue until an
1244 unindented newline (much like Python blocks). With this extra syntax
1245 it is referred to as a block.
1247 Note that a block does not have to include any newlines if it only
1248 contains simple statements. So both of:
1250 if condition: a=b; d=f
1252 if condition { a=b; print f }
1256 In either case the list is constructed from a `binode` list with
1257 `Block` as the operator. When parsing the list it is most convenient
1258 to append to the end, so a list is a list and a statement. When using
1259 the list it is more convenient to consider a list to be a statement
1260 and a list. So we need a function to re-order a list.
1261 `reorder_bilist` serves this purpose.
1263 The only stand-alone statement we introduce at this stage is `pass`
1264 which does nothing and is represented as a `NULL` pointer in a `Block`
1284 Block -> Open Statementlist Close ${ $0 = $<2; }$
1285 | Open Newlines Statementlist Close ${ $0 = $<3; }$
1286 | Open SimpleStatements } ${ $0 = reorder_bilist($<2); }$
1287 | Open Newlines SimpleStatements } ${ $0 = reorder_bilist($<3); }$
1288 | : Statementlist ${ $0 = $<2; }$
1289 | : SimpleStatements ${ $0 = reorder_bilist($<2); }$
1291 Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<1); }$
1293 ComplexStatements -> ComplexStatements ComplexStatement ${
1299 | ComplexStatements NEWLINE ${ $0 = $<1; }$
1300 | ComplexStatement ${
1308 ComplexStatement -> SimpleStatements NEWLINE ${
1309 $0 = reorder_bilist($<1);
1311 ## ComplexStatement Grammar
1314 SimpleStatements -> SimpleStatements ; SimpleStatement ${
1320 | SimpleStatement ${
1326 | SimpleStatements ; ${ $0 = $<1; }$
1328 SimpleStatement -> pass ${ $0 = NULL; }$
1329 ## SimpleStatement Grammar
1331 ###### print binode cases
1335 if (b->left == NULL)
1338 print_exec(b->left, indent, 0);
1341 print_exec(b->right, indent, 0);
1344 // block, one per line
1345 if (b->left == NULL)
1346 do_indent(indent, "pass\n");
1348 print_exec(b->left, indent, bracket);
1350 print_exec(b->right, indent, bracket);
1354 ###### propagate binode cases
1357 /* If any statement returns something other then Vnone
1358 * then all such must return same type.
1359 * As each statement may be Vnone or something else,
1360 * we must always pass Vunknown down, otherwise an incorrect
1361 * error might occur.
1365 for (e = b; e; e = cast(binode, e->right)) {
1366 t = propagate_types(e->left, Vunknown, ok);
1367 if (t != Vunknown && t != Vnone) {
1368 if (type == Vunknown)
1377 ###### interp binode cases
1379 while (rv.vtype == Vnone &&
1382 rv = interp_exec(b->left);
1383 b = cast(binode, b->right);
1387 ### The Print statement
1389 `print` is a simple statement that takes a comma-separated list of
1390 expressions and prints the values separated by spaces and terminated
1391 by a newline. No control of formatting is possible.
1393 `print` faces the same list-ordering issue as blocks, and uses the
1399 ###### SimpleStatement Grammar
1401 | print ExpressionList ${
1402 $0 = reorder_bilist($<2);
1404 | print ExpressionList , ${
1409 $0 = reorder_bilist($0);
1420 ExpressionList -> ExpressionList , Expression ${
1433 ###### print binode cases
1436 do_indent(indent, "print");
1440 print_exec(b->left, -1, 0);
1444 b = cast(binode, b->right);
1450 ###### propagate binode cases
1453 /* don't care but all must be consistent */
1454 propagate_types(b->left, Vunknown, ok);
1455 propagate_types(b->right, Vunknown, ok);
1458 ###### interp binode cases
1464 for ( ; b; b = cast(binode, b->right))
1468 left = interp_exec(b->left);
1481 ###### Assignment statement
1483 An assignment will assign a value to a variable. The analysis phase
1484 ensures that the type will be correct so the interpreted just needs to
1485 perform the calculation.
1490 ###### SimpleStatement Grammar
1491 | Variable = Expression ${
1498 ###### print binode cases
1501 do_indent(indent, "");
1502 print_exec(b->left, indent, 0);
1504 print_exec(b->right, indent, 0);
1509 ###### propagate binode cases
1512 /* Both must match, result is Vnone */
1513 t = propagate_types(b->left, Vunknown, ok);
1515 propagate_types(b->right, t, ok);
1517 t = propagate_types(b->right, Vunknown, ok);
1519 t = propagate_types(b->left, t, ok);
1523 ###### interp binode cases
1527 struct variable *v = cast(var, b->left)->var;
1528 right = interp_exec(b->right);
1531 right.vtype = Vunknown;
1535 ### The `use` statement
1537 The `use` statement is the last "simple" statement. It is needed when
1538 the condition in a conditional statement is a block. `use` works much
1539 like `return` in C, but only completes the `condition`, not the whole
1545 ###### SimpleStatement Grammar
1552 ###### print binode cases
1555 do_indent(indent, "use ");
1556 print_exec(b->right, -1, 0);
1561 ###### propagate binode cases
1564 /* result matches value */
1565 return propagate_types(b->right, type, ok);
1567 ###### interp binode cases
1570 rv = interp_exec(b->right);
1573 ### The Conditional Statement
1575 This is the biggy and currently the only complex statement.
1576 This subsumes `if`, `while`, `do/while`, `switch`, and some parts of
1577 `for`. It is comprised of a number of parts, all of which are
1578 optional though set combinations apply.
1580 If there is a `forpart`, it is executed first, only once.
1581 If there is a `dopart`, then it is executed repeatedly providing
1582 always that the `condpart` or `cond`, if present, does not return a non-True
1583 value. `condpart` can fail to return any value if it simply executes
1584 to completion. This is treated the same as returning True.
1586 If there is a `thenpart` it will be executed whenever the `condpart`
1587 or `cond` returns True (or does not return any value), but this will happen
1588 *after* `dopart` (when present).
1590 If `elsepart` is present it will be executed at most once when the
1591 condition returns False. If there are any `casepart`s, they will be
1592 executed when the condition returns a matching value.
1594 The particular sorts of values allowed in case parts has not yet been
1595 determined in the language design.
1597 The `cond_statement` cannot fit into a `binode` so a new `exec` is
1606 struct exec *action;
1607 struct casepart *next;
1609 struct cond_statement {
1611 struct exec *forpart, *condpart, *dopart, *thenpart, *elsepart;
1612 struct casepart *casepart;
1615 ###### ast functions
1617 static void free_casepart(struct casepart *cp)
1621 free_exec(cp->value);
1622 free_exec(cp->action);
1629 void free_cond_statement(struct cond_statement *s)
1633 free_exec(s->forpart);
1634 free_exec(s->condpart);
1635 free_exec(s->dopart);
1636 free_exec(s->thenpart);
1637 free_exec(s->elsepart);
1638 free_casepart(s->casepart);
1642 ###### free exec cases
1643 case Xcond_statement: free_cond_statement(cast(cond_statement, e)); break;
1645 ###### ComplexStatement Grammar
1646 | CondStatement ${ $0 = $<1; }$
1651 CondStatement -> ForThen WhilePart CondSuffix ${
1653 $0->forpart = $1.forpart; $1.forpart = NULL;
1654 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
1655 $0->condpart = $2.condpart; $2.condpart = NULL;
1656 $0->dopart = $2.dopart; $2.dopart = NULL;
1658 | WhilePart CondSuffix ${
1660 $0->condpart = $1.condpart; $1.condpart = NULL;
1661 $0->dopart = $1.dopart; $1.dopart = NULL;
1663 | SwitchPart CondSuffix ${
1667 | IfPart IfSuffix ${
1669 $0->condpart = $1.condpart; $1.condpart = NULL;
1670 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
1673 CondSuffix -> IfSuffix ${ $0 = $<1; }$
1674 | Newlines case Expression Block CondSuffix ${ {
1675 struct casepart *cp = calloc(1, sizeof(*cp));
1679 cp->next = $0->casepart;
1682 | case Expression Block CondSuffix ${ {
1683 struct casepart *cp = calloc(1, sizeof(*cp));
1687 cp->next = $0->casepart;
1691 IfSuffix -> Newlines ${ $0 = new(cond_statement); }$
1692 | Newlines else Block ${
1693 $0 = new(cond_statement);
1697 $0 = new(cond_statement);
1700 | Newlines else CondStatement ${
1701 $0 = new(cond_statement);
1704 | else CondStatement ${
1705 $0 = new(cond_statement);
1711 ForPart -> for SimpleStatements ${
1712 $0 = reorder_bilist($<2);
1718 ThenPart -> then SimpleStatements ${
1719 $0 = reorder_bilist($<2);
1725 ThenPartNL -> ThenPart OptNL ${
1729 WhileHead -> while Block ${
1734 ForThen -> ForPart OptNL ThenPartNL ${
1742 WhilePart -> while Expression Block ${
1743 $0.type = Xcond_statement;
1747 | WhileHead OptNL do Block ${
1748 $0.type = Xcond_statement;
1753 IfPart -> if Expression Block ${
1754 $0.type = Xcond_statement;
1758 | if Block OptNL then Block ${
1759 $0.type = Xcond_statement;
1765 SwitchPart -> switch Expression ${
1772 ###### print exec cases
1774 case Xcond_statement:
1776 struct cond_statement *cs = cast(cond_statement, e);
1777 struct casepart *cp;
1779 do_indent(indent, "for");
1780 if (bracket) printf(" {\n"); else printf(":\n");
1781 print_exec(cs->forpart, indent+1, bracket);
1784 do_indent(indent, "} then {\n");
1786 do_indent(indent, "then:\n");
1787 print_exec(cs->thenpart, indent+1, bracket);
1789 if (bracket) do_indent(indent, "}\n");
1793 if (cs->condpart && cs->condpart->type == Xbinode &&
1794 cast(binode, cs->condpart)->op == Block) {
1796 do_indent(indent, "while {\n");
1798 do_indent(indent, "while:\n");
1799 print_exec(cs->condpart, indent+1, bracket);
1801 do_indent(indent, "} do {\n");
1803 do_indent(indent, "do:\n");
1804 print_exec(cs->dopart, indent+1, bracket);
1806 do_indent(indent, "}\n");
1808 do_indent(indent, "while ");
1809 print_exec(cs->condpart, 0, bracket);
1814 print_exec(cs->dopart, indent+1, bracket);
1816 do_indent(indent, "}\n");
1821 do_indent(indent, "switch");
1823 do_indent(indent, "if");
1824 if (cs->condpart && cs->condpart->type == Xbinode &&
1825 cast(binode, cs->condpart)->op == Block) {
1827 print_exec(cs->condpart, indent+1, bracket);
1829 do_indent(indent, "then:\n");
1830 print_exec(cs->thenpart, indent+1, bracket);
1834 print_exec(cs->condpart, 0, bracket);
1837 print_exec(cs->thenpart, indent+1, bracket);
1842 for (cp = cs->casepart; cp; cp = cp->next) {
1843 do_indent(indent, "case ");
1844 print_exec(cp->value, -1, 0);
1846 print_exec(cp->action, indent+1, bracket);
1849 do_indent(indent, "else:\n");
1850 print_exec(cs->elsepart, indent+1, bracket);
1855 ###### propagate exec cases
1856 case Xcond_statement:
1858 // forpart and dopart must return Vnone
1859 // condpart must be bool or match casepart->values
1860 // thenpart, elsepart, casepart->action must match
1862 struct cond_statement *cs = cast(cond_statement, prog);
1865 t = propagate_types(cs->forpart, Vnone, ok);
1866 if (t != Vunknown && t != Vnone)
1868 t = propagate_types(cs->dopart, Vnone, ok);
1869 if (t != Vunknown && t != Vnone)
1871 if (cs->casepart == NULL)
1872 propagate_types(cs->condpart, Vbool, ok);
1875 for (c = cs->casepart;
1876 c && (t == Vunknown); c = c->next)
1877 t = propagate_types(c->value, Vunknown, ok);
1878 if (t == Vunknown && cs->condpart)
1879 t = propagate_types(cs->condpart, Vunknown, ok);
1880 // Now we have a type (I hope) push it down
1881 if (t != Vunknown) {
1882 for (c = cs->casepart; c; c = c->next)
1883 propagate_types(c->value, t, ok);
1884 propagate_types(cs->condpart, t, ok);
1887 if (type == Vunknown || type == Vnone)
1888 type = propagate_types(cs->thenpart, Vunknown, ok);
1889 if (type == Vunknown || type == Vnone)
1890 type = propagate_types(cs->elsepart, Vunknown, ok);
1891 for (c = cs->casepart;
1892 c && (type == Vunknown || type == Vnone);
1894 type = propagate_types(c->action, Vunknown, ok);
1895 if (type != Vunknown && type != Vnone) {
1896 propagate_types(cs->thenpart, type, ok);
1897 propagate_types(cs->elsepart, type, ok);
1898 for (c = cs->casepart; c ; c = c->next)
1899 propagate_types(c->action, type, ok);
1905 ###### interp exec cases
1906 case Xcond_statement:
1908 struct value v, cnd;
1909 struct casepart *cp;
1910 struct cond_statement *c = cast(cond_statement, e);
1912 interp_exec(c->forpart);
1915 cnd = interp_exec(c->condpart);
1918 if (!(cnd.vtype == Vnone ||
1919 (cnd.vtype == Vbool && cnd.bool != 0)))
1923 interp_exec(c->dopart);
1926 v = interp_exec(c->thenpart);
1927 if (v.vtype != Vnone || !c->dopart)
1931 } while (c->dopart);
1933 for (cp = c->casepart; cp; cp = cp->next) {
1934 v = interp_exec(cp->value);
1935 if (value_cmp(v, cnd) == 0) {
1938 return interp_exec(cp->action);
1944 return interp_exec(c->elsepart);
1949 ### Finally the whole program.
1951 Somewhat reminiscent of Pascal a (current) Ocean program starts with
1952 the keyword "program" and a list of variable names which are assigned
1953 values from command line arguments. Following this is a `block` which
1954 is the code to execute.
1956 As this is the top level, several things are handled a bit
1958 The whole program is not interpreted by `interp_exec` as that isn't
1959 passed the argument list which the program requires. Similarly type
1960 analysis is a bit more interesting at this level.
1965 ###### Parser: grammar
1968 Program -> program Varlist Block OptNL ${
1971 $0->left = reorder_bilist($<2);
1975 Varlist -> Varlist Variable ${
1984 ###### print binode cases
1986 do_indent(indent, "program");
1987 for (b2 = cast(binode, b->left); b2; b2 = cast(binode, b2->right)) {
1989 print_exec(b2->left, 0, 0);
1995 print_exec(b->right, indent+1, bracket);
1997 do_indent(indent, "}\n");
2000 ###### propagate binode cases
2001 case Program: abort();
2003 ###### core functions
2005 static int analyse_prog(struct exec *prog, struct parse_context *c)
2007 struct binode *b = cast(binode, prog);
2013 propagate_types(b->right, Vnone, &ok);
2018 for (b = cast(binode, b->left); b; b = cast(binode, b->right)) {
2019 struct var *v = cast(var, b->left);
2020 if (v->var->val.vtype == Vunknown)
2021 val_init(&v->var->val, Vstr);
2023 b = cast(binode, prog);
2026 propagate_types(b->right, Vnone, &ok);
2031 for (v = c->varlist; v; v = v->next)
2032 if (v->val.vtype == Vunknown) {
2033 v->val.vtype = Vnum;
2034 mpq_init(v->val.num);
2035 mpq_set_ui(v->val.num, uniq, 1);
2038 /* Make sure everything is still consistent */
2039 propagate_types(b->right, Vnone, &ok);
2043 static void interp_prog(struct exec *prog, char **argv)
2045 struct binode *p = cast(binode, prog);
2046 struct binode *al = cast(binode, p->left);
2050 struct var *v = cast(var, al->left);
2051 struct value *vl = &v->var->val;
2053 if (argv[0] == NULL) {
2054 printf("Not enough args\n");
2057 al = cast(binode, al->right);
2059 if (!parse_value(vl, argv[0]))
2063 v = interp_exec(p->right);
2067 ###### interp binode cases
2068 case Program: abort();