1 <link href="http://jasonm23.github.io/markdown-css-themes/markdown.css" rel="stylesheet"/>
2 # Ocean Interpreter - Falls Creek version
4 Ocean is intended to be an compiled language, so this interpreter is
5 not targeted at being the final product. It is very much an intermediate
6 stage, and fills that role in two distinct ways.
8 Firstly, it exists as a platform to experiment with the early language
9 design. An interpreter is easy to write and easy to get working, so
10 the barrier for entry is lower if I aim to start with an interpreter.
12 Secondly, the plan for the Ocean compiler is to write it in the
13 [Ocean language](http://ocean-lang.org). To achieve this we naturally
14 need some sort of boot-strap process and this interpreter - written in
15 portable C - will fill that role. It will be used to bootstrap the
18 Two features that are not needed to fill either of these roles are
19 performance and completeness. The interpreter only needs to be fast
20 enough to run small test programs and occasionally to run the compiler
21 on itself. It only needs to be complete enough to test aspects of the
22 design which are developed before the compiler is working, and to run
23 the compiler on itself. Any features not used by the compiler when
24 compiling itself are superfluous. They may be included anyway, but
27 Nonetheless, the interpreter should end up being reasonably complete,
28 and any performance bottlenecks which appear and are easily fixed, will
33 This initial version of the interpreter exists to test out the
34 structured statement providing conditions and iteration. Clearly we
35 need some minimal other functionality so that values can be tested and
36 instructions iterated over. All that functionality is clearly not
37 normative at this stage (not that anything is **really** normative
38 yet) and will change, so early test code will certainly break.
40 Beyond the structured statement and the `use` statement which is
41 intimately related to it we have:
43 - "blocks" of multiple statements.
44 - `pass`: a statement which does nothing.
45 - variables: any identifier is assumed to store a number, string,
47 - expressions: `+`, `-`, `*`, `/` can apply to integers and `++` can
48 catenate strings. `and`, `or`, `not` manipulate Booleans, and
49 normal comparison operators can work on all three types.
50 - assignments: can assign the value of an expression to a variable.
51 - `print`: will print the values in a list of expressions.
52 - `program`: is given a list of identifiers to initialize from
57 Versions of the interpreter which obviously do not support a complete
58 language will be named after creeks and streams. This one is Falls
61 Once we have something reasonably resembling a complete language, the
62 names of rivers will be used.
63 Early versions of the compiler will be named after seas. Major
64 releases of the compiler will be named after oceans. Hopefully I will
65 be finished once I get to the Pacific Ocean release.
69 As well as parsing and executing a program, the interpreter can print
70 out the program from the parsed internal structure. This is useful
71 for validating the parsing.
72 So the main requirements of the interpreter are:
75 - Analyse the parsed program to ensure consistency
79 This is all performed by a single C program extracted with
82 There will be two formats for printing the program a default and one
83 that uses bracketing. So an extra command line option is needed for
86 ###### File: oceani.mk
88 myCFLAGS := -Wall -g -fplan9-extensions
89 CFLAGS := $(filter-out $(myCFLAGS),$(CFLAGS)) $(myCFLAGS)
90 myLDLIBS:= libparser.o libscanner.o libmdcode.o -licuuc
91 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
94 oceani.c oceani.h : oceani.mdc parsergen
95 ./parsergen -o oceani --LALR --tag Parser oceani.mdc
96 oceani.mk: oceani.mdc md2c
101 ###### Parser: header
104 struct parse_context {
105 struct token_config config;
115 #include <sys/mman.h>
134 static char Usage[] = "Usage: oceani --trace --print --noexec prog.ocn\n";
135 static const struct option long_options[] = {
136 {"trace", 0, NULL, 't'},
137 {"print", 0, NULL, 'p'},
138 {"noexec", 0, NULL, 'n'},
139 {"brackets", 0, NULL, 'b'},
142 const char *options = "tpnb";
143 int main(int argc, char *argv[])
149 struct parse_context context = {
151 .ignored = (1 << TK_line_comment)
152 | (1 << TK_block_comment),
153 .number_chars = ".,_+-",
158 int doprint=0, dotrace=0, doexec=1, brackets=0;
161 while ((opt = getopt_long(argc, argv, options, long_options, NULL))
164 case 't': dotrace=1; break;
165 case 'p': doprint=1; break;
166 case 'n': doexec=0; break;
167 case 'b': brackets=1; break;
168 default: fprintf(stderr, Usage);
172 if (optind >= argc) {
173 fprintf(stderr, "oceani: no input file given\n");
176 fd = open(argv[optind], O_RDONLY);
178 fprintf(stderr, "oceani: cannot open %s\n", argv[optind]);
181 len = lseek(fd, 0, 2);
182 file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
183 s = code_extract(file, file+len, NULL);
185 fprintf(stderr, "oceani: could not find any code in %s\n",
189 prog = parse_oceani(s->code, &context.config,
190 dotrace ? stderr : NULL);
192 print_exec(*prog, 0, brackets);
193 if (prog && doexec) {
194 if (!analyse_prog(*prog, &context)) {
195 fprintf(stderr, "oceani: type error in program\n");
198 interp_prog(*prog, argv+optind+1);
205 struct section *t = s->next;
216 These four requirements of parse, analyse, print, interpret apply to
217 each language element individually so that is how most of the code
220 Three of the four are fairly self explanatory. The one that requires
221 a little explanation is the analysis step.
223 The current language design does not require variables to be declared,
224 but they must have a single type. Different operations impose
225 different requirements on the variables, for example addition requires
226 both arguments to be numeric, and assignment requires the variable on
227 the left to have the same type as the expression on the right.
229 Analysis involves propagating these type requirements around
230 consequently setting the type of each variable. If any requirements
231 are violated (e.g. a string is compared with a number) or if a
232 variable needs to have two different types, then an error is raised
233 and the program will not run.
235 Determining the types of all variables early is important for
236 processing command line arguments. These can be assigned to any type
237 of variable, but we must first know the correct type so any required
238 conversion can happen. If a variable is associated with a command
239 line argument but no type can be interpreted (e.g. the variable is
240 only ever used in a `print` statement), then the type is set to
243 If the type of a variable cannot be determined at all, then it is set
244 to be a number and given a unique value. This allows it to fill the
245 role of a name in an enumerated type, which is useful for testing the
250 One last introductory step before detailing the language elements and
251 providing their four requirements is to establish the data structures
252 to store these elements.
254 There are two key objects that we need to work with: executable
255 elements which comprise the program, and values which the program
256 works with. Between these is the set of variables which hold the
261 Values can be numbers, which we represent as multi-precision
262 fractions, strings and Booleans. When analysing the program we also
263 need to allow for places where no value is meaningful (`Vnone`) and
264 where we don't know what type to expect yet (`Vunknown`).
265 A 2 character 'tail' is included in each value as the scanner wants
266 to parse that from the end of numbers and we need somewhere to put
267 it. It is currently ignored but one day might allow for
268 e.g. "imaginary" numbers.
270 Values are never shared, they are always copied when used, and freed
271 when no longer needed.
279 myLDLIBS := libnumber.o libstring.o -lgmp
280 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
284 enum vtype {Vunknown, Vnone, Vstr, Vnum, Vbool} vtype;
294 void free_value(struct value v)
298 case Vunknown: break;
299 case Vstr: free(v.str.txt); break;
300 case Vnum: mpq_clear(v.num); break;
305 ###### value functions
307 static void val_init(struct value *val, enum vtype type)
312 case Vunknown: break;
314 mpq_init(val->num); break;
316 val->str.txt = malloc(1);
325 static struct value dup_value(struct value v)
331 case Vunknown: break;
337 mpq_set(rv.num, v.num);
340 rv.str.len = v.str.len;
341 rv.str.txt = malloc(rv.str.len);
342 memcpy(rv.str.txt, v.str.txt, v.str.len);
348 static int value_cmp(struct value left, struct value right)
351 if (left.vtype != right.vtype)
352 return left.vtype - right.vtype;
353 switch (left.vtype) {
354 case Vnum: cmp = mpq_cmp(left.num, right.num); break;
355 case Vstr: cmp = text_cmp(left.str, right.str); break;
356 case Vbool: cmp = left.bool - right.bool; break;
358 case Vunknown: cmp = 0;
363 static struct text text_join(struct text a, struct text b)
366 rv.len = a.len + b.len;
367 rv.txt = malloc(rv.len);
368 memcpy(rv.txt, a.txt, a.len);
369 memcpy(rv.txt+a.len, b.txt, b.len);
373 static void print_value(struct value v)
377 printf("*Unknown*"); break;
379 printf("*no-value*"); break;
381 printf("%.*s", v.str.len, v.str.txt); break;
383 printf("%s", v.bool ? "True":"False"); break;
388 mpf_set_q(fl, v.num);
389 gmp_printf("%Fg", fl);
396 static int parse_value(struct value *vl, char *arg)
405 vl->str.len = strlen(arg);
408 tx.txt = arg; tx.len = strlen(tx.txt);
409 if (number_parse(vl->num, vl->tail, tx) == 0)
413 if (strcasecmp(arg, "true") == 0 ||
414 strcmp(arg, "1") == 0)
416 else if (strcasecmp(arg, "false") == 0 ||
417 strcmp(arg, "0") == 0)
420 printf("Bad bool: %s\n", arg);
430 Variables are simply named values. We store them in a linked list
431 sorted by name and use sequential search and insertion sort.
433 This linked list is stored in the parse context so that reduce
434 functions can find or add variables, and so the analysis phase can
435 ensure that every variable gets a type.
441 struct variable *next;
447 #define container_of(ptr, type, member) ({ \
448 const typeof( ((type *)0)->member ) *__mptr = (ptr); \
449 (type *)( (char *)__mptr - offsetof(type,member) );})
453 struct variable *varlist;
456 while (context.varlist) {
457 struct variable *v = context.varlist;
458 context.varlist = v->next;
465 static struct variable *find_variable(struct token_config *conf, struct text s)
467 struct variable **l = &container_of(conf, struct parse_context,
473 (cmp = text_cmp((*l)->name, s)) < 0)
477 n = calloc(1, sizeof(*n));
479 n->val.vtype = Vunknown;
487 Executables can be lots of different things. In many cases an
488 executable is just an operation combined with one or two other
489 executables. This allows for expressions and lists etc. Other times
490 an executable is something quite specific like a constant or variable
491 name. So we define a `struct exec` to be a general executable with a
492 type, and a `struct binode` which is a subclass of `exec` and forms a
493 node in a binary tree and holding an operation. There will be other
494 subclasses, and to access these we need to be able to `cast` the
495 `exec` into the various other types.
498 #define cast(structname, pointer) ({ \
499 const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \
500 if (__mptr && *__mptr != X##structname) abort(); \
501 (struct structname *)( (char *)__mptr);})
503 #define new(structname) ({ \
504 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
505 __ptr->type = X##structname; \
514 enum exec_types type;
521 struct exec *left, *right;
524 Each different type of `exec` node needs a number of functions
525 defined, a bit like methods. We must be able to be able to free it,
526 print it, analyse it and execute it. Once we have specific `exec`
527 types we will need to parse them to. Let's take this a bit more
532 The parser generator requires as `free_foo` function for each struct
533 that stores attributes and they will be `exec`s of subtypes there-of.
534 So we need `free_exec` which can handle all the subtypes, and we need
539 static void free_binode(struct binode *b)
548 ###### core functions
549 static void free_exec(struct exec *e)
560 static void free_exec(struct exec *e);
562 ###### free exec cases
563 case Xbinode: free_binode(cast(binode, e)); break;
567 Printing an `exec` requires that we know the current indent level for
568 printing line-oriented components. As will become clear later, we
569 also want to know what sort of bracketing to use.
573 static void do_indent(int i, char *str)
580 ###### core functions
581 static void print_binode(struct binode *b, int indent, int bracket)
585 ## print binode cases
589 static void print_exec(struct exec *e, int indent, int bracket)
593 print_binode(cast(binode, e), indent, bracket); break;
600 static void print_exec(struct exec *e, int indent, int bracket);
604 As discusses, analysis involves propagating type requirements around
605 the program and looking for errors.
607 So propagate_types is passed a type that the `exec` is expected to return,
608 and returns the type that it does return, either of which can be `Vunknown`.
609 An `ok` flag is passed by reference. It is set to `0` when an error is
610 found, and `2` when any change is made. If it remains unchanged at
611 `1`, then no more propagation is needed.
613 ###### core functions
615 static enum vtype propagate_types(struct exec *prog, enum vtype type,
621 if (type != Vunknown && type != Vnone)
626 switch (prog->type) {
629 struct binode *b = cast(binode, prog);
631 ## propagate binode cases
635 ## propagate exec cases
642 Interpreting an `exec` doesn't require anything but the `exec`. State
643 is stored in variables and each variable will be directly linked from
644 within the `exec` tree. The exception to this is the whole `program`
645 which needs to look at command line arguments. The `program` will be
646 interpreted separately.
648 Each `exec` can return a value, which may be `Vnone` but shouldn't be `Vunknown`.
650 ###### core functions
652 static struct value interp_exec(struct exec *e)
662 struct binode *b = cast(binode, e);
663 struct value left, right;
664 left.vtype = right.vtype = Vnone;
666 ## interp binode cases
668 free_value(left); free_value(right);
678 Each language element needs to be parsed, printed, analysed,
679 interpreted, and freed. There are several, so let's just start with
680 the easy ones and work our way up.
684 We have already met values and separate objects. When manifest
685 constants appear in the program text that must result in an executable
686 which has a constant value. So the `val` structure embeds a value in
703 $0->val.vtype = Vbool;
708 $0->val.vtype = Vbool;
713 $0->val.vtype = Vnum;
714 if (number_parse($0->val.num, $0->val.tail, $1.txt) == 0)
715 mpq_init($0->val.num);
719 $0->val.vtype = Vstr;
720 string_parse(&$1, '\\', &$0->val.str, $0->val.tail);
723 ###### print exec cases
726 struct val *v = cast(val, e);
727 if (v->val.vtype == Vstr)
730 if (v->val.vtype == Vstr)
735 ###### propagate exec cases
738 struct val *val = cast(val, prog);
739 if (type != Vunknown &&
740 type != val->val.vtype)
742 return val->val.vtype;
745 ###### interp exec cases
747 return dup_value(cast(val, e)->val);
750 void free_val(struct val *v)
758 ###### free exec cases
759 case Xval: free_val(cast(val, e)); break;
762 // Move all nodes from 'b' to 'rv', reversing the order.
763 // In 'b' 'left' is a list, and 'right' is the last node.
764 // In 'rv', left' is the first node and 'right' is a list.
765 struct binode *reorder_bilist(struct binode *b)
767 struct binode *rv = NULL;
770 struct exec *t = b->right;
774 b = cast(binode, b->left);
784 Just as we used as `val` to wrap a value into an `exec`, we similarly
785 need a `var` to wrap a `variable` into an exec. While each `val`
786 contained a copy of the value, each `var` hold a link to the variable
787 because it really is the same variable no matter where it appears.
795 struct variable *var;
800 Variable -> IDENTIFIER ${
802 $0->var = find_variable(config, $1.txt);
805 ###### print exec cases
808 struct var *v = cast(var, e);
809 printf("%.*s", v->var->name.len, v->var->name.txt);
813 ###### propagate exec cases
817 struct var *var = cast(var, prog);
818 if (var->var->val.vtype == Vunknown) {
819 if (type != Vunknown && *ok != 0) {
820 val_init(&var->var->val, type);
825 if (type == Vunknown)
826 return var->var->val.vtype;
827 if (type != var->var->val.vtype)
832 ###### interp exec cases
834 return dup_value(cast(var, e)->var->val);
838 void free_var(struct var *v)
843 ###### free exec cases
844 case Xvar: free_var(cast(var, e)); break;
846 ### Expressions: Boolean
848 Our first user of the `binode` will be expressions, and particularly
849 Boolean expressions. As I haven't implemented precedence in the
850 parser generator yet, we need different names from each precedence
851 level used by expressions. The outer most or lowest level precedence
852 are Boolean `or` `and`, and `not` which form and `Expression` our of `BTerm`s
863 Expression -> Expression or BTerm ${
869 | BTerm ${ $0 = $<1; }$
871 BTerm -> BTerm and BFact ${
877 | BFact ${ $0 = $<1; }$
879 BFact -> not BFact ${
886 ###### print binode cases
888 print_exec(b->left, -1, 0);
890 print_exec(b->right, -1, 0);
893 print_exec(b->left, -1, 0);
895 print_exec(b->right, -1, 0);
899 print_exec(b->right, -1, 0);
902 ###### propagate binode cases
906 /* both must be Vbool, result is Vbool */
907 propagate_types(b->left, Vbool, ok);
908 propagate_types(b->right, Vbool, ok);
909 if (type != Vbool && type != Vunknown)
913 ###### interp binode cases
915 rv = interp_exec(b->left);
916 right = interp_exec(b->right);
917 rv.bool = rv.bool && right.bool;
920 rv = interp_exec(b->left);
921 right = interp_exec(b->right);
922 rv.bool = rv.bool || right.bool;
925 rv = interp_exec(b->right);
929 ### Expressions: Comparison
931 Of slightly higher precedence that Boolean expressions are
933 A comparison takes arguments of any type, but the two types must be
936 To simplify the parsing we introduce an `eop` which can return an
945 static void free_eop(struct eop *e)
966 | Expr ${ $0 = $<1; }$
971 CMPop -> < ${ $0.op = Less; }$
972 | > ${ $0.op = Gtr; }$
973 | <= ${ $0.op = LessEq; }$
974 | >= ${ $0.op = GtrEq; }$
975 | == ${ $0.op = Eql; }$
976 | != ${ $0.op = NEql; }$
978 ###### print binode cases
986 print_exec(b->left, -1, 0);
988 case Less: printf(" < "); break;
989 case LessEq: printf(" <= "); break;
990 case Gtr: printf(" > "); break;
991 case GtrEq: printf(" >= "); break;
992 case Eql: printf(" == "); break;
993 case NEql: printf(" != "); break;
996 print_exec(b->right, -1, 0);
999 ###### propagate binode cases
1006 /* Both must match, result is Vbool */
1007 t = propagate_types(b->left, Vunknown, ok);
1009 propagate_types(b->right, t, ok);
1011 t = propagate_types(b->right, Vunknown, ok);
1013 t = propagate_types(b->left, t, ok);
1015 if (type != Vbool && type != Vunknown)
1019 ###### interp binode cases
1028 left = interp_exec(b->left);
1029 right = interp_exec(b->right);
1030 cmp = value_cmp(left, right);
1033 case Less: rv.bool = cmp < 0; break;
1034 case LessEq: rv.bool = cmp <= 0; break;
1035 case Gtr: rv.bool = cmp > 0; break;
1036 case GtrEq: rv.bool = cmp >= 0; break;
1037 case Eql: rv.bool = cmp == 0; break;
1038 case NEql: rv.bool = cmp != 0; break;
1039 default: rv.bool = 0; break;
1044 ### Expressions: The rest
1046 The remaining expressions with the highest precedence are arithmetic
1047 and string concatenation. There are `Expr`, `Term`, and `Factor`.
1048 The `Factor` is where the `Value` and `Variable` that we already have
1051 `+` and `-` are both infix and prefix operations (where they are
1052 absolute value and negation). These have different operator names.
1054 We also have a 'Bracket' operator which records where parentheses were
1055 found. This make it easy to reproduce these when printing. Once
1056 precedence is handled better I might be able to discard this.
1068 Expr -> Expr Eop Term ${
1074 | Term ${ $0 = $<1; }$
1076 Term -> Term Top Factor ${
1082 | Factor ${ $0 = $<1; }$
1084 Factor -> ( Expression ) ${
1094 | Value ${ $0 = (struct binode *)$<1; }$
1095 | Variable ${ $0 = (struct binode *)$<1; }$
1098 Eop -> + ${ $0.op = Plus; }$
1099 | - ${ $0.op = Minus; }$
1101 Uop -> + ${ $0.op = Absolute; }$
1102 | - ${ $0.op = Negate; }$
1104 Top -> * ${ $0.op = Times; }$
1105 | / ${ $0.op = Divide; }$
1106 | ++ ${ $0.op = Concat; }$
1108 ###### print binode cases
1114 print_exec(b->left, indent, 0);
1116 case Plus: printf(" + "); break;
1117 case Minus: printf(" - "); break;
1118 case Times: printf(" * "); break;
1119 case Divide: printf(" / "); break;
1120 case Concat: printf(" ++ "); break;
1123 print_exec(b->right, indent, 0);
1127 print_exec(b->right, indent, 0);
1131 print_exec(b->right, indent, 0);
1135 print_exec(b->right, indent, 0);
1139 ###### propagate binode cases
1144 /* both must be numbers, result is Vnum */
1147 /* as propagate_types ignores a NULL,
1148 * unary ops fit here too */
1149 propagate_types(b->left, Vnum, ok);
1150 propagate_types(b->right, Vnum, ok);
1151 if (type != Vnum && type != Vunknown)
1156 /* both must be Vstr, result is Vstr */
1157 propagate_types(b->left, Vstr, ok);
1158 propagate_types(b->right, Vstr, ok);
1159 if (type != Vstr && type != Vunknown)
1164 return propagate_types(b->right, type, ok);
1166 ###### interp binode cases
1169 rv = interp_exec(b->left);
1170 right = interp_exec(b->right);
1171 mpq_add(rv.num, rv.num, right.num);
1174 rv = interp_exec(b->left);
1175 right = interp_exec(b->right);
1176 mpq_sub(rv.num, rv.num, right.num);
1179 rv = interp_exec(b->left);
1180 right = interp_exec(b->right);
1181 mpq_mul(rv.num, rv.num, right.num);
1184 rv = interp_exec(b->left);
1185 right = interp_exec(b->right);
1186 mpq_div(rv.num, rv.num, right.num);
1189 rv = interp_exec(b->right);
1190 mpq_neg(rv.num, rv.num);
1193 rv = interp_exec(b->right);
1194 mpq_abs(rv.num, rv.num);
1197 rv = interp_exec(b->right);
1200 left = interp_exec(b->left);
1201 right = interp_exec(b->right);
1203 rv.str = text_join(left.str, right.str);
1206 ### Blocks, Statements, and Statement lists.
1208 Now that we have expressions out of the way we need to turn to
1209 statements. There are simple statements and more complex statements.
1210 Simple statements do not contain newlines, complex statements do.
1212 Statements often come in sequences and we have corresponding simple
1213 statement lists and complex statement lists.
1214 The former comprise only simple statements separated by semicolons.
1215 The later comprise complex statements and simple statement lists. They are
1216 separated by newlines. Thus the semicolon is only used to separate
1217 simple statements on the one line. This may be overly restrictive,
1218 but I'm not sure I every want a complex statement to share a line with
1221 Note that a simple statement list can still use multiple lines if
1222 subsequent lines are indented, so
1224 ###### Example: wrapped simple statement list
1229 is a single simple statement list. This might allow room for
1230 confusion, so I'm not set on it yet.
1232 A simple statement list needs no extra syntax. A complex statement
1233 list has two syntactic forms. It can be enclosed in braces (much like
1234 C blocks), or it can be introduced by a colon and continue until an
1235 unindented newline (much like Python blocks). With this extra syntax
1236 it is referred to as a block.
1238 Note that a block does not have to include any newlines if it only
1239 contains simple statements. So both of:
1241 if condition: a=b; d=f
1243 if condition { a=b; print f }
1247 In either case the list is constructed from a `binode` list with
1248 `Block` as the operator. When parsing the list it is most convenient
1249 to append to the end, so a list is a list and a statement. When using
1250 the list it is more convenient to consider a list to be a statement
1251 and a list. So we need a function to re-order a list.
1252 `reorder_bilist` serves this purpose.
1254 The only stand-alone statement we introduce at this stage is `pass`
1255 which does nothing and is represented as a `NULL` pointer in a `Block`
1275 Block -> Open Statementlist Close ${ $0 = $<2; }$
1276 | Open SimpleStatements } ${ $0 = reorder_bilist($<2); }$
1277 | : Statementlist ${ $0 = $<2; }$
1278 | : SimpleStatements ${ $0 = reorder_bilist($<2); }$
1280 Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<1); }$
1282 ComplexStatements -> ComplexStatements ComplexStatement ${
1288 | ComplexStatements NEWLINE ${ $0 = $<1; }$
1289 | ComplexStatement ${
1297 ComplexStatement -> SimpleStatements NEWLINE ${
1298 $0 = reorder_bilist($<1);
1300 ## ComplexStatement Grammar
1303 SimpleStatements -> SimpleStatements ; SimpleStatement ${
1309 | SimpleStatement ${
1315 | SimpleStatements ; ${ $0 = $<1; }$
1317 SimpleStatement -> pass ${ $0 = NULL; }$
1318 ## SimpleStatement Grammar
1320 ###### print binode cases
1324 if (b->left == NULL)
1327 print_exec(b->left, indent, 0);
1330 print_exec(b->right, indent, 0);
1333 // block, one per line
1334 if (b->left == NULL)
1335 do_indent(indent, "pass\n");
1337 print_exec(b->left, indent, bracket);
1339 print_exec(b->right, indent, bracket);
1343 ###### propagate binode cases
1346 /* If any statement returns something other then Vnone
1347 * then all such must return same type.
1348 * As each statement may be Vnone or something else,
1349 * we must always pass Vunknown down, otherwise an incorrect
1350 * error might occur.
1354 for (e = b; e; e = cast(binode, e->right)) {
1355 t = propagate_types(e->left, Vunknown, ok);
1356 if (t != Vunknown && t != Vnone) {
1357 if (type == Vunknown)
1366 ###### interp binode cases
1368 while (rv.vtype == Vnone &&
1371 rv = interp_exec(b->left);
1372 b = cast(binode, b->right);
1376 ### The Print statement
1378 `print` is a simple statement that takes a comma-separated list of
1379 expressions and prints the values separated by spaces and terminated
1380 by a newline. No control of formatting is possible.
1382 `print` faces the same list-ordering issue as blocks, and uses the
1388 ###### SimpleStatement Grammar
1390 | print ExpressionList ${
1391 $0 = reorder_bilist($<2);
1393 | print ExpressionList , ${
1398 $0 = reorder_bilist($0);
1409 ExpressionList -> ExpressionList , Expression ${
1422 ###### print binode cases
1425 do_indent(indent, "print");
1429 print_exec(b->left, -1, 0);
1433 b = cast(binode, b->right);
1439 ###### propagate binode cases
1442 /* don't care but all must be consistent */
1443 propagate_types(b->left, Vunknown, ok);
1444 propagate_types(b->right, Vunknown, ok);
1447 ###### interp binode cases
1453 for ( ; b; b = cast(binode, b->right))
1457 left = interp_exec(b->left);
1470 ###### Assignment statement
1472 An assignment will assign a value to a variable. The analysis phase
1473 ensures that the type will be correct so the interpreted just needs to
1474 perform the calculation.
1479 ###### SimpleStatement Grammar
1480 | Variable = Expression ${
1487 ###### print binode cases
1490 do_indent(indent, "");
1491 print_exec(b->left, indent, 0);
1493 print_exec(b->right, indent, 0);
1498 ###### propagate binode cases
1501 /* Both must match, result is Vnone */
1502 t = propagate_types(b->left, Vunknown, ok);
1504 propagate_types(b->right, t, ok);
1506 t = propagate_types(b->right, Vunknown, ok);
1508 t = propagate_types(b->left, t, ok);
1512 ###### interp binode cases
1516 struct variable *v = cast(var, b->left)->var;
1517 right = interp_exec(b->right);
1520 right.vtype = Vunknown;
1524 ### The `use` statement
1526 The `use` statement is the last "simple" statement. It is needed when
1527 the condition in a conditional statement is a block. `use` works much
1528 like `return` in C, but only completes the `condition`, not the whole
1534 ###### SimpleStatement Grammar
1541 ###### print binode cases
1544 do_indent(indent, "use ");
1545 print_exec(b->right, -1, 0);
1550 ###### propagate binode cases
1553 /* result matches value */
1554 return propagate_types(b->right, type, ok);
1556 ###### interp binode cases
1559 rv = interp_exec(b->right);
1562 ### The Conditional Statement
1564 This is the biggy and currently the only complex statement.
1565 This subsumes `if`, `while`, `do/while`, `switch`, and some part of
1566 `for`. It is comprised of a number of parts, all of which are
1567 optional though set combinations apply.
1569 If there is a `forpart`, it is executed first, only once.
1570 If there is a `dopart`, then it is executed repeatedly providing
1571 always that the `condpart` or `cond`, if present, does not return a non-True
1572 value. `condpart` can fail to return any value if it simply executes
1573 to completion. This is treated the same as returning True.
1575 If there is a `thenpart` it will be executed whenever the `condpart`
1576 or `cond` returns True (or does not return), but this will happen
1577 *after* `dopart` (when present).
1579 If `elsepart` is present it will be executed at most once when the
1580 condition returns False. If there are any `casepart`s, they will be
1581 executed when the condition returns a matching value.
1583 The particular sorts of values allowed in case parts has not yet been
1584 determined in the language design.
1586 The cond_statement cannot fit into a `binode` so a new `exec` is
1595 struct exec *action;
1596 struct casepart *next;
1598 struct cond_statement {
1600 struct exec *forpart, *condpart, *dopart, *thenpart, *elsepart;
1601 struct casepart *casepart;
1604 ###### ast functions
1606 static void free_casepart(struct casepart *cp)
1610 free_exec(cp->value);
1611 free_exec(cp->action);
1618 void free_cond_statement(struct cond_statement *s)
1622 free_exec(s->forpart);
1623 free_exec(s->condpart);
1624 free_exec(s->dopart);
1625 free_exec(s->thenpart);
1626 free_exec(s->elsepart);
1627 free_casepart(s->casepart);
1631 ###### free exec cases
1632 case Xcond_statement: free_cond_statement(cast(cond_statement, e)); break;
1634 ###### ComplexStatement Grammar
1635 | CondStatement ${ $0 = $<1; }$
1640 CondStatement -> ForThen WhilePart CondSuffix ${
1642 $0->forpart = $1.forpart; $1.forpart = NULL;
1643 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
1644 $0->condpart = $2.condpart; $2.condpart = NULL;
1645 $0->dopart = $2.dopart; $2.dopart = NULL;
1647 | WhilePart CondSuffix ${
1649 $0->condpart = $1.condpart; $1.condpart = NULL;
1650 $0->dopart = $1.dopart; $1.dopart = NULL;
1652 | SwitchPart CondSuffix ${
1656 | IfPart IfSuffix ${
1658 $0->condpart = $1.condpart; $1.condpart = NULL;
1659 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
1662 CondSuffix -> IfSuffix ${ $0 = $<1; }$
1663 | Newlines case Expression Block CondSuffix ${ {
1664 struct casepart *cp = calloc(1, sizeof(*cp));
1668 cp->next = $0->casepart;
1671 | case Expression Block CondSuffix ${ {
1672 struct casepart *cp = calloc(1, sizeof(*cp));
1676 cp->next = $0->casepart;
1680 IfSuffix -> Newlines ${ $0 = new(cond_statement); }$
1681 | Newlines else Block ${
1682 $0 = new(cond_statement);
1686 $0 = new(cond_statement);
1689 | Newlines else CondStatement ${
1690 $0 = new(cond_statement);
1693 | else CondStatement ${
1694 $0 = new(cond_statement);
1700 ForPart -> for SimpleStatements ${
1701 $0 = reorder_bilist($<2);
1707 ThenPart -> then SimpleStatements ${
1708 $0 = reorder_bilist($<2);
1714 ThenPartNL -> ThenPart OptNL ${
1718 WhileHead -> while Block ${
1723 ForThen -> ForPart OptNL ThenPartNL ${
1731 WhilePart -> while Expression Block ${
1732 $0.type = Xcond_statement;
1736 | WhileHead OptNL do Block ${
1737 $0.type = Xcond_statement;
1742 IfPart -> if Expression Block ${
1743 $0.type = Xcond_statement;
1747 | if Block OptNL then Block ${
1748 $0.type = Xcond_statement;
1754 SwitchPart -> switch Expression ${
1761 ###### print exec cases
1763 case Xcond_statement:
1765 struct cond_statement *cs = cast(cond_statement, e);
1766 struct casepart *cp;
1768 do_indent(indent, "for");
1769 if (bracket) printf(" {\n"); else printf(":\n");
1770 print_exec(cs->forpart, indent+1, bracket);
1773 do_indent(indent, "} then {\n");
1775 do_indent(indent, "then:\n");
1776 print_exec(cs->thenpart, indent+1, bracket);
1778 if (bracket) do_indent(indent, "}\n");
1782 if (cs->condpart && cs->condpart->type == Xbinode &&
1783 cast(binode, cs->condpart)->op == Block) {
1785 do_indent(indent, "while {\n");
1787 do_indent(indent, "while:\n");
1788 print_exec(cs->condpart, indent+1, bracket);
1790 do_indent(indent, "} do {\n");
1792 do_indent(indent, "do:\n");
1793 print_exec(cs->dopart, indent+1, bracket);
1795 do_indent(indent, "}\n");
1797 do_indent(indent, "while ");
1798 print_exec(cs->condpart, 0, bracket);
1803 print_exec(cs->dopart, indent+1, bracket);
1805 do_indent(indent, "}\n");
1810 do_indent(indent, "switch");
1812 do_indent(indent, "if");
1813 if (cs->condpart && cs->condpart->type == Xbinode &&
1814 cast(binode, cs->condpart)->op == Block) {
1816 print_exec(cs->condpart, indent+1, bracket);
1817 do_indent(indent, "then:\n");
1818 print_exec(cs->thenpart, indent+1, bracket);
1821 print_exec(cs->condpart, 0, bracket);
1823 print_exec(cs->thenpart, indent+1, bracket);
1826 for (cp = cs->casepart; cp; cp = cp->next) {
1827 do_indent(indent, "case ");
1828 print_exec(cp->value, -1, 0);
1830 print_exec(cp->action, indent+1, bracket);
1833 do_indent(indent, "else:\n");
1834 print_exec(cs->elsepart, indent+1, bracket);
1839 ###### propagate exec cases
1840 case Xcond_statement:
1842 // forpart and dopart must return Vnone
1843 // condpart must be bool or match casepart->values
1844 // thenpart, elsepart, casepart->action must match
1846 struct cond_statement *cs = cast(cond_statement, prog);
1849 t = propagate_types(cs->forpart, Vnone, ok);
1850 if (t != Vunknown && t != Vnone)
1852 t = propagate_types(cs->dopart, Vnone, ok);
1853 if (t != Vunknown && t != Vnone)
1855 if (cs->casepart == NULL)
1856 propagate_types(cs->condpart, Vbool, ok);
1859 for (c = cs->casepart;
1860 c && (t == Vunknown); c = c->next)
1861 t = propagate_types(c->value, Vunknown, ok);
1862 if (t == Vunknown && cs->condpart)
1863 t = propagate_types(cs->condpart, Vunknown, ok);
1864 // Now we have a type (I hope) push it down
1865 if (t != Vunknown) {
1866 for (c = cs->casepart; c; c = c->next)
1867 propagate_types(c->value, t, ok);
1868 propagate_types(cs->condpart, t, ok);
1871 if (type == Vunknown || type == Vnone)
1872 type = propagate_types(cs->thenpart, Vunknown, ok);
1873 if (type == Vunknown || type == Vnone)
1874 type = propagate_types(cs->elsepart, Vunknown, ok);
1875 for (c = cs->casepart;
1876 c && (type == Vunknown || type == Vnone);
1878 type = propagate_types(c->action, Vunknown, ok);
1879 if (type != Vunknown && type != Vnone) {
1880 propagate_types(cs->thenpart, type, ok);
1881 propagate_types(cs->elsepart, type, ok);
1882 for (c = cs->casepart; c ; c = c->next)
1883 propagate_types(c->action, type, ok);
1889 ###### interp exec cases
1890 case Xcond_statement:
1892 struct value v, cnd;
1893 struct casepart *cp;
1894 struct cond_statement *c = cast(cond_statement, e);
1896 interp_exec(c->forpart);
1899 cnd = interp_exec(c->condpart);
1902 if (!(cnd.vtype == Vnone ||
1903 (cnd.vtype == Vbool && cnd.bool != 0)))
1907 interp_exec(c->dopart);
1910 v = interp_exec(c->thenpart);
1911 if (v.vtype != Vnone || !c->dopart)
1915 } while (c->dopart);
1917 for (cp = c->casepart; cp; cp = cp->next) {
1918 v = interp_exec(cp->value);
1919 if (value_cmp(v, cnd) == 0) {
1922 return interp_exec(cp->action);
1928 return interp_exec(c->elsepart);
1933 ### Finally the whole program.
1935 Somewhat reminiscent of Pascal a (current) Ocean program starts with
1936 the keyword "program" and list of variable names which are assigned
1937 values from command line arguments. Following this is a `block` which
1938 is the code to execute.
1940 As this is the top level, several things are handled a bit
1942 The whole program is not interpreted by `interp_exec` as that isn't
1943 passed the argument list which the program requires. Similarly type
1944 analysis is a bit more interesting at this level.
1949 ###### Parser: grammar
1952 Program -> program Varlist Block OptNL ${
1955 $0->left = reorder_bilist($<2);
1959 Varlist -> Varlist Variable ${
1968 ###### print binode cases
1970 do_indent(indent, "program");
1971 for (b2 = cast(binode, b->left); b2; b2 = cast(binode, b2->right)) {
1973 print_exec(b2->left, 0, 0);
1979 print_exec(b->right, indent+1, bracket);
1981 do_indent(indent, "}\n");
1984 ###### propagate binode cases
1985 case Program: abort();
1987 ###### core functions
1989 static int analyse_prog(struct exec *prog, struct parse_context *c)
1991 struct binode *b = cast(binode, prog);
1997 propagate_types(b->right, Vnone, &ok);
2002 for (b = cast(binode, b->left); b; b = cast(binode, b->right)) {
2003 struct var *v = cast(var, b->left);
2004 if (v->var->val.vtype == Vunknown)
2005 val_init(&v->var->val, Vstr);
2007 b = cast(binode, prog);
2010 propagate_types(b->right, Vnone, &ok);
2015 for (v = c->varlist; v; v = v->next)
2016 if (v->val.vtype == Vunknown) {
2017 v->val.vtype = Vnum;
2018 mpq_init(v->val.num);
2019 mpq_set_ui(v->val.num, uniq, 1);
2022 /* Make sure everything is still consistent */
2023 propagate_types(b->right, Vnone, &ok);
2027 static void interp_prog(struct exec *prog, char **argv)
2029 struct binode *p = cast(binode, prog);
2030 struct binode *al = cast(binode, p->left);
2034 struct var *v = cast(var, al->left);
2035 struct value *vl = &v->var->val;
2037 if (argv[0] == NULL) {
2038 printf("Not enough args\n");
2041 al = cast(binode, al->right);
2043 if (!parse_value(vl, argv[0]))
2047 v = interp_exec(p->right);
2051 ###### interp binode cases
2052 case Program: abort();