1 # Ocean Interpreter - Falls Creek version
3 Ocean is intended to be an compiled language, so this interpreter is
4 not targeted at being the final product. It is very much an intermediate
5 stage, and fills that role in two distinct ways.
7 Firstly, it exists as a platform to experiment with the early language
8 design. An interpreter is easy to write and easy to get working, so
9 the barrier for entry is lower if I aim to start with an interpreter.
11 Secondly, the plan for the Ocean compiler is to write it in the
12 [Ocean language](http://ocean-lang.org). To achieve this we naturally
13 need some sort of boot-strap process and this interpreter - written in
14 portable C - will fill that role. It will be used to bootstrap the
17 Two features that are not needed to fill either of these roles are
18 performance and completeness. The interpreter only needs to be fast
19 enough to run small test programs and occasionally to run the compiler
20 on itself. It only needs to be complete enough to test aspects of the
21 design which are developed before the compiler is working, and to run
22 the compiler on itself. Any features not used by the compiler when
23 compiling itself are superfluous. They may be included anyway, but
26 Nonetheless, the interpreter should end up being reasonably complete,
27 and any performance bottlenecks which appear and are easily fixed, will
32 This initial version of the interpreter exists to test out the
33 structured statement providing conditions and iteration. Clearly we
34 need some minimal other functionality so that values can be tested and
35 instructions iterated over. All that functionality is clearly not
36 normative at this stage (not that anything is **really** normative
37 yet) and will change, so early test code will certainly break.
39 Beyond the structured statement and the `use` statement which is
40 intimately related to it we have:
42 - "blocks" of multiple statements.
43 - `pass`: a statement which does nothing.
44 - variables: any identifier is assumed to store a number, string,
46 - expressions: `+`, `-`, `*`, `/` can apply to integers and `++` can
47 catenate strings. `and`, `or`, `not` manipulate Booleans, and
48 normal comparison operators can work on all three types.
49 - assignments: can assign the value of an expression to a variable.
50 - `print`: will print the values in a list of expressions.
51 - `program`: is given a list of identifiers to initialize from
56 Versions of the interpreter which obviously do not support a complete
57 language will be named after creeks and streams. This one is Falls
60 Once we have something reasonably resembling a complete language, the
61 names of rivers will be used.
62 Early versions of the compiler will be named after seas. Major
63 releases of the compiler will be named after oceans. Hopefully I will
64 be finished once I get to the Pacific Ocean release.
68 As well as parsing and executing a program, the interpreter can print
69 out the program from the parsed internal structure. This is useful
70 for validating the parsing.
71 So the main requirements of the interpreter are:
74 - Analyse the parsed program to ensure consistency
78 This is all performed by a single C program extracted with
81 There will be two formats for printing the program a default and one
82 that uses bracketing. So an extra command line option is needed for
85 ###### File: oceani.mk
87 myCFLAGS := -Wall -g -fplan9-extensions
88 CFLAGS := $(filter-out $(myCFLAGS),$(CFLAGS)) $(myCFLAGS)
89 myLDLIBS:= libparser.o libscanner.o libmdcode.o -licuuc
90 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
93 oceani.c oceani.h : oceani.mdc parsergen
94 ./parsergen -o oceani --LALR --tag Parser oceani.mdc
95 oceani.mk: oceani.mdc md2c
100 ###### Parser: header
103 struct parse_context {
104 struct token_config config;
114 #include <sys/mman.h>
133 static char Usage[] = "Usage: oceani --trace --print --noexec prog.ocn\n";
134 static const struct option long_options[] = {
135 {"trace", 0, NULL, 't'},
136 {"print", 0, NULL, 'p'},
137 {"noexec", 0, NULL, 'n'},
138 {"brackets", 0, NULL, 'b'},
141 const char *options = "tpnb";
142 int main(int argc, char *argv[])
148 struct parse_context context = {
150 .ignored = (1 << TK_line_comment)
151 | (1 << TK_block_comment),
152 .number_chars = ".,_+-",
157 int doprint=0, dotrace=0, doexec=1, brackets=0;
160 while ((opt = getopt_long(argc, argv, options, long_options, NULL))
163 case 't': dotrace=1; break;
164 case 'p': doprint=1; break;
165 case 'n': doexec=0; break;
166 case 'b': brackets=1; break;
167 default: fprintf(stderr, Usage);
171 if (optind >= argc) {
172 fprintf(stderr, "oceani: no input file given\n");
175 fd = open(argv[optind], O_RDONLY);
177 fprintf(stderr, "oceani: cannot open %s\n", argv[optind]);
180 len = lseek(fd, 0, 2);
181 file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
182 s = code_extract(file, file+len, NULL);
184 fprintf(stderr, "oceani: could not find any code in %s\n",
188 prog = parse_oceani(s->code, &context.config,
189 dotrace ? stderr : NULL);
191 print_exec(*prog, 0, brackets);
192 if (prog && doexec) {
193 if (!analyse_prog(*prog, &context)) {
194 fprintf(stderr, "oceani: type error in program\n");
197 interp_prog(*prog, argv+optind+1);
204 struct section *t = s->next;
215 These four requirements of parse, analyse, print, interpret apply to
216 each language element individually so that is how most of the code
219 Three of the four are fairly self explanatory. The one that requires
220 a little explanation is the analysis step.
222 The current language design does not require variables to be declared,
223 but they must have a single type. Different operations impose
224 different requirements on the variables, for example addition requires
225 both arguments to be numeric, and assignment requires the variable on
226 the left to have the same type as the expression on the right.
228 Analysis involves propagating these type requirements around
229 consequently setting the type of each variable. If any requirements
230 are violated (e.g. a string is compared with a number) or if a
231 variable needs to have two different types, then an error is raised
232 and the program will not run.
234 Determining the types of all variables early is important for
235 processing command line arguments. These can be assigned to any type
236 of variable, but we must first know the correct type so any required
237 conversion can happen. If a variable is associated with a command
238 line argument but no type can be interpreted (e.g. the variable is
239 only ever used in a `print` statement), then the type is set to
242 If the type of a variable cannot be determined at all, then it is set
243 to be a number and given a unique value. This allows it to fill the
244 role of a name in an enumerated type, which is useful for testing the
249 One last introductory step before detailing the language elements and
250 providing their four requirements is to establish the data structures
251 to store these elements.
253 There are two key objects that we need to work with: executable
254 elements which comprise the program, and values which the program
255 works with. Between these is the set of variables which hold the
260 Values can be numbers, which we represent as multi-precision
261 fractions, strings and Booleans. When analysing the program we also
262 need to allow for places where no value is meaningful (`Vnone`) and
263 where we don't know what type to expect yet (`Vunknown`).
264 A 2 character 'tail' is included in each value as the scanner wants
265 to parse that from the end of numbers and we need somewhere to put
266 it. It is currently ignored but one day might allow for
267 e.g. "imaginary" numbers.
269 Values are never shared, they are always copied when used, and freed
270 when no longer needed.
278 myLDLIBS := libnumber.o libstring.o -lgmp
279 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
283 enum vtype {Vunknown, Vnone, Vstr, Vnum, Vbool} vtype;
293 void free_value(struct value v)
297 case Vunknown: break;
298 case Vstr: free(v.str.txt); break;
299 case Vnum: mpq_clear(v.num); break;
304 ###### value functions
306 static void val_init(struct value *val, enum vtype type)
311 case Vunknown: break;
313 mpq_init(val->num); break;
315 val->str.txt = malloc(1);
324 static struct value dup_value(struct value v)
330 case Vunknown: break;
336 mpq_set(rv.num, v.num);
339 rv.str.len = v.str.len;
340 rv.str.txt = malloc(rv.str.len);
341 memcpy(rv.str.txt, v.str.txt, v.str.len);
347 static int value_cmp(struct value left, struct value right)
350 if (left.vtype != right.vtype)
351 return left.vtype - right.vtype;
352 switch (left.vtype) {
353 case Vnum: cmp = mpq_cmp(left.num, right.num); break;
354 case Vstr: cmp = text_cmp(left.str, right.str); break;
355 case Vbool: cmp = left.bool - right.bool; break;
357 case Vunknown: cmp = 0;
362 static struct text text_join(struct text a, struct text b)
365 rv.len = a.len + b.len;
366 rv.txt = malloc(rv.len);
367 memcpy(rv.txt, a.txt, a.len);
368 memcpy(rv.txt+a.len, b.txt, b.len);
372 static void print_value(struct value v)
376 printf("*Unknown*"); break;
378 printf("*no-value*"); break;
380 printf("%.*s", v.str.len, v.str.txt); break;
382 printf("%s", v.bool ? "True":"False"); break;
387 mpf_set_q(fl, v.num);
388 gmp_printf("%Fg", fl);
395 static int parse_value(struct value *vl, char *arg)
404 vl->str.len = strlen(arg);
407 tx.txt = arg; tx.len = strlen(tx.txt);
408 if (number_parse(vl->num, vl->tail, tx) == 0)
412 if (strcasecmp(arg, "true") == 0 ||
413 strcmp(arg, "1") == 0)
415 else if (strcasecmp(arg, "false") == 0 ||
416 strcmp(arg, "0") == 0)
419 printf("Bad bool: %s\n", arg);
429 Variables are simply named values. We store them in a linked list
430 sorted by name and use sequential search and insertion sort.
432 This linked list is stored in the parse context so that reduce
433 functions can find or add variables, and so the analysis phase can
434 ensure that every variable gets a type.
440 struct variable *next;
446 #define container_of(ptr, type, member) ({ \
447 const typeof( ((type *)0)->member ) *__mptr = (ptr); \
448 (type *)( (char *)__mptr - offsetof(type,member) );})
452 struct variable *varlist;
455 while (context.varlist) {
456 struct variable *v = context.varlist;
457 context.varlist = v->next;
464 static struct variable *find_variable(struct token_config *conf, struct text s)
466 struct variable **l = &container_of(conf, struct parse_context,
472 (cmp = text_cmp((*l)->name, s)) < 0)
476 n = calloc(1, sizeof(*n));
478 n->val.vtype = Vunknown;
486 Executables can be lots of different things. In many cases an
487 executable is just an operation combined with one or two other
488 executables. This allows for expressions and lists etc. Other times
489 an executable is something quite specific like a constant or variable
490 name. So we define a `struct exec` to be a general executable with a
491 type, and a `struct binode` which is a subclass of `exec` and forms a
492 node in a binary tree and holding an operation. There will be other
493 subclasses, and to access these we need to be able to `cast` the
494 `exec` into the various other types.
497 #define cast(structname, pointer) ({ \
498 const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \
499 if (__mptr && *__mptr != X##structname) abort(); \
500 (struct structname *)( (char *)__mptr);})
502 #define new(structname) ({ \
503 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
504 __ptr->type = X##structname; \
513 enum exec_types type;
520 struct exec *left, *right;
523 Each different type of `exec` node needs a number of functions
524 defined, a bit like methods. We must be able to be able to free it,
525 print it, analyse it and execute it. Once we have specific `exec`
526 types we will need to parse them to. Let's take this a bit more
531 The parser generator requires as `free_foo` function for each struct
532 that stores attributes and they will be `exec`s of subtypes there-of.
533 So we need `free_exec` which can handle all the subtypes, and we need
538 static void free_binode(struct binode *b)
547 ###### core functions
548 static void free_exec(struct exec *e)
559 static void free_exec(struct exec *e);
561 ###### free exec cases
562 case Xbinode: free_binode(cast(binode, e)); break;
566 Printing an `exec` requires that we know the current indent level for
567 printing line-oriented components. As will become clear later, we
568 also want to know what sort of bracketing to use.
572 static void do_indent(int i, char *str)
579 ###### core functions
580 static void print_binode(struct binode *b, int indent, int bracket)
584 ## print binode cases
588 static void print_exec(struct exec *e, int indent, int bracket)
592 print_binode(cast(binode, e), indent, bracket); break;
599 static void print_exec(struct exec *e, int indent, int bracket);
603 As discusses, analysis involves propagating type requirements around
604 the program and looking for errors.
606 So propagate_types is passed a type that the `exec` is expected to return,
607 and returns the type that it does return, either of which can be `Vunknown`.
608 An `ok` flag is passed by reference. It is set to `0` when an error is
609 found, and `2` when any change is made. If it remains unchanged at
610 `1`, then no more propagation is needed.
612 ###### core functions
614 static enum vtype propagate_types(struct exec *prog, enum vtype type,
620 if (type != Vunknown && type != Vnone)
625 switch (prog->type) {
628 struct binode *b = cast(binode, prog);
630 ## propagate binode cases
634 ## propagate exec cases
641 Interpreting an `exec` doesn't require anything but the `exec`. State
642 is stored in variables and each variable will be directly linked from
643 within the `exec` tree. The exception to this is the whole `program`
644 which needs to look at command line arguments. The `program` will be
645 interpreted separately.
647 Each `exec` can return a value, which may be `Vnone` but shouldn't be `Vunknown`.
649 ###### core functions
651 static struct value interp_exec(struct exec *e)
661 struct binode *b = cast(binode, e);
662 struct value left, right;
663 left.vtype = right.vtype = Vnone;
665 ## interp binode cases
667 free_value(left); free_value(right);
677 Each language element needs to be parsed, printed, analysed,
678 interpreted, and freed. There are several, so let's just start with
679 the easy ones and work our way up.
683 We have already met values and separate objects. When manifest
684 constants appear in the program text that must result in an executable
685 which has a constant value. So the `val` structure embeds a value in
702 $0->val.vtype = Vbool;
707 $0->val.vtype = Vbool;
712 $0->val.vtype = Vnum;
713 if (number_parse($0->val.num, $0->val.tail, $1.txt) == 0)
714 mpq_init($0->val.num);
718 $0->val.vtype = Vstr;
719 string_parse(&$1, '\\', &$0->val.str, $0->val.tail);
722 ###### print exec cases
725 struct val *v = cast(val, e);
726 if (v->val.vtype == Vstr)
729 if (v->val.vtype == Vstr)
734 ###### propagate exec cases
737 struct val *val = cast(val, prog);
738 if (type != Vunknown &&
739 type != val->val.vtype)
741 return val->val.vtype;
744 ###### interp exec cases
746 return dup_value(cast(val, e)->val);
749 void free_val(struct val *v)
757 ###### free exec cases
758 case Xval: free_val(cast(val, e)); break;
761 // Move all nodes from 'b' to 'rv', reversing the order.
762 // In 'b' 'left' is a list, and 'right' is the last node.
763 // In 'rv', left' is the first node and 'right' is a list.
764 struct binode *reorder_bilist(struct binode *b)
766 struct binode *rv = NULL;
769 struct exec *t = b->right;
773 b = cast(binode, b->left);
783 Just as we used as `val` to wrap a value into an `exec`, we similarly
784 need a `var` to wrap a `variable` into an exec. While each `val`
785 contained a copy of the value, each `var` hold a link to the variable
786 because it really is the same variable no matter where it appears.
794 struct variable *var;
799 Variable -> IDENTIFIER ${
801 $0->var = find_variable(config, $1.txt);
804 ###### print exec cases
807 struct var *v = cast(var, e);
808 printf("%.*s", v->var->name.len, v->var->name.txt);
812 ###### propagate exec cases
816 struct var *var = cast(var, prog);
817 if (var->var->val.vtype == Vunknown) {
818 if (type != Vunknown && *ok != 0) {
819 val_init(&var->var->val, type);
824 if (type == Vunknown)
825 return var->var->val.vtype;
826 if (type != var->var->val.vtype)
831 ###### interp exec cases
833 return dup_value(cast(var, e)->var->val);
837 void free_var(struct var *v)
842 ###### free exec cases
843 case Xvar: free_var(cast(var, e)); break;
845 ### Expressions: Boolean
847 Our first user of the `binode` will be expressions, and particularly
848 Boolean expressions. As I haven't implemented precedence in the
849 parser generator yet, we need different names from each precedence
850 level used by expressions. The outer most or lowest level precedence
851 are Boolean `or` `and`, and `not` which form and `Expression` our of `BTerm`s
862 Expression -> Expression or BTerm ${
868 | BTerm ${ $0 = $<1; }$
870 BTerm -> BTerm and BFact ${
876 | BFact ${ $0 = $<1; }$
878 BFact -> not BFact ${
885 ###### print binode cases
887 print_exec(b->left, -1, 0);
889 print_exec(b->right, -1, 0);
892 print_exec(b->left, -1, 0);
894 print_exec(b->right, -1, 0);
898 print_exec(b->right, -1, 0);
901 ###### propagate binode cases
905 /* both must be Vbool, result is Vbool */
906 propagate_types(b->left, Vbool, ok);
907 propagate_types(b->right, Vbool, ok);
908 if (type != Vbool && type != Vunknown)
912 ###### interp binode cases
914 rv = interp_exec(b->left);
915 right = interp_exec(b->right);
916 rv.bool = rv.bool && right.bool;
919 rv = interp_exec(b->left);
920 right = interp_exec(b->right);
921 rv.bool = rv.bool || right.bool;
924 rv = interp_exec(b->right);
928 ### Expressions: Comparison
930 Of slightly higher precedence that Boolean expressions are
932 A comparison takes arguments of any type, but the two types must be
935 To simplify the parsing we introduce an `eop` which can return an
944 static void free_eop(struct eop *e)
965 | Expr ${ $0 = $<1; }$
970 CMPop -> < ${ $0.op = Less; }$
971 | > ${ $0.op = Gtr; }$
972 | <= ${ $0.op = LessEq; }$
973 | >= ${ $0.op = GtrEq; }$
974 | == ${ $0.op = Eql; }$
975 | != ${ $0.op = NEql; }$
977 ###### print binode cases
985 print_exec(b->left, -1, 0);
987 case Less: printf(" < "); break;
988 case LessEq: printf(" <= "); break;
989 case Gtr: printf(" > "); break;
990 case GtrEq: printf(" >= "); break;
991 case Eql: printf(" == "); break;
992 case NEql: printf(" != "); break;
995 print_exec(b->right, -1, 0);
998 ###### propagate binode cases
1005 /* Both must match, result is Vbool */
1006 t = propagate_types(b->left, Vunknown, ok);
1008 propagate_types(b->right, t, ok);
1010 t = propagate_types(b->right, Vunknown, ok);
1012 t = propagate_types(b->left, t, ok);
1014 if (type != Vbool && type != Vunknown)
1018 ###### interp binode cases
1027 left = interp_exec(b->left);
1028 right = interp_exec(b->right);
1029 cmp = value_cmp(left, right);
1032 case Less: rv.bool = cmp < 0; break;
1033 case LessEq: rv.bool = cmp <= 0; break;
1034 case Gtr: rv.bool = cmp > 0; break;
1035 case GtrEq: rv.bool = cmp >= 0; break;
1036 case Eql: rv.bool = cmp == 0; break;
1037 case NEql: rv.bool = cmp != 0; break;
1038 default: rv.bool = 0; break;
1043 ### Expressions: The rest
1045 The remaining expressions with the highest precedence are arithmetic
1046 and string concatenation. There are `Expr`, `Term`, and `Factor`.
1047 The `Factor` is where the `Value` and `Variable` that we already have
1050 `+` and `-` are both infix and prefix operations (where they are
1051 absolute value and negation). These have different operator names.
1053 We also have a 'Bracket' operator which records where parentheses were
1054 found. This make it easy to reproduce these when printing. Once
1055 precedence is handled better I might be able to discard this.
1067 Expr -> Expr Eop Term ${
1073 | Term ${ $0 = $<1; }$
1075 Term -> Term Top Factor ${
1081 | Factor ${ $0 = $<1; }$
1083 Factor -> ( Expression ) ${
1093 | Value ${ $0 = (struct binode *)$<1; }$
1094 | Variable ${ $0 = (struct binode *)$<1; }$
1097 Eop -> + ${ $0.op = Plus; }$
1098 | - ${ $0.op = Minus; }$
1100 Uop -> + ${ $0.op = Absolute; }$
1101 | - ${ $0.op = Negate; }$
1103 Top -> * ${ $0.op = Times; }$
1104 | / ${ $0.op = Divide; }$
1105 | ++ ${ $0.op = Concat; }$
1107 ###### print binode cases
1113 print_exec(b->left, indent, 0);
1115 case Plus: printf(" + "); break;
1116 case Minus: printf(" - "); break;
1117 case Times: printf(" * "); break;
1118 case Divide: printf(" / "); break;
1119 case Concat: printf(" ++ "); break;
1122 print_exec(b->right, indent, 0);
1126 print_exec(b->right, indent, 0);
1130 print_exec(b->right, indent, 0);
1134 print_exec(b->right, indent, 0);
1138 ###### propagate binode cases
1143 /* both must be numbers, result is Vnum */
1146 /* as propagate_types ignores a NULL,
1147 * unary ops fit here too */
1148 propagate_types(b->left, Vnum, ok);
1149 propagate_types(b->right, Vnum, ok);
1150 if (type != Vnum && type != Vunknown)
1155 /* both must be Vstr, result is Vstr */
1156 propagate_types(b->left, Vstr, ok);
1157 propagate_types(b->right, Vstr, ok);
1158 if (type != Vstr && type != Vunknown)
1163 return propagate_types(b->right, type, ok);
1165 ###### interp binode cases
1168 rv = interp_exec(b->left);
1169 right = interp_exec(b->right);
1170 mpq_add(rv.num, rv.num, right.num);
1173 rv = interp_exec(b->left);
1174 right = interp_exec(b->right);
1175 mpq_sub(rv.num, rv.num, right.num);
1178 rv = interp_exec(b->left);
1179 right = interp_exec(b->right);
1180 mpq_mul(rv.num, rv.num, right.num);
1183 rv = interp_exec(b->left);
1184 right = interp_exec(b->right);
1185 mpq_div(rv.num, rv.num, right.num);
1188 rv = interp_exec(b->right);
1189 mpq_neg(rv.num, rv.num);
1192 rv = interp_exec(b->right);
1193 mpq_abs(rv.num, rv.num);
1196 rv = interp_exec(b->right);
1199 left = interp_exec(b->left);
1200 right = interp_exec(b->right);
1202 rv.str = text_join(left.str, right.str);
1205 ### Blocks, Statements, and Statement lists.
1207 Now that we have expressions out of the way we need to turn to
1208 statements. There are simple statements and more complex statements.
1209 Simple statements do not contain newlines, complex statements do.
1211 Statements often come in sequences and we have corresponding simple
1212 statement lists and complex statement lists.
1213 The former comprise only simple statements separated by semicolons.
1214 The later comprise complex statements and simple statement lists. They are
1215 separated by newlines. Thus the semicolon is only used to separate
1216 simple statements on the one line. This may be overly restrictive,
1217 but I'm not sure I every want a complex statement to share a line with
1220 Note that a simple statement list can still use multiple lines if
1221 subsequent lines are indented, so
1223 ###### Example: wrapped simple statement list
1228 is a single simple statement list. This might allow room for
1229 confusion, so I'm not set on it yet.
1231 A simple statement list needs no extra syntax. A complex statement
1232 list has two syntactic forms. It can be enclosed in braces (much like
1233 C blocks), or it can be introduced by a colon and continue until an
1234 unindented newline (much like Python blocks). With this extra syntax
1235 it is referred to as a block.
1237 Note that a block does not have to include any newlines if it only
1238 contains simple statements. So both of:
1240 if condition: a=b; d=f
1242 if condition { a=b; print f }
1246 In either case the list is constructed from a `binode` list with
1247 `Block` as the operator. When parsing the list it is most convenient
1248 to append to the end, so a list is a list and a statement. When using
1249 the list it is more convenient to consider a list to be a statement
1250 and a list. So we need a function to re-order a list.
1251 `reorder_bilist` serves this purpose.
1253 The only stand-alone statement we introduce at this stage is `pass`
1254 which does nothing and is represented as a `NULL` pointer in a `Block`
1274 Block -> Open Statementlist Close ${ $0 = $<2; }$
1275 | Open SimpleStatements } ${ $0 = reorder_bilist($<2); }$
1276 | : Statementlist ${ $0 = $<2; }$
1277 | : SimpleStatements ${ $0 = reorder_bilist($<2); }$
1279 Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<1); }$
1281 ComplexStatements -> ComplexStatements ComplexStatement ${
1287 | ComplexStatements NEWLINE ${ $0 = $<1; }$
1288 | ComplexStatement ${
1296 ComplexStatement -> SimpleStatements NEWLINE ${
1297 $0 = reorder_bilist($<1);
1299 ## ComplexStatement Grammar
1302 SimpleStatements -> SimpleStatements ; SimpleStatement ${
1308 | SimpleStatement ${
1314 | SimpleStatements ; ${ $0 = $<1; }$
1316 SimpleStatement -> pass ${ $0 = NULL; }$
1317 ## SimpleStatement Grammar
1319 ###### print binode cases
1323 if (b->left == NULL)
1326 print_exec(b->left, indent, 0);
1329 print_exec(b->right, indent, 0);
1332 // block, one per line
1333 if (b->left == NULL)
1334 do_indent(indent, "pass\n");
1336 print_exec(b->left, indent, bracket);
1338 print_exec(b->right, indent, bracket);
1342 ###### propagate binode cases
1345 /* If any statement returns something other then Vnone
1346 * then all such must return same type.
1347 * As each statement may be Vnone or something else,
1348 * we must always pass Vunknown down, otherwise an incorrect
1349 * error might occur.
1353 for (e = b; e; e = cast(binode, e->right)) {
1354 t = propagate_types(e->left, Vunknown, ok);
1355 if (t != Vunknown && t != Vnone) {
1356 if (type == Vunknown)
1365 ###### interp binode cases
1367 while (rv.vtype == Vnone &&
1370 rv = interp_exec(b->left);
1371 b = cast(binode, b->right);
1375 ### The Print statement
1377 `print` is a simple statement that takes a comma-separated list of
1378 expressions and prints the values separated by spaces and terminated
1379 by a newline. No control of formatting is possible.
1381 `print` faces the same list-ordering issue as blocks, and uses the
1387 ###### SimpleStatement Grammar
1389 | print ExpressionList ${
1390 $0 = reorder_bilist($<2);
1392 | print ExpressionList , ${
1397 $0 = reorder_bilist($0);
1408 ExpressionList -> ExpressionList , Expression ${
1421 ###### print binode cases
1424 do_indent(indent, "print");
1428 print_exec(b->left, -1, 0);
1432 b = cast(binode, b->right);
1438 ###### propagate binode cases
1441 /* don't care but all must be consistent */
1442 propagate_types(b->left, Vunknown, ok);
1443 propagate_types(b->right, Vunknown, ok);
1446 ###### interp binode cases
1452 for ( ; b; b = cast(binode, b->right))
1456 left = interp_exec(b->left);
1469 ###### Assignment statement
1471 An assignment will assign a value to a variable. The analysis phase
1472 ensures that the type will be correct so the interpreted just needs to
1473 perform the calculation.
1478 ###### SimpleStatement Grammar
1479 | Variable = Expression ${
1486 ###### print binode cases
1489 do_indent(indent, "");
1490 print_exec(b->left, indent, 0);
1492 print_exec(b->right, indent, 0);
1497 ###### propagate binode cases
1500 /* Both must match, result is Vnone */
1501 t = propagate_types(b->left, Vunknown, ok);
1503 propagate_types(b->right, t, ok);
1505 t = propagate_types(b->right, Vunknown, ok);
1507 t = propagate_types(b->left, t, ok);
1511 ###### interp binode cases
1515 struct variable *v = cast(var, b->left)->var;
1516 right = interp_exec(b->right);
1519 right.vtype = Vunknown;
1523 ### The `use` statement
1525 The `use` statement is the last "simple" statement. It is needed when
1526 the condition in a conditional statement is a block. `use` works much
1527 like `return` in C, but only completes the `condition`, not the whole
1533 ###### SimpleStatement Grammar
1540 ###### print binode cases
1543 do_indent(indent, "use ");
1544 print_exec(b->right, -1, 0);
1549 ###### propagate binode cases
1552 /* result matches value */
1553 return propagate_types(b->right, type, ok);
1555 ###### interp binode cases
1558 rv = interp_exec(b->right);
1561 ### The Conditional Statement
1563 This is the biggy and currently the only complex statement.
1564 This subsumes `if`, `while`, `do/while`, `switch`, and some part of
1565 `for`. It is comprised of a number of parts, all of which are
1566 optional though set combinations apply.
1568 If there is a `forpart`, it is executed first, only once.
1569 If there is a `dopart`, then it is executed repeatedly providing
1570 always that the `condpart` or `cond`, if present, does not return a non-True
1571 value. `condpart` can fail to return any value if it simply executes
1572 to completion. This is treated the same as returning True.
1574 If there is a `thenpart` it will be executed whenever the `condpart`
1575 or `cond` returns True (or does not return), but this will happen
1576 *after* `dopart` (when present).
1578 If `elsepart` is present it will be executed at most once when the
1579 condition returns False. If there are any `casepart`s, they will be
1580 executed when the condition returns a matching value.
1582 The particular sorts of values allowed in case parts has not yet been
1583 determined in the language design.
1585 The cond_statement cannot fit into a `binode` so a new `exec` is
1594 struct exec *action;
1595 struct casepart *next;
1597 struct cond_statement {
1599 struct exec *forpart, *condpart, *dopart, *thenpart, *elsepart;
1600 struct casepart *casepart;
1603 ###### ast functions
1605 static void free_casepart(struct casepart *cp)
1609 free_exec(cp->value);
1610 free_exec(cp->action);
1617 void free_cond_statement(struct cond_statement *s)
1621 free_exec(s->forpart);
1622 free_exec(s->condpart);
1623 free_exec(s->dopart);
1624 free_exec(s->thenpart);
1625 free_exec(s->elsepart);
1626 free_casepart(s->casepart);
1630 ###### free exec cases
1631 case Xcond_statement: free_cond_statement(cast(cond_statement, e)); break;
1633 ###### ComplexStatement Grammar
1634 | CondStatement ${ $0 = $<1; }$
1639 CondStatement -> ForThen WhilePart CondSuffix ${
1641 $0->forpart = $1.forpart; $1.forpart = NULL;
1642 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
1643 $0->condpart = $2.condpart; $2.condpart = NULL;
1644 $0->dopart = $2.dopart; $2.dopart = NULL;
1646 | WhilePart CondSuffix ${
1648 $0->condpart = $1.condpart; $1.condpart = NULL;
1649 $0->dopart = $1.dopart; $1.dopart = NULL;
1651 | SwitchPart CondSuffix ${
1655 | IfPart IfSuffix ${
1657 $0->condpart = $1.condpart; $1.condpart = NULL;
1658 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
1661 CondSuffix -> IfSuffix ${ $0 = $<1; }$
1662 | Newlines case Expression Block CondSuffix ${ {
1663 struct casepart *cp = calloc(1, sizeof(*cp));
1667 cp->next = $0->casepart;
1670 | case Expression Block CondSuffix ${ {
1671 struct casepart *cp = calloc(1, sizeof(*cp));
1675 cp->next = $0->casepart;
1679 IfSuffix -> Newlines ${ $0 = new(cond_statement); }$
1680 | Newlines else Block ${
1681 $0 = new(cond_statement);
1685 $0 = new(cond_statement);
1688 | Newlines else CondStatement ${
1689 $0 = new(cond_statement);
1692 | else CondStatement ${
1693 $0 = new(cond_statement);
1699 ForPart -> for SimpleStatements ${
1700 $0 = reorder_bilist($<2);
1706 ThenPart -> then SimpleStatements ${
1707 $0 = reorder_bilist($<2);
1713 ThenPartNL -> ThenPart OptNL ${
1717 WhileHead -> while Block ${
1722 ForThen -> ForPart OptNL ThenPartNL ${
1730 WhilePart -> while Expression Block ${
1731 $0.type = Xcond_statement;
1735 | WhileHead OptNL do Block ${
1736 $0.type = Xcond_statement;
1741 IfPart -> if Expression Block ${
1742 $0.type = Xcond_statement;
1746 | if Block OptNL then Block ${
1747 $0.type = Xcond_statement;
1753 SwitchPart -> switch Expression ${
1760 ###### print exec cases
1762 case Xcond_statement:
1764 struct cond_statement *cs = cast(cond_statement, e);
1765 struct casepart *cp;
1767 do_indent(indent, "for");
1768 if (bracket) printf(" {\n"); else printf(":\n");
1769 print_exec(cs->forpart, indent+1, bracket);
1772 do_indent(indent, "} then {\n");
1774 do_indent(indent, "then:\n");
1775 print_exec(cs->thenpart, indent+1, bracket);
1777 if (bracket) do_indent(indent, "}\n");
1781 if (cs->condpart && cs->condpart->type == Xbinode &&
1782 cast(binode, cs->condpart)->op == Block) {
1784 do_indent(indent, "while {\n");
1786 do_indent(indent, "while:\n");
1787 print_exec(cs->condpart, indent+1, bracket);
1789 do_indent(indent, "} do {\n");
1791 do_indent(indent, "do:\n");
1792 print_exec(cs->dopart, indent+1, bracket);
1794 do_indent(indent, "}\n");
1796 do_indent(indent, "while ");
1797 print_exec(cs->condpart, 0, bracket);
1802 print_exec(cs->dopart, indent+1, bracket);
1804 do_indent(indent, "}\n");
1809 do_indent(indent, "switch");
1811 do_indent(indent, "if");
1812 if (cs->condpart && cs->condpart->type == Xbinode &&
1813 cast(binode, cs->condpart)->op == Block) {
1815 print_exec(cs->condpart, indent+1, bracket);
1816 do_indent(indent, "then:\n");
1817 print_exec(cs->thenpart, indent+1, bracket);
1820 print_exec(cs->condpart, 0, bracket);
1822 print_exec(cs->thenpart, indent+1, bracket);
1825 for (cp = cs->casepart; cp; cp = cp->next) {
1826 do_indent(indent, "case ");
1827 print_exec(cp->value, -1, 0);
1829 print_exec(cp->action, indent+1, bracket);
1832 do_indent(indent, "else:\n");
1833 print_exec(cs->elsepart, indent+1, bracket);
1838 ###### propagate exec cases
1839 case Xcond_statement:
1841 // forpart and dopart must return Vnone
1842 // condpart must be bool or match casepart->values
1843 // thenpart, elsepart, casepart->action must match
1845 struct cond_statement *cs = cast(cond_statement, prog);
1848 t = propagate_types(cs->forpart, Vnone, ok);
1849 if (t != Vunknown && t != Vnone)
1851 t = propagate_types(cs->dopart, Vnone, ok);
1852 if (t != Vunknown && t != Vnone)
1854 if (cs->casepart == NULL)
1855 propagate_types(cs->condpart, Vbool, ok);
1858 for (c = cs->casepart;
1859 c && (t == Vunknown); c = c->next)
1860 t = propagate_types(c->value, Vunknown, ok);
1861 if (t == Vunknown && cs->condpart)
1862 t = propagate_types(cs->condpart, Vunknown, ok);
1863 // Now we have a type (I hope) push it down
1864 if (t != Vunknown) {
1865 for (c = cs->casepart; c; c = c->next)
1866 propagate_types(c->value, t, ok);
1867 propagate_types(cs->condpart, t, ok);
1870 if (type == Vunknown || type == Vnone)
1871 type = propagate_types(cs->thenpart, Vunknown, ok);
1872 if (type == Vunknown || type == Vnone)
1873 type = propagate_types(cs->elsepart, Vunknown, ok);
1874 for (c = cs->casepart;
1875 c && (type == Vunknown || type == Vnone);
1877 type = propagate_types(c->action, Vunknown, ok);
1878 if (type != Vunknown && type != Vnone) {
1879 propagate_types(cs->thenpart, type, ok);
1880 propagate_types(cs->elsepart, type, ok);
1881 for (c = cs->casepart; c ; c = c->next)
1882 propagate_types(c->action, type, ok);
1888 ###### interp exec cases
1889 case Xcond_statement:
1891 struct value v, cnd;
1892 struct casepart *cp;
1893 struct cond_statement *c = cast(cond_statement, e);
1895 interp_exec(c->forpart);
1898 cnd = interp_exec(c->condpart);
1901 if (!(cnd.vtype == Vnone ||
1902 (cnd.vtype == Vbool && cnd.bool != 0)))
1906 interp_exec(c->dopart);
1909 v = interp_exec(c->thenpart);
1910 if (v.vtype != Vnone || !c->dopart)
1914 } while (c->dopart);
1916 for (cp = c->casepart; cp; cp = cp->next) {
1917 v = interp_exec(cp->value);
1918 if (value_cmp(v, cnd) == 0) {
1921 return interp_exec(cp->action);
1927 return interp_exec(c->elsepart);
1932 ### Finally the whole program.
1934 Somewhat reminiscent of Pascal a (current) Ocean program starts with
1935 the keyword "program" and list of variable names which are assigned
1936 values from command line arguments. Following this is a `block` which
1937 is the code to execute.
1939 As this is the top level, several things are handled a bit
1941 The whole program is not interpreted by `interp_exec` as that isn't
1942 passed the argument list which the program requires. Similarly type
1943 analysis is a bit more interesting at this level.
1948 ###### Parser: grammar
1951 Program -> program Varlist Block OptNL ${
1954 $0->left = reorder_bilist($<2);
1958 Varlist -> Varlist Variable ${
1967 ###### print binode cases
1969 do_indent(indent, "program");
1970 for (b2 = cast(binode, b->left); b2; b2 = cast(binode, b2->right)) {
1972 print_exec(b2->left, 0, 0);
1978 print_exec(b->right, indent+1, bracket);
1980 do_indent(indent, "}\n");
1983 ###### propagate binode cases
1984 case Program: abort();
1986 ###### core functions
1988 static int analyse_prog(struct exec *prog, struct parse_context *c)
1990 struct binode *b = cast(binode, prog);
1996 propagate_types(b->right, Vnone, &ok);
2001 for (b = cast(binode, b->left); b; b = cast(binode, b->right)) {
2002 struct var *v = cast(var, b->left);
2003 if (v->var->val.vtype == Vunknown)
2004 val_init(&v->var->val, Vstr);
2006 b = cast(binode, prog);
2009 propagate_types(b->right, Vnone, &ok);
2014 for (v = c->varlist; v; v = v->next)
2015 if (v->val.vtype == Vunknown) {
2016 v->val.vtype = Vnum;
2017 mpq_init(v->val.num);
2018 mpq_set_ui(v->val.num, uniq, 1);
2021 /* Make sure everything is still consistent */
2022 propagate_types(b->right, Vnone, &ok);
2026 static void interp_prog(struct exec *prog, char **argv)
2028 struct binode *p = cast(binode, prog);
2029 struct binode *al = cast(binode, p->left);
2033 struct var *v = cast(var, al->left);
2034 struct value *vl = &v->var->val;
2036 if (argv[0] == NULL) {
2037 printf("Not enough args\n");
2040 al = cast(binode, al->right);
2042 if (!parse_value(vl, argv[0]))
2046 v = interp_exec(p->right);
2050 ###### interp binode cases
2051 case Program: abort();