1 # Ocean Interpreter - Stoney Creek version
3 Ocean is intended to be a compiled language, so this interpreter is
4 not targeted at being the final product. It is, rather, an intermediate
5 stage, and fills that role in two distinct ways.
7 Firstly, it exists as a platform to experiment with the early language
8 design. An interpreter is easy to write and easy to get working, so
9 the barrier for entry is lower if I aim to start with an interpreter.
11 Secondly, the plan for the Ocean compiler is to write it in the
12 [Ocean language](http://ocean-lang.org). To achieve this we naturally
13 need some sort of boot-strap process and this interpreter - written in
14 portable C - will fill that role. It will be used to bootstrap the
17 Two features that are not needed to fill either of these roles are
18 performance and completeness. The interpreter only needs to be fast
19 enough to run small test programs and occasionally to run the compiler
20 on itself. It only needs to be complete enough to test aspects of the
21 design which are developed before the compiler is working, and to run
22 the compiler on itself. Any features not used by the compiler when
23 compiling itself are superfluous. They may be included anyway, but
26 Nonetheless, the interpreter should end up being reasonably complete,
27 and any performance bottlenecks which appear and are easily fixed, will
32 This second version of the interpreter exists to test out the
33 structured statement providing conditions and iteration, and simple
34 variable scoping. Clearly we need some minimal other functionality so
35 that values can be tested and instructions iterated over. All that
36 functionality is clearly not normative at this stage (not that
37 anything is **really** normative yet) and will change, so early test
38 code will certainly break in later versions.
40 The under-test parts of the language are:
42 - conditional/looping structured statements
43 - the `use` statement which is needed for that
44 - Variable binding using ":=" and "::=", and assignment using "=".
46 Elements which are present to make a usable language are:
48 - "blocks" of multiple statements.
49 - `pass`: a statement which does nothing.
50 - expressions: `+`, `-`, `*`, `/` can apply to numbers and `++` can
51 catenate strings. `and`, `or`, `not` manipulate Booleans, and
52 normal comparison operators can work on all three types.
53 - `print`: will print the values in a list of expressions.
54 - `program`: is given a list of identifiers to initialize from
59 Versions of the interpreter which obviously do not support a complete
60 language will be named after creeks and streams. This one is Stoney
63 Once we have something reasonably resembling a complete language, the
64 names of rivers will be used.
65 Early versions of the compiler will be named after seas. Major
66 releases of the compiler will be named after oceans. Hopefully I will
67 be finished once I get to the Pacific Ocean release.
71 As well as parsing and executing a program, the interpreter can print
72 out the program from the parsed internal structure. This is useful
73 for validating the parsing.
74 So the main requirements of the interpreter are:
76 - Parse the program, possibly with tracing
77 - Analyse the parsed program to ensure consistency
81 This is all performed by a single C program extracted with
84 There will be two formats for printing the program: a default and one
85 that uses bracketing. So a `--bracket` command line option is needed
86 for that. Normally the first code section found is used, however an
87 alternate section can be requested so that a file (such as this one)
88 can contain multiple programs. This is effected with the `--section`
91 ###### File: oceani.mk
93 myCFLAGS := -Wall -g -fplan9-extensions
94 CFLAGS := $(filter-out $(myCFLAGS),$(CFLAGS)) $(myCFLAGS)
95 myLDLIBS:= libparser.o libscanner.o libmdcode.o -licuuc
96 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
98 all :: $(LDLIBS) oceani
99 oceani.c oceani.h : oceani.mdc parsergen
100 ./parsergen -o oceani --LALR --tag Parser oceani.mdc
101 oceani.mk: oceani.mdc md2c
104 oceani: oceani.o $(LDLIBS)
105 $(CC) $(CFLAGS) -o oceani oceani.o $(LDLIBS)
107 ###### Parser: header
110 struct parse_context {
111 struct token_config config;
117 #define container_of(ptr, type, member) ({ \
118 const typeof( ((type *)0)->member ) *__mptr = (ptr); \
119 (type *)( (char *)__mptr - offsetof(type,member) );})
121 #define config2context(_conf) container_of(_conf, struct parse_context, \
130 #include <sys/mman.h>
/* One-line usage summary, written to stderr when an unrecognised
 * option is seen.  Adjacent string literals are concatenated by the
 * compiler, so the first one must end in a space to keep "--brackets"
 * and "--section=..." apart in the printed message. */
static char Usage[] = "Usage: oceani --trace --print --noexec --brackets "
		      "--section=SectionName prog.ocn\n";
151 static const struct option long_options[] = {
152 {"trace", 0, NULL, 't'},
153 {"print", 0, NULL, 'p'},
154 {"noexec", 0, NULL, 'n'},
155 {"brackets", 0, NULL, 'b'},
156 {"section", 1, NULL, 's'},
/* Short-option string handed to getopt_long().  Each letter mirrors an
 * entry in long_options; 's' (--section) takes a required argument, so
 * it must be followed by ':' or optarg is left unset for "-s NAME". */
const char *options = "tpnbs:";
160 int main(int argc, char *argv[])
166 char *section = NULL;
167 struct parse_context context = {
169 .ignored = (1 << TK_line_comment)
170 | (1 << TK_block_comment),
171 .number_chars = ".,_+-",
176 int doprint=0, dotrace=0, doexec=1, brackets=0;
179 while ((opt = getopt_long(argc, argv, options, long_options, NULL))
182 case 't': dotrace=1; break;
183 case 'p': doprint=1; break;
184 case 'n': doexec=0; break;
185 case 'b': brackets=1; break;
186 case 's': section = optarg; break;
187 default: fprintf(stderr, Usage);
191 if (optind >= argc) {
192 fprintf(stderr, "oceani: no input file given\n");
195 fd = open(argv[optind], O_RDONLY);
197 fprintf(stderr, "oceani: cannot open %s\n", argv[optind]);
200 len = lseek(fd, 0, 2);
201 file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
202 s = code_extract(file, file+len, NULL);
204 fprintf(stderr, "oceani: could not find any code in %s\n",
210 for (ss = s; ss; ss = ss->next) {
211 struct text sec = ss->section;
212 if (sec.len == strlen(section) &&
213 strncmp(sec.txt, section, sec.len) == 0)
217 prog = parse_oceani(ss->code, &context.config,
218 dotrace ? stderr : NULL);
220 fprintf(stderr, "oceani: cannot find section %s\n",
225 prog = parse_oceani(s->code, &context.config,
226 dotrace ? stderr : NULL);
228 print_exec(*prog, 0, brackets);
229 if (prog && doexec) {
230 if (!analyse_prog(*prog, &context)) {
231 fprintf(stderr, "oceani: type error in program\n");
234 interp_prog(*prog, argv+optind+1);
241 struct section *t = s->next;
252 These four requirements of parse, analyse, print, interpret apply to
253 each language element individually so that is how most of the code
256 Three of the four are fairly self explanatory. The one that requires
257 a little explanation is the analysis step.
259 The current language design does not require (or even allow) the types
260 of variables to be declared, but they must still have a single type.
261 Different operations impose different requirements on the variables,
262 for example addition requires both arguments to be numeric, and
263 assignment requires the variable on the left to have the same type as
264 the expression on the right.
266 Analysis involves propagating these type requirements around and
267 consequently setting the type of each variable. If any requirements
268 are violated (e.g. a string is compared with a number) or if a
269 variable needs to have two different types, then an error is raised
270 and the program will not run.
272 If the same variable is declared in both branches of an 'if/else', or
273 in all cases of a 'switch' then the multiple instances may be merged
274 into just one variable if the variable is referenced after the
275 conditional statement. When this happens, the types must naturally be
276 consistent across all the branches. When the variable is not used
277 outside the if, the variables in the different branches are distinct
278 and can be of different types.
280 Determining the types of all variables early is important for
281 processing command line arguments. These can be assigned to any type
282 of variable, but we must first know the correct type so any required
283 conversion can happen. If a variable is associated with a command
284 line argument but no type can be interpreted (e.g. the variable is
285 only ever used in a `print` statement), then the type is set to
288 Undeclared names may only appear in "use" statements and "case" expressions.
289 These names are given a type of "label" and a unique value.
290 This allows them to fill the role of a name in an enumerated type, which
291 is useful for testing the `switch` statement.
293 As there are, as yet, no distinct types that are compatible, there
294 isn't much subtlety in the analysis. When we have distinct number
295 types, this will become more interesting.
299 One last introductory step before detailing the language elements and
300 providing their four requirements is to establish the data structures
301 to store these elements.
303 There are two key objects that we need to work with: executable
304 elements which comprise the program, and values which the program
305 works with. Between these are the variables in their various scopes
306 which hold the values.
310 Values can be numbers, which we represent as multi-precision
311 fractions, strings, Booleans and labels. When analysing the program
312 we also need to allow for places where no value is meaningful
313 (`Vnone`) and where we don't know what type to expect yet (`Vunknown`
314 which can be anything and `Vnolabel` which can be anything except a
315 label). A 2 character 'tail' is included in each value as the scanner
316 wants to parse that from the end of numbers and we need somewhere to
317 put it. It is currently ignored but one day might allow for
318 e.g. "imaginary" numbers.
320 Values are never shared, they are always copied when used, and freed
321 when no longer needed.
323 When propagating type information around the program, we need to
324 determine if two types are compatible, where `Vunknown` is compatible
325 with anything, and `Vnolabel` is compatible with anything except a
326 label. A separate function to encode this rule will simplify some code
329 When assigning command line arguments to variables, we need to be able
330 to parse each type from a string.
338 myLDLIBS := libnumber.o libstring.o -lgmp
339 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
343 enum vtype {Vnolabel, Vunknown, Vnone, Vstr, Vnum, Vbool, Vlabel} vtype;
354 static void free_value(struct value v)
359 case Vunknown: break;
360 case Vstr: free(v.str.txt); break;
361 case Vnum: mpq_clear(v.num); break;
367 static int vtype_compat(enum vtype require, enum vtype have)
371 return have != Vlabel;
375 return have == Vunknown || require == have;
379 ###### value functions
381 static void val_init(struct value *val, enum vtype type)
387 case Vunknown: break;
389 mpq_init(val->num); break;
391 val->str.txt = malloc(1);
403 static struct value dup_value(struct value v)
410 case Vunknown: break;
419 mpq_set(rv.num, v.num);
422 rv.str.len = v.str.len;
423 rv.str.txt = malloc(rv.str.len);
424 memcpy(rv.str.txt, v.str.txt, v.str.len);
430 static int value_cmp(struct value left, struct value right)
433 if (left.vtype != right.vtype)
434 return left.vtype - right.vtype;
435 switch (left.vtype) {
436 case Vlabel: cmp = left.label == right.label ? 0 : 1; break;
437 case Vnum: cmp = mpq_cmp(left.num, right.num); break;
438 case Vstr: cmp = text_cmp(left.str, right.str); break;
439 case Vbool: cmp = left.bool - right.bool; break;
442 case Vunknown: cmp = 0;
447 static struct text text_join(struct text a, struct text b)
450 rv.len = a.len + b.len;
451 rv.txt = malloc(rv.len);
452 memcpy(rv.txt, a.txt, a.len);
453 memcpy(rv.txt+a.len, b.txt, b.len);
457 static void print_value(struct value v)
461 printf("*Unknown*"); break;
464 printf("*no-value*"); break;
466 printf("*label-%p*", v.label); break;
468 printf("%.*s", v.str.len, v.str.txt); break;
470 printf("%s", v.bool ? "True":"False"); break;
475 mpf_set_q(fl, v.num);
476 gmp_printf("%Fg", fl);
483 static int parse_value(struct value *vl, char *arg)
494 vl->str.len = strlen(arg);
495 vl->str.txt = malloc(vl->str.len);
496 memcpy(vl->str.txt, arg, vl->str.len);
503 tx.txt = arg; tx.len = strlen(tx.txt);
504 if (number_parse(vl->num, vl->tail, tx) == 0)
507 mpq_neg(vl->num, vl->num);
510 if (strcasecmp(arg, "true") == 0 ||
511 strcmp(arg, "1") == 0)
513 else if (strcasecmp(arg, "false") == 0 ||
514 strcmp(arg, "0") == 0)
517 printf("Bad bool: %s\n", arg);
527 Variables are scoped named values. We store the names in a linked
528 list of "bindings" sorted lexically, and use sequential search and
535 struct binding *next; // in lexical order
539 This linked list is stored in the parse context so that "reduce"
540 functions can find or add variables, and so the analysis phase can
541 ensure that every variable gets a type.
545 struct binding *varlist; // In lexical order
549 static struct binding *find_binding(struct parse_context *c, struct text s)
551 struct binding **l = &c->varlist;
556 (cmp = text_cmp((*l)->name, s)) < 0)
560 n = calloc(1, sizeof(*n));
567 Each name can be linked to multiple variables defined in different
568 scopes. Each scope starts where the name is declared and continues
569 until the end of the containing code block. Scopes of a given name
570 cannot nest, so a declaration while a name is in-scope is an error.
572 ###### binding fields
573 struct variable *var;
577 struct variable *previous;
579 struct binding *name;
583 While the naming seems strange, we include local constants in the
584 definition of variables. A name declared `var := value` can
585 subsequently be changed, but a name declared `var ::= value` cannot -
588 ###### variable fields
591 Scopes in parallel branches can be partially merged. More
592 specifically, if a given name is declared in both branches of an
593 if/else then its scope is a candidate for merging. Similarly if
594 every branch of an exhaustive switch (e.g. has an "else" clause)
595 declares a given name, then the scopes from the branches are
596 candidates for merging.
598 Note that names declared inside a loop (which is only parallel to
599 itself) are never visible after the loop. Similarly names defined in
600 scopes which are not parallel, such as those started by `for` and
601 `switch`, are never visible after the scope. Only variables defined in
602 both `then` and `else` (including the implicit then after an `if`, and
603 excluding `then` used with `for`) and in all `case`s and `else` of a
604 `switch` or `while` can be visible beyond the `if`/`switch`/`while`.
606 Labels, which are a bit like variables, follow different rules.
607 Labels are not explicitly declared, but if an undeclared name appears
608 in a context where a label is legal, that effectively declares the
609 name as a label. The declaration remains in force (or in scope) at
610 least to the end of the immediately containing block and conditionally
611 in any larger containing block which does not declare the name in some
612 other way. Importantly, the conditional scope extension happens even
613 if the label is only used in one parallel branch of a conditional -- when
614 used in one branch it is treated as having been declared in all
617 Merge candidates are tentatively visible beyond the end of the
618 branching statement which creates them. If the name is used, the
619 merge is affirmed and they become a single variable visible at the
620 outer layer. If not - if it is redeclared first - the merge lapses.
622 To track scopes we have an extra stack, implemented as a linked list,
623 which roughly parallels the parse stack and which is used exclusively
624 for scoping. When a new scope is opened, a new frame is pushed and
625 the child-count of the parent frame is incremented. This child-count
626 is used to distinguish between the first of a set of parallel scopes,
627 in which declared variables must not be in scope, and subsequent
628 branches, where they must already be conditionally scoped.
630 To push a new frame *before* any code in the frame is parsed, we need a
631 grammar reduction. This is most easily achieved with a grammar
632 element which derives the empty string, and creates the new scope when
633 it is recognized. This can be placed, for example, between a keyword
634 like "if" and the code following it.
638 struct scope *parent;
644 struct scope *scope_stack;
647 static void scope_pop(struct parse_context *c)
649 struct scope *s = c->scope_stack;
651 c->scope_stack = s->parent;
656 static void scope_push(struct parse_context *c)
658 struct scope *s = calloc(1, sizeof(*s));
660 c->scope_stack->child_count += 1;
661 s->parent = c->scope_stack;
669 OpenScope -> ${ scope_push(config2context(config)); }$
672 Each variable records a scope depth and is in one of four states:
674 - "in scope". This is the case between the declaration of the
675 variable and the end of the containing block, and also between
676 the usage which affirms a merge and the end of the block.
678 The scope depth is not greater than the current parse context scope
679 nest depth. When the block of that depth closes, the state will
680 change. To achieve this, all "in scope" variables are linked
681 together as a stack in nesting order.
683 - "pending". The "in scope" block has closed, but other parallel
684 scopes are still being processed. So far, every parallel block at
685 the same level that has closed has declared the name.
687 The scope depth is the depth of the last parallel block that
688 enclosed the declaration, and that has closed.
690 - "conditionally in scope". The "in scope" block and all parallel
691 scopes have closed, and no further mention of the name has been
692 seen. This state includes a secondary nest depth which records the
693 outermost scope seen since the variable became conditionally in
694 scope. If a use of the name is found, the variable becomes "in
695 scope" and that secondary depth becomes the recorded scope depth.
696 If the name is declared as a new variable, the old variable becomes
697 "out of scope" and the recorded scope depth stays unchanged.
699 - "out of scope". The variable is neither in scope nor conditionally
700 in scope. It is permanently out of scope now and can be removed from
701 the "in scope" stack.
704 ###### variable fields
705 int depth, min_depth;
706 enum { OutScope, PendingScope, CondScope, InScope } scope;
707 struct variable *in_scope;
711 struct variable *in_scope;
713 All variables with the same name are linked together using the
714 'previous' link. Those variables that have
715 been affirmatively merged all have a 'merged' pointer that points to
716 one primary variable - the most recently declared instance. When
717 merging variables, we need to also adjust the 'merged' pointer on any
718 other variables that had previously been merged with the one that will
719 no longer be primary.
721 ###### variable fields
722 struct variable *merged;
726 static void variable_merge(struct variable *primary, struct variable *secondary)
732 primary = primary->merged;
734 for (v = primary->previous; v; v=v->previous)
735 if (v == secondary || v == secondary->merged ||
736 v->merged == secondary ||
737 (v->merged && v->merged == secondary->merged)) {
745 while (context.varlist) {
746 struct binding *b = context.varlist;
747 struct variable *v = b->var;
748 context.varlist = b->next;
751 struct variable *t = v;
759 #### Manipulating Bindings
761 When a name is conditionally visible, a new declaration discards the
762 old binding - the condition lapses. Conversely a usage of the name
763 affirms the visibility and extends it to the end of the containing
764 block - i.e. the block that contains both the original declaration and
765 the latest usage. This is determined from `min_depth`. When a
766 conditionally visible variable gets affirmed like this, it is also
767 merged with other conditionally visible variables with the same name.
769 When we parse a variable declaration we either signal an error if the
770 name is currently bound, or create a new variable at the current nest
771 depth if the name is unbound or bound to a conditionally scoped or
772 pending-scope variable. If the previous variable was conditionally
773 scoped, it and its homonyms become out-of-scope.
775 When we parse a variable reference (including non-declarative
776 assignment) we signal an error if the name is not bound or is bound to
777 a pending-scope variable; update the scope if the name is bound to a
778 conditionally scoped variable; or just proceed normally if the named
779 variable is in scope.
781 When we exit a scope, any variables bound at this level are either
782 marked out of scope or pending-scoped, depending on whether the
783 scope was sequential or parallel.
785 When exiting a parallel scope we check if there are any variables that
786 were previously pending and are still visible. If there are, then
787 they weren't redeclared in the most recent scope, so they cannot be
788 merged and must become out-of-scope. If it is not the first of
789 parallel scopes (based on `child_count`), we check that there was a
790 previous binding that is still pending-scope. If there isn't, the new
791 variable must now be out-of-scope.
793 When exiting a sequential scope that immediately enclosed parallel
794 scopes, we need to resolve any pending-scope variables. If there was
795 no `else` clause, and we cannot determine that the `switch` was exhaustive,
796 we need to mark all pending-scope variables as out-of-scope. Otherwise
797 all pending-scope variables become conditionally scoped.
800 enum closetype { CloseSequential, CloseParallel, CloseElse };
804 static struct variable *var_decl(struct parse_context *c, struct text s)
806 struct binding *b = find_binding(c, s);
807 struct variable *v = b->var;
809 switch (v ? v->scope : OutScope) {
811 /* Signal error ... once I build error signalling support */
815 v && v->scope == CondScope;
821 v = calloc(1, sizeof(*v));
822 v->previous = b->var;
825 v->min_depth = v->depth = c->scope_depth;
827 v->in_scope = c->in_scope;
829 val_init(&v->val, Vunknown);
833 static struct variable *var_ref(struct parse_context *c, struct text s)
835 struct binding *b = find_binding(c, s);
836 struct variable *v = b->var;
839 switch (v ? v->scope : OutScope) {
842 /* Signal an error - once that is possible */
845 /* All CondScope variables of this name need to be merged
848 v->depth = v->min_depth;
850 for (v2 = v->previous;
851 v2 && v2->scope == CondScope;
853 variable_merge(v, v2);
861 static void var_block_close(struct parse_context *c, enum closetype ct)
863 /* close of all variables that are in_scope */
864 struct variable *v, **vp, *v2;
867 for (vp = &c->in_scope;
868 v = *vp, v && v->depth > c->scope_depth && v->min_depth > c->scope_depth;
872 case CloseParallel: /* handle PendingScope */
876 if (c->scope_stack->child_count == 1)
877 v->scope = PendingScope;
878 else if (v->previous &&
879 v->previous->scope == PendingScope)
880 v->scope = PendingScope;
881 else if (v->val.vtype == Vlabel)
882 v->scope = PendingScope;
883 else if (v->name->var == v)
885 if (ct == CloseElse) {
886 /* All Pending variables with this name
887 * are now Conditional */
889 v2 && v2->scope == PendingScope;
891 v2->scope = CondScope;
896 v2 && v2->scope == PendingScope;
898 if (v2->val.vtype != Vlabel)
899 v2->scope = OutScope;
901 case OutScope: break;
904 case CloseSequential:
905 if (v->val.vtype == Vlabel)
906 v->scope = PendingScope;
912 /* There was no 'else', so we can only become
913 * conditional if we know the cases were exhaustive,
914 * and that doesn't mean anything yet.
915 * So only labels become conditional..
918 v2 && v2->scope == PendingScope;
920 if (v2->val.vtype == Vlabel) {
921 v2->scope = CondScope;
922 v2->min_depth = c->scope_depth;
924 v2->scope = OutScope;
927 case OutScope: break;
931 if (v->scope == OutScope)
940 Executables can be lots of different things. In many cases an
941 executable is just an operation combined with one or two other
942 executables. This allows for expressions and lists etc. Other times
943 an executable is something quite specific like a constant or variable
944 name. So we define a `struct exec` to be a general executable with a
945 type, and a `struct binode` which is a subclass of `exec` and forms a
946 node in a binary tree and holding an operation. There will be other
947 subclasses, and to access these we need to be able to `cast` the
948 `exec` into the various other types.
951 #define cast(structname, pointer) ({ \
952 const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \
953 if (__mptr && *__mptr != X##structname) abort(); \
954 (struct structname *)( (char *)__mptr);})
956 #define new(structname) ({ \
957 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
958 __ptr->type = X##structname; \
967 enum exec_types type;
974 struct exec *left, *right;
977 Each different type of `exec` node needs a number of functions
978 defined, a bit like methods. We must be able to free it,
979 print it, analyse it and execute it. Once we have specific `exec`
980 types we will need to parse them too. Let's take this a bit more
985 The parser generator requires a `free_foo` function for each struct
986 that stores attributes and they will be `exec`s or subtypes thereof.
987 So we need `free_exec` which can handle all the subtypes, and we need
992 static void free_binode(struct binode *b)
1001 ###### core functions
1002 static void free_exec(struct exec *e)
1011 ###### forward decls
1013 static void free_exec(struct exec *e);
1015 ###### free exec cases
1016 case Xbinode: free_binode(cast(binode, e)); break;
1020 Printing an `exec` requires that we know the current indent level for
1021 printing line-oriented components. As will become clear later, we
1022 also want to know what sort of bracketing to use.
1024 ###### ast functions
1026 static void do_indent(int i, char *str)
1033 ###### core functions
1034 static void print_binode(struct binode *b, int indent, int bracket)
1038 ## print binode cases
1042 static void print_exec(struct exec *e, int indent, int bracket)
1046 print_binode(cast(binode, e), indent, bracket); break;
1051 ###### forward decls
1053 static void print_exec(struct exec *e, int indent, int bracket);
1057 As discussed, analysis involves propagating type requirements around
1058 the program and looking for errors.
1060 So `propagate_types` is passed a type that the `exec` is expected to return,
1061 and returns the type that it does return, either of which can be `Vunknown`.
1062 An `ok` flag is passed by reference. It is set to `0` when an error is
1063 found, and `2` when any change is made. If it remains unchanged at
1064 `1`, then no more propagation is needed.
1066 ###### core functions
1068 static enum vtype propagate_types(struct exec *prog, enum vtype type,
1076 switch (prog->type) {
1079 struct binode *b = cast(binode, prog);
1081 ## propagate binode cases
1085 ## propagate exec cases
1092 Interpreting an `exec` doesn't require anything but the `exec`. State
1093 is stored in variables and each variable will be directly linked from
1094 within the `exec` tree. The exception to this is the whole `program`
1095 which needs to look at command line arguments. The `program` will be
1096 interpreted separately.
1098 Each `exec` can return a value, which may be `Vnone` but shouldn't be `Vunknown`.
1100 ###### core functions
1102 static struct value interp_exec(struct exec *e)
1112 struct binode *b = cast(binode, e);
1113 struct value left, right;
1114 left.vtype = right.vtype = Vnone;
1116 ## interp binode cases
1118 free_value(left); free_value(right);
1121 ## interp exec cases
1126 ## Language elements
1128 Each language element needs to be parsed, printed, analysed,
1129 interpreted, and freed. There are several, so let's just start with
1130 the easy ones and work our way up.
1134 We have already met values as separate objects. When manifest
1135 constants appear in the program text that must result in an executable
1136 which has a constant value. So the `val` structure embeds a value in
1153 $0->val.vtype = Vbool;
1158 $0->val.vtype = Vbool;
1163 $0->val.vtype = Vnum;
1164 if (number_parse($0->val.num, $0->val.tail, $1.txt) == 0)
1165 mpq_init($0->val.num);
1169 $0->val.vtype = Vstr;
1170 string_parse(&$1, '\\', &$0->val.str, $0->val.tail);
1174 $0->val.vtype = Vstr;
1175 string_parse(&$1, '\\', &$0->val.str, $0->val.tail);
1178 ###### print exec cases
1181 struct val *v = cast(val, e);
1182 if (v->val.vtype == Vstr)
1184 print_value(v->val);
1185 if (v->val.vtype == Vstr)
1190 ###### propagate exec cases
1193 struct val *val = cast(val, prog);
1194 if (!vtype_compat(type, val->val.vtype))
1196 return val->val.vtype;
1199 ###### interp exec cases
1201 return dup_value(cast(val, e)->val);
1203 ###### ast functions
1204 static void free_val(struct val *v)
1212 ###### free exec cases
1213 case Xval: free_val(cast(val, e)); break;
1215 ###### ast functions
1216 // Move all nodes from 'b' to 'rv', reversing the order.
1217 // In 'b' 'left' is a list, and 'right' is the last node.
1218 // In 'rv', left' is the first node and 'right' is a list.
1219 static struct binode *reorder_bilist(struct binode *b)
1221 struct binode *rv = NULL;
1224 struct exec *t = b->right;
1228 b = cast(binode, b->left);
1238 Just as we used a `val` to wrap a value into an `exec`, we similarly
1239 need a `var` to wrap a `variable` into an exec. While each `val`
1240 contained a copy of the value, each `var` holds a link to the variable
1241 because it really is the same variable no matter where it appears.
1242 When a variable is used, we need to remember to follow the `->merged`
1243 link to find the primary instance.
1251 struct variable *var;
1257 VariableDecl -> IDENTIFIER := ${ {
1258 struct variable *v = var_decl(config2context(config), $1.txt);
1262 | IDENTIFIER ::= ${ {
1263 struct variable *v = var_decl(config2context(config), $1.txt);
1269 Variable -> IDENTIFIER ${ {
1270 struct variable *v = var_ref(config2context(config), $1.txt);
1272 /* This might be a label - allocate a var just in case */
1273 v = var_decl(config2context(config), $1.txt);
1275 val_init(&v->val, Vlabel);
1281 ###### print exec cases
1284 struct var *v = cast(var, e);
1286 struct binding *b = v->var->name;
1287 printf("%.*s", b->name.len, b->name.txt);
1292 ###### propagate exec cases
1296 struct var *var = cast(var, prog);
1297 struct variable *v = var->var;
1304 if (v->val.vtype == Vunknown) {
1305 if (type > Vunknown && *ok != 0) {
1306 val_init(&v->val, type);
1311 if (!vtype_compat(type, v->val.vtype))
1313 if (type <= Vunknown)
1314 return v->val.vtype;
1318 ###### interp exec cases
1321 struct var *var = cast(var, e);
1322 struct variable *v = var->var;
1326 return dup_value(v->val);
1329 ###### ast functions
1331 static void free_var(struct var *v)
1336 ###### free exec cases
1337 case Xvar: free_var(cast(var, e)); break;
1339 ### Expressions: Boolean
1341 Our first user of the `binode` will be expressions, and particularly
1342 Boolean expressions. As I haven't implemented precedence in the
1343 parser generator yet, we need different names for each precedence
1344 level used by expressions. The outer most or lowest level precedence
1345 are Boolean `or`, `and`, and `not` which form an `Expression` out of `BTerm`s
1356 Expression -> Expression or BTerm ${
1362 | BTerm ${ $0 = $<1; }$
1364 BTerm -> BTerm and BFact ${
1370 | BFact ${ $0 = $<1; }$
1372 BFact -> not BFact ${
1379 ###### print binode cases
1381 print_exec(b->left, -1, 0);
1383 print_exec(b->right, -1, 0);
1386 print_exec(b->left, -1, 0);
1388 print_exec(b->right, -1, 0);
1392 print_exec(b->right, -1, 0);
1395 ###### propagate binode cases
1399 /* both must be Vbool, result is Vbool */
1400 propagate_types(b->left, Vbool, ok);
1401 propagate_types(b->right, Vbool, ok);
1402 if (type != Vbool && type > Vunknown)
1406 ###### interp binode cases
1408 rv = interp_exec(b->left);
1409 right = interp_exec(b->right);
1410 rv.bool = rv.bool && right.bool;
1413 rv = interp_exec(b->left);
1414 right = interp_exec(b->right);
1415 rv.bool = rv.bool || right.bool;
1418 rv = interp_exec(b->right);
1422 ### Expressions: Comparison
1424 Of slightly higher precedence than Boolean expressions are
1426 A comparison takes arguments of any type, but the two types must be
1429 To simplify the parsing we introduce an `eop` which can return an
1430 expression operator.
1437 ###### ast functions
1438 static void free_eop(struct eop *e)
1453 | Expr CMPop Expr ${
1459 | Expr ${ $0 = $<1; }$
1464 CMPop -> < ${ $0.op = Less; }$
1465 | > ${ $0.op = Gtr; }$
1466 | <= ${ $0.op = LessEq; }$
1467 | >= ${ $0.op = GtrEq; }$
1468 | == ${ $0.op = Eql; }$
1469 | != ${ $0.op = NEql; }$
1471 ###### print binode cases
1479 print_exec(b->left, -1, 0);
1481 case Less: printf(" < "); break;
1482 case LessEq: printf(" <= "); break;
1483 case Gtr: printf(" > "); break;
1484 case GtrEq: printf(" >= "); break;
1485 case Eql: printf(" == "); break;
1486 case NEql: printf(" != "); break;
1489 print_exec(b->right, -1, 0);
1492 ###### propagate binode cases
1499 /* Both must match but not labels, result is Vbool */
1500 t = propagate_types(b->left, Vnolabel, ok);
1502 propagate_types(b->right, t, ok);
1504 t = propagate_types(b->right, Vnolabel, ok);
1506 t = propagate_types(b->left, t, ok);
1508 if (!vtype_compat(type, Vbool))
1512 ###### interp binode cases
1521 left = interp_exec(b->left);
1522 right = interp_exec(b->right);
1523 cmp = value_cmp(left, right);
1526 case Less: rv.bool = cmp < 0; break;
1527 case LessEq: rv.bool = cmp <= 0; break;
1528 case Gtr: rv.bool = cmp > 0; break;
1529 case GtrEq: rv.bool = cmp >= 0; break;
1530 case Eql: rv.bool = cmp == 0; break;
1531 case NEql: rv.bool = cmp != 0; break;
1532 default: rv.bool = 0; break;
1537 ### Expressions: The rest
1539 The remaining expressions with the highest precedence are arithmetic
1540 and string concatenation. There are `Expr`, `Term`, and `Factor`.
1541 The `Factor` is where the `Value` and `Variable` that we already have
1544 `+` and `-` are both infix and prefix operations (where they are
1545 absolute value and negation). These have different operator names.
1547 We also have a 'Bracket' operator which records where parentheses were
1548 found. This makes it easy to reproduce these when printing. Once
1549 precedence is handled better I might be able to discard this.
1561 Expr -> Expr Eop Term ${
1567 | Term ${ $0 = $<1; }$
1569 Term -> Term Top Factor ${
1575 | Factor ${ $0 = $<1; }$
1577 Factor -> ( Expression ) ${
1587 | Value ${ $0 = (struct binode *)$<1; }$
1588 | Variable ${ $0 = (struct binode *)$<1; }$
1591 Eop -> + ${ $0.op = Plus; }$
1592 | - ${ $0.op = Minus; }$
1594 Uop -> + ${ $0.op = Absolute; }$
1595 | - ${ $0.op = Negate; }$
1597 Top -> * ${ $0.op = Times; }$
1598 | / ${ $0.op = Divide; }$
1599 | ++ ${ $0.op = Concat; }$
1601 ###### print binode cases
1607 print_exec(b->left, indent, 0);
1609 case Plus: printf(" + "); break;
1610 case Minus: printf(" - "); break;
1611 case Times: printf(" * "); break;
1612 case Divide: printf(" / "); break;
1613 case Concat: printf(" ++ "); break;
1616 print_exec(b->right, indent, 0);
1620 print_exec(b->right, indent, 0);
1624 print_exec(b->right, indent, 0);
1628 print_exec(b->right, indent, 0);
1632 ###### propagate binode cases
1637 /* both must be numbers, result is Vnum */
1640 /* as propagate_types ignores a NULL,
1641 * unary ops fit here too */
1642 propagate_types(b->left, Vnum, ok);
1643 propagate_types(b->right, Vnum, ok);
1644 if (!vtype_compat(type, Vnum))
1649 /* both must be Vstr, result is Vstr */
1650 propagate_types(b->left, Vstr, ok);
1651 propagate_types(b->right, Vstr, ok);
1652 if (!vtype_compat(type, Vstr))
1657 return propagate_types(b->right, type, ok);
1659 ###### interp binode cases
1662 rv = interp_exec(b->left);
1663 right = interp_exec(b->right);
1664 mpq_add(rv.num, rv.num, right.num);
1667 rv = interp_exec(b->left);
1668 right = interp_exec(b->right);
1669 mpq_sub(rv.num, rv.num, right.num);
1672 rv = interp_exec(b->left);
1673 right = interp_exec(b->right);
1674 mpq_mul(rv.num, rv.num, right.num);
1677 rv = interp_exec(b->left);
1678 right = interp_exec(b->right);
1679 mpq_div(rv.num, rv.num, right.num);
1682 rv = interp_exec(b->right);
1683 mpq_neg(rv.num, rv.num);
1686 rv = interp_exec(b->right);
1687 mpq_abs(rv.num, rv.num);
1690 rv = interp_exec(b->right);
1693 left = interp_exec(b->left);
1694 right = interp_exec(b->right);
1696 rv.str = text_join(left.str, right.str);
1699 ### Blocks, Statements, and Statement lists.
1701 Now that we have expressions out of the way we need to turn to
1702 statements. There are simple statements and more complex statements.
1703 Simple statements do not contain newlines, complex statements do.
1705 Statements often come in sequences and we have corresponding simple
1706 statement lists and complex statement lists.
1707 The former comprise only simple statements separated by semicolons.
1708 The latter comprise complex statements and simple statement lists. They are
1709 separated by newlines. Thus the semicolon is only used to separate
1710 simple statements on the one line. This may be overly restrictive,
1711 but I'm not sure I ever want a complex statement to share a line with
1714 Note that a simple statement list can still use multiple lines if
1715 subsequent lines are indented, so
1717 ###### Example: wrapped simple statement list
1722 is a single simple statement list. This might allow room for
1723 confusion, so I'm not set on it yet.
1725 A simple statement list needs no extra syntax. A complex statement
1726 list has two syntactic forms. It can be enclosed in braces (much like
1727 C blocks), or it can be introduced by a colon and continue until an
1728 unindented newline (much like Python blocks). With this extra syntax
1729 it is referred to as a block.
1731 Note that a block does not have to include any newlines if it only
1732 contains simple statements. So both of:
1734 if condition: a=b; d=f
1736 if condition { a=b; print f }
1740 In either case the list is constructed from a `binode` list with
1741 `Block` as the operator. When parsing the list it is most convenient
1742 to append to the end, so a list is a list and a statement. When using
1743 the list it is more convenient to consider a list to be a statement
1744 and a list. So we need a function to re-order a list.
1745 `reorder_bilist` serves this purpose.
1747 The only stand-alone statement we introduce at this stage is `pass`
1748 which does nothing and is represented as a `NULL` pointer in a `Block`
1768 Block -> Open Statementlist Close ${ $0 = $<2; }$
1769 | Open Newlines Statementlist Close ${ $0 = $<3; }$
1770 | Open SimpleStatements } ${ $0 = reorder_bilist($<2); }$
1771 | Open Newlines SimpleStatements } ${ $0 = reorder_bilist($<3); }$
1772 | : Statementlist ${ $0 = $<2; }$
1773 | : SimpleStatements ${ $0 = reorder_bilist($<2); }$
1775 Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<1); }$
1777 ComplexStatements -> ComplexStatements ComplexStatement ${
1783 | ComplexStatements NEWLINE ${ $0 = $<1; }$
1784 | ComplexStatement ${
1792 ComplexStatement -> SimpleStatements NEWLINE ${
1793 $0 = reorder_bilist($<1);
1795 ## ComplexStatement Grammar
1798 SimpleStatements -> SimpleStatements ; SimpleStatement ${
1804 | SimpleStatement ${
1810 | SimpleStatements ; ${ $0 = $<1; }$
1812 SimpleStatement -> pass ${ $0 = NULL; }$
1813 ## SimpleStatement Grammar
1815 ###### print binode cases
1819 if (b->left == NULL)
1822 print_exec(b->left, indent, 0);
1825 print_exec(b->right, indent, 0);
1828 // block, one per line
1829 if (b->left == NULL)
1830 do_indent(indent, "pass\n");
1832 print_exec(b->left, indent, bracket);
1834 print_exec(b->right, indent, bracket);
1838 ###### propagate binode cases
1841 /* If any statement returns something other then Vnone
1842 * then all such must return same type.
1843 * As each statement may be Vnone or something else,
1844 * we must always pass Vunknown down, otherwise an incorrect
1845 * error might occur.
1849 for (e = b; e; e = cast(binode, e->right)) {
1850 t = propagate_types(e->left, Vunknown, ok);
1851 if (t != Vunknown && t != Vnone) {
1852 if (type == Vunknown)
1861 ###### interp binode cases
1863 while (rv.vtype == Vnone &&
1866 rv = interp_exec(b->left);
1867 b = cast(binode, b->right);
1871 ### The Print statement
1873 `print` is a simple statement that takes a comma-separated list of
1874 expressions and prints the values separated by spaces and terminated
1875 by a newline. No control of formatting is possible.
1877 `print` faces the same list-ordering issue as blocks, and uses the
1883 ###### SimpleStatement Grammar
1885 | print ExpressionList ${
1886 $0 = reorder_bilist($<2);
1888 | print ExpressionList , ${
1893 $0 = reorder_bilist($0);
1904 ExpressionList -> ExpressionList , Expression ${
1917 ###### print binode cases
1920 do_indent(indent, "print");
1924 print_exec(b->left, -1, 0);
1928 b = cast(binode, b->right);
1934 ###### propagate binode cases
1937 /* don't care but all must be consistent */
1938 propagate_types(b->left, Vnolabel, ok);
1939 propagate_types(b->right, Vnolabel, ok);
1942 ###### interp binode cases
1948 for ( ; b; b = cast(binode, b->right))
1952 left = interp_exec(b->left);
1965 ###### Assignment statement
1967 An assignment will assign a value to a variable, providing it hasn't
1968 been declared as a constant. The analysis phase ensures that the type
1969 will be correct so the interpreter just needs to perform the
1970 calculation. There is a form of assignment which declares a new
1971 variable as well as assigning a value. If a name is assigned before
1972 it is declared, an error will be raised as the name is created as
1973 `Vlabel` and it is illegal to assign to such names.
1979 ###### SimpleStatement Grammar
1980 | Variable = Expression ${ {
1981 struct var *v = cast(var, $1);
1987 if (v->var && !v->var->constant) {
1991 | VariableDecl Expression ${
1998 ###### print binode cases
2001 do_indent(indent, "");
2002 print_exec(b->left, indent, 0);
2004 print_exec(b->right, indent, 0);
2010 do_indent(indent, "");
2011 print_exec(b->left, indent, 0);
2012 if (cast(var, b->left)->var->constant)
2016 print_exec(b->right, indent, 0);
2021 ###### propagate binode cases
2025 /* Both must match and not be labels, result is Vnone */
2026 t = propagate_types(b->left, Vnolabel, ok);
2028 propagate_types(b->right, t, ok);
2030 t = propagate_types(b->right, Vnolabel, ok);
2032 t = propagate_types(b->left, t, ok);
2038 ###### interp binode cases
2043 struct variable *v = cast(var, b->left)->var;
2046 right = interp_exec(b->right);
2049 right.vtype = Vunknown;
2053 ### The `use` statement
2055 The `use` statement is the last "simple" statement. It is needed when
2056 the condition in a conditional statement is a block. `use` works much
2057 like `return` in C, but only completes the `condition`, not the whole
2063 ###### SimpleStatement Grammar
2070 ###### print binode cases
2073 do_indent(indent, "use ");
2074 print_exec(b->right, -1, 0);
2079 ###### propagate binode cases
2082 /* result matches value */
2083 return propagate_types(b->right, type, ok);
2085 ###### interp binode cases
2088 rv = interp_exec(b->right);
2091 ### The Conditional Statement
2093 This is the biggy and currently the only complex statement.
2094 This subsumes `if`, `while`, `do/while`, `switch`, and some parts of
2095 `for`. It is comprised of a number of parts, all of which are
2096 optional though set combinations apply.
2098 If there is a `forpart`, it is executed first, only once.
2099 If there is a `dopart`, then it is executed repeatedly providing
2100 always that the `condpart` or `cond`, if present, does not return a non-True
2101 value. `condpart` can fail to return any value if it simply executes
2102 to completion. This is treated the same as returning True.
2104 If there is a `thenpart` it will be executed whenever the `condpart`
2105 or `cond` returns True (or does not return any value), but this will happen
2106 *after* `dopart` (when present).
2108 If `elsepart` is present it will be executed at most once when the
2109 condition returns `False` or some value that isn't `True` and isn't
2110 matched by any `casepart`. If there are any `casepart`s, they will be
2111 executed when the condition returns a matching value.
2113 The particular sorts of values allowed in case parts has not yet been
2114 determined in the language design, so nothing is prohibited.
2116 The various blocks in this complex statement potentially provide scope
2117 for variables as described earlier. Each such block must include the
2118 "OpenScope" nonterminal before parsing the block, and must call
2119 `var_block_close()` when closing the block.
2121 The code following "`if`", "`switch`" and "`for`" does not get its own
2122 scope, but is in a scope covering the whole statement, so names
2123 declared there cannot be redeclared elsewhere. Similarly the
2124 condition following "`while`" is in a scope that covers the body
2125 ("`do`" part) of the loop, and which does not allow conditional scope
2126 extension. Code following "`then`" (both looping and non-looping),
2127 "`else`" and "`case`" each get their own local scope.
2129 The `cond_statement` cannot fit into a `binode` so a new `exec` is
2138 struct exec *action;
2139 struct casepart *next;
2141 struct cond_statement {
2143 struct exec *forpart, *condpart, *dopart, *thenpart, *elsepart;
2144 struct casepart *casepart;
2147 ###### ast functions
2149 static void free_casepart(struct casepart *cp)
2153 free_exec(cp->value);
2154 free_exec(cp->action);
2161 static void free_cond_statement(struct cond_statement *s)
2165 free_exec(s->forpart);
2166 free_exec(s->condpart);
2167 free_exec(s->dopart);
2168 free_exec(s->thenpart);
2169 free_exec(s->elsepart);
2170 free_casepart(s->casepart);
2174 ###### free exec cases
2175 case Xcond_statement: free_cond_statement(cast(cond_statement, e)); break;
2177 ###### ComplexStatement Grammar
2178 | CondStatement ${ $0 = $<1; }$
2183 // both ForThen and Whilepart open scopes, and CondSuffix only
2184 // closes one - so in the first branch here we have another to close.
2185 CondStatement -> ForThen WhilePart CondSuffix ${
2187 $0->forpart = $1.forpart; $1.forpart = NULL;
2188 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
2189 $0->condpart = $2.condpart; $2.condpart = NULL;
2190 $0->dopart = $2.dopart; $2.dopart = NULL;
2191 var_block_close(config2context(config), CloseSequential);
2193 | WhilePart CondSuffix ${
2195 $0->condpart = $1.condpart; $1.condpart = NULL;
2196 $0->dopart = $1.dopart; $1.dopart = NULL;
2198 | SwitchPart CondSuffix ${
2202 | IfPart IfSuffix ${
2204 $0->condpart = $1.condpart; $1.condpart = NULL;
2205 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
2206 // This is where we close an "if" statement
2207 var_block_close(config2context(config), CloseSequential);
2210 CondSuffix -> IfSuffix ${
2212 // This is where we close scope of the whole
2213 // "for" or "while" statement
2214 var_block_close(config2context(config), CloseSequential);
2216 | CasePart CondSuffix ${
2218 $1->next = $0->casepart;
2223 CasePart -> Newlines case Expression OpenScope Block ${
2224 $0 = calloc(1,sizeof(struct casepart));
2227 var_block_close(config2context(config), CloseParallel);
2229 | case Expression OpenScope Block ${
2230 $0 = calloc(1,sizeof(struct casepart));
2233 var_block_close(config2context(config), CloseParallel);
2237 IfSuffix -> Newlines ${ $0 = new(cond_statement); }$
2238 | Newlines else OpenScope Block ${
2239 $0 = new(cond_statement);
2241 var_block_close(config2context(config), CloseElse);
2243 | else OpenScope Block ${
2244 $0 = new(cond_statement);
2246 var_block_close(config2context(config), CloseElse);
2248 | Newlines else OpenScope CondStatement ${
2249 $0 = new(cond_statement);
2251 var_block_close(config2context(config), CloseElse);
2253 | else OpenScope CondStatement ${
2254 $0 = new(cond_statement);
2256 var_block_close(config2context(config), CloseElse);
2261 // These scopes are closed in CondSuffix
2262 ForPart -> for OpenScope SimpleStatements ${
2263 $0 = reorder_bilist($<3);
2265 | for OpenScope Block ${
2269 ThenPart -> then OpenScope SimpleStatements ${
2270 $0 = reorder_bilist($<3);
2271 var_block_close(config2context(config), CloseSequential);
2273 | then OpenScope Block ${
2275 var_block_close(config2context(config), CloseSequential);
2278 ThenPartNL -> ThenPart OptNL ${
2282 // This scope is closed in CondSuffix
2283 WhileHead -> while OpenScope Block ${
2288 ForThen -> ForPart OptNL ThenPartNL ${
2296 // This scope is closed in CondSuffix
2297 WhilePart -> while OpenScope Expression Block ${
2298 $0.type = Xcond_statement;
2302 | WhileHead OptNL do Block ${
2303 $0.type = Xcond_statement;
2308 IfPart -> if OpenScope Expression OpenScope Block ${
2309 $0.type = Xcond_statement;
2312 var_block_close(config2context(config), CloseParallel);
2314 | if OpenScope Block OptNL then OpenScope Block ${
2315 $0.type = Xcond_statement;
2318 var_block_close(config2context(config), CloseParallel);
2322 // This scope is closed in CondSuffix
2323 SwitchPart -> switch OpenScope Expression ${
2326 | switch OpenScope Block ${
2330 ###### print exec cases
2332 case Xcond_statement:
2334 struct cond_statement *cs = cast(cond_statement, e);
2335 struct casepart *cp;
2337 do_indent(indent, "for");
2338 if (bracket) printf(" {\n"); else printf(":\n");
2339 print_exec(cs->forpart, indent+1, bracket);
2342 do_indent(indent, "} then {\n");
2344 do_indent(indent, "then:\n");
2345 print_exec(cs->thenpart, indent+1, bracket);
2347 if (bracket) do_indent(indent, "}\n");
2351 if (cs->condpart && cs->condpart->type == Xbinode &&
2352 cast(binode, cs->condpart)->op == Block) {
2354 do_indent(indent, "while {\n");
2356 do_indent(indent, "while:\n");
2357 print_exec(cs->condpart, indent+1, bracket);
2359 do_indent(indent, "} do {\n");
2361 do_indent(indent, "do:\n");
2362 print_exec(cs->dopart, indent+1, bracket);
2364 do_indent(indent, "}\n");
2366 do_indent(indent, "while ");
2367 print_exec(cs->condpart, 0, bracket);
2372 print_exec(cs->dopart, indent+1, bracket);
2374 do_indent(indent, "}\n");
2379 do_indent(indent, "switch");
2381 do_indent(indent, "if");
2382 if (cs->condpart && cs->condpart->type == Xbinode &&
2383 cast(binode, cs->condpart)->op == Block) {
2388 print_exec(cs->condpart, indent+1, bracket);
2390 do_indent(indent, "}\n");
2392 do_indent(indent, "then:\n");
2393 print_exec(cs->thenpart, indent+1, bracket);
2397 print_exec(cs->condpart, 0, bracket);
2403 print_exec(cs->thenpart, indent+1, bracket);
2405 do_indent(indent, "}\n");
2410 for (cp = cs->casepart; cp; cp = cp->next) {
2411 do_indent(indent, "case ");
2412 print_exec(cp->value, -1, 0);
2417 print_exec(cp->action, indent+1, bracket);
2419 do_indent(indent, "}\n");
2422 do_indent(indent, "else");
2427 print_exec(cs->elsepart, indent+1, bracket);
2429 do_indent(indent, "}\n");
2434 ###### propagate exec cases
2435 case Xcond_statement:
2437 // forpart and dopart must return Vnone
2438 // condpart must be bool or match casepart->values
2439 // thenpart, elsepart, casepart->action must match
2441 struct cond_statement *cs = cast(cond_statement, prog);
2444 t = propagate_types(cs->forpart, Vnone, ok);
2445 if (!vtype_compat(Vnone, t))
2447 t = propagate_types(cs->dopart, Vnone, ok);
2448 if (!vtype_compat(Vnone, t))
2450 if (cs->casepart == NULL)
2451 propagate_types(cs->condpart, Vbool, ok);
2454 for (c = cs->casepart;
2455 c && (t == Vunknown); c = c->next)
2456 t = propagate_types(c->value, Vunknown, ok);
2457 if (t == Vunknown && cs->condpart)
2458 t = propagate_types(cs->condpart, Vunknown, ok);
2459 // Now we have a type (I hope) push it down
2460 if (t != Vunknown) {
2461 for (c = cs->casepart; c; c = c->next)
2462 propagate_types(c->value, t, ok);
2463 propagate_types(cs->condpart, t, ok);
2466 if (type == Vunknown || type == Vnone)
2467 type = propagate_types(cs->thenpart, Vunknown, ok);
2468 if (type == Vunknown || type == Vnone)
2469 type = propagate_types(cs->elsepart, Vunknown, ok);
2470 for (c = cs->casepart;
2471 c && (type == Vunknown || type == Vnone);
2473 type = propagate_types(c->action, Vunknown, ok);
2474 if (type != Vunknown && type != Vnone) {
2475 propagate_types(cs->thenpart, type, ok);
2476 propagate_types(cs->elsepart, type, ok);
2477 for (c = cs->casepart; c ; c = c->next)
2478 propagate_types(c->action, type, ok);
2484 ###### interp exec cases
2485 case Xcond_statement:
2487 struct value v, cnd;
2488 struct casepart *cp;
2489 struct cond_statement *c = cast(cond_statement, e);
2491 interp_exec(c->forpart);
2494 cnd = interp_exec(c->condpart);
2497 if (!(cnd.vtype == Vnone ||
2498 (cnd.vtype == Vbool && cnd.bool != 0)))
2502 interp_exec(c->dopart);
2505 v = interp_exec(c->thenpart);
2506 if (v.vtype != Vnone || !c->dopart)
2510 } while (c->dopart);
2512 for (cp = c->casepart; cp; cp = cp->next) {
2513 v = interp_exec(cp->value);
2514 if (value_cmp(v, cnd) == 0) {
2517 return interp_exec(cp->action);
2523 return interp_exec(c->elsepart);
2528 ### Finally the whole program.
2530 Somewhat reminiscent of Pascal, a (current) Ocean program starts with
2531 the keyword "program" and a list of variable names which are assigned
2532 values from command line arguments. Following this is a `block` which
2533 is the code to execute.
2535 As this is the top level, several things are handled a bit
2537 The whole program is not interpreted by `interp_exec` as that isn't
2538 passed the argument list which the program requires. Similarly type
2539 analysis is a bit more interesting at this level.
2544 ###### Parser: grammar
2547 Program -> program OpenScope Varlist Block OptNL ${
2550 $0->left = reorder_bilist($<3);
2552 var_block_close(config2context(config), CloseSequential);
2553 if (config2context(config)->scope_stack) abort();
2556 Varlist -> Varlist ArgDecl ${
2565 ArgDecl -> IDENTIFIER ${ {
2566 struct variable *v = var_decl(config2context(config), $1.txt);
2573 ###### print binode cases
2575 do_indent(indent, "program");
2576 for (b2 = cast(binode, b->left); b2; b2 = cast(binode, b2->right)) {
2578 print_exec(b2->left, 0, 0);
2584 print_exec(b->right, indent+1, bracket);
2586 do_indent(indent, "}\n");
2589 ###### propagate binode cases
2590 case Program: abort();
2592 ###### core functions
2594 static int analyse_prog(struct exec *prog, struct parse_context *c)
2596 struct binode *b = cast(binode, prog);
2601 propagate_types(b->right, Vnone, &ok);
2606 for (b = cast(binode, b->left); b; b = cast(binode, b->right)) {
2607 struct var *v = cast(var, b->left);
2608 if (v->var->val.vtype == Vunknown)
2609 val_init(&v->var->val, Vstr);
2611 b = cast(binode, prog);
2614 propagate_types(b->right, Vnone, &ok);
2619 /* Make sure everything is still consistent */
2620 propagate_types(b->right, Vnone, &ok);
2624 static void interp_prog(struct exec *prog, char **argv)
2626 struct binode *p = cast(binode, prog);
2627 struct binode *al = cast(binode, p->left);
2631 struct var *v = cast(var, al->left);
2632 struct value *vl = &v->var->val;
2634 if (argv[0] == NULL) {
2635 printf("Not enough args\n");
2638 al = cast(binode, al->right);
2640 if (!parse_value(vl, argv[0]))
2644 v = interp_exec(p->right);
2648 ###### interp binode cases
2649 case Program: abort();
2651 ## And now to test it out.
2653 Having a language requires having a "hello world" program. I'll
2654 provide a little more than that: a program that prints "Hello world",
2655 finds the GCD of two numbers, prints the first few elements of
2656 Fibonacci, and performs a binary search for a number.
2658 ###### File: oceani.mk
2661 @echo "===== TEST ====="
2662 ./oceani --section "test: hello" oceani.mdc 55 33
2667 print "Hello World, what lovely oceans you have!"
2668 /* When a variable is defined in both branches of an 'if',
2669 * and used afterwards, the variables are merged.
2675 print "Is", A, "bigger than", B,"? ", bigger
2676 /* If a variable is not used after the 'if', no
2677 * merge happens, so types can be different
2681 print A, "is more than twice", B, "?", double
2684 print "double", A, "is only", double
2693 print "GCD of", A, "and", B,"is", a
2695 print a, "is not positive, cannot calculate GCD"
2697 print b, "is not positive, cannot calculate GCD"
2702 print "Fibonacci:", f1,f2,
2703 then togo = togo - 1
2711 /* Binary search... */
2716 mid := (lo + hi) / 2
2728 print "Yay, I found", target
2730 print "Closest I found was", mid