1 # Ocean Interpreter - Stoney Creek version
3 Ocean is intended to be an compiled language, so this interpreter is
4 not targeted at being the final product. It is, rather, an intermediate
5 stage, and fills that role in two distinct ways.
7 Firstly, it exists as a platform to experiment with the early language
8 design. An interpreter is easy to write and easy to get working, so
9 the barrier for entry is lower if I aim to start with an interpreter.
11 Secondly, the plan for the Ocean compiler is to write it in the
12 [Ocean language](http://ocean-lang.org). To achieve this we naturally
13 need some sort of boot-strap process and this interpreter - written in
14 portable C - will fill that role. It will be used to bootstrap the
17 Two features that are not needed to fill either of these roles are
18 performance and completeness. The interpreter only needs to be fast
19 enough to run small test programs and occasionally to run the compiler
20 on itself. It only needs to be complete enough to test aspects of the
21 design which are developed before the compiler is working, and to run
22 the compiler on itself. Any features not used by the compiler when
23 compiling itself are superfluous. They may be included anyway, but
26 Nonetheless, the interpreter should end up being reasonably complete,
27 and any performance bottlenecks which appear and are easily fixed, will
32 This second version of the interpreter exists to test out the
33 structured statement providing conditions and iteration, and simple
34 variable scoping. Clearly we need some minimal other functionality so
35 that values can be tested and instructions iterated over. All that
36 functionality is clearly not normative at this stage (not that
37 anything is **really** normative yet) and will change, so early test
38 code will certainly break in later versions.
40 The under-test parts of the language are:
42 - conditional/looping structured statements
43 - the `use` statement which is needed for that
44 - Variable binding using ":=" and "::=", and assignment using "=".
46 Elements which are present to make a usable language are:
48 - "blocks" of multiple statements.
49 - `pass`: a statement which does nothing.
50 - expressions: `+`, `-`, `*`, `/` can apply to numbers and `++` can
51 catenate strings. `and`, `or`, `not` manipulate Booleans, and
52 normal comparison operators can work on all three types.
53 - `print`: will print the values in a list of expressions.
54 - `program`: is given a list of identifiers to initialize from
59 Versions of the interpreter which obviously do not support a complete
60 language will be named after creeks and streams. This one is Stoney
63 Once we have something reasonably resembling a complete language, the
64 names of rivers will be used.
65 Early versions of the compiler will be named after seas. Major
66 releases of the compiler will be named after oceans. Hopefully I will
67 be finished once I get to the Pacific Ocean release.
71 As well as parsing and executing a program, the interpreter can print
72 out the program from the parsed internal structure. This is useful
73 for validating the parsing.
74 So the main requirements of the interpreter are:
76 - Parse the program, possible with tracing
77 - Analyse the parsed program to ensure consistency
81 This is all performed by a single C program extracted with
84 There will be two formats for printing the program: a default and one
85 that uses bracketing. So a `--bracket` command line option is needed
86 for that. Normally the first code section found is used, however an
87 alternate section can be requested so that a file (such as this one)
88 can contain multiple programs This is effected with the `--section`
91 ###### File: oceani.mk
93 myCFLAGS := -Wall -g -fplan9-extensions
94 CFLAGS := $(filter-out $(myCFLAGS),$(CFLAGS)) $(myCFLAGS)
95 myLDLIBS:= libparser.o libscanner.o libmdcode.o -licuuc
96 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
98 all :: $(LDLIBS) oceani
99 oceani.c oceani.h : oceani.mdc parsergen
100 ./parsergen -o oceani --LALR --tag Parser oceani.mdc
101 oceani.mk: oceani.mdc md2c
104 oceani: oceani.o $(LDLIBS)
105 $(CC) $(CFLAGS) -o oceani oceani.o $(LDLIBS)
107 ###### Parser: header
110 struct parse_context {
111 struct token_config config;
119 #define container_of(ptr, type, member) ({ \
120 const typeof( ((type *)0)->member ) *__mptr = (ptr); \
121 (type *)( (char *)__mptr - offsetof(type,member) );})
123 #define config2context(_conf) container_of(_conf, struct parse_context, \
132 #include <sys/mman.h>
151 static char Usage[] = "Usage: oceani --trace --print --noexec --brackets"
152 "--section=SectionName prog.ocn\n";
153 static const struct option long_options[] = {
154 {"trace", 0, NULL, 't'},
155 {"print", 0, NULL, 'p'},
156 {"noexec", 0, NULL, 'n'},
157 {"brackets", 0, NULL, 'b'},
158 {"section", 1, NULL, 's'},
161 const char *options = "tpnbs";
162 int main(int argc, char *argv[])
168 char *section = NULL;
169 struct parse_context context = {
171 .ignored = (1 << TK_line_comment)
172 | (1 << TK_block_comment),
173 .number_chars = ".,_+-",
178 int doprint=0, dotrace=0, doexec=1, brackets=0;
181 while ((opt = getopt_long(argc, argv, options, long_options, NULL))
184 case 't': dotrace=1; break;
185 case 'p': doprint=1; break;
186 case 'n': doexec=0; break;
187 case 'b': brackets=1; break;
188 case 's': section = optarg; break;
189 default: fprintf(stderr, Usage);
193 if (optind >= argc) {
194 fprintf(stderr, "oceani: no input file given\n");
197 fd = open(argv[optind], O_RDONLY);
199 fprintf(stderr, "oceani: cannot open %s\n", argv[optind]);
202 context.file_name = argv[optind];
203 len = lseek(fd, 0, 2);
204 file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
205 s = code_extract(file, file+len, NULL);
207 fprintf(stderr, "oceani: could not find any code in %s\n",
213 for (ss = s; ss; ss = ss->next) {
214 struct text sec = ss->section;
215 if (sec.len == strlen(section) &&
216 strncmp(sec.txt, section, sec.len) == 0)
220 prog = parse_oceani(ss->code, &context.config,
221 dotrace ? stderr : NULL);
223 fprintf(stderr, "oceani: cannot find section %s\n",
228 prog = parse_oceani(s->code, &context.config,
229 dotrace ? stderr : NULL);
231 fprintf(stderr, "oceani: fatal parser error.\n");
232 context.parse_error = 1;
235 print_exec(*prog, 0, brackets);
236 if (prog && doexec && !context.parse_error) {
237 if (!analyse_prog(*prog, &context)) {
238 fprintf(stderr, "oceani: type error in program - not running.\n");
241 interp_prog(*prog, argv+optind+1);
248 struct section *t = s->next;
254 exit(context.parse_error ? 1 : 0);
259 These four requirements of parse, analyse, print, interpret apply to
260 each language element individually so that is how most of the code
263 Three of the four are fairly self explanatory. The one that requires
264 a little explanation is the analysis step.
266 The current language design does not require (or even allow) the types
267 of variables to be declared, but they must still have a single type.
268 Different operations impose different requirements on the variables,
269 for example addition requires both arguments to be numeric, and
270 assignment requires the variable on the left to have the same type as
271 the expression on the right.
273 Analysis involves propagating these type requirements around and
274 consequently setting the type of each variable. If any requirements
275 are violated (e.g. a string is compared with a number) or if a
276 variable needs to have two different types, then an error is raised
277 and the program will not run.
279 If the same variable is declared in both branchs of an 'if/else', or
280 in all cases of a 'switch' then the multiple instances may be merged
281 into just one variable if the variable is references after the
282 conditional statement. When this happens, the types must naturally be
283 consistent across all the branches. When the variable is not used
284 outside the if, the variables in the different branches are distinct
285 and can be of different types.
287 Determining the types of all variables early is important for
288 processing command line arguments. These can be assigned to any type
289 of variable, but we must first know the correct type so any required
290 conversion can happen. If a variable is associated with a command
291 line argument but no type can be interpreted (e.g. the variable is
292 only ever used in a `print` statement), then the type is set to
295 Undeclared names may only appear in "use" statements and "case" expressions.
296 These names are given a type of "label" and a unique value.
297 This allows them to fill the role of a name in an enumerated type, which
298 is useful for testing the `switch` statement.
300 As we will see, the condition part of a `while` statement can return
301 either a Boolean or some other type. This requires that the expect
302 type that gets passed around comprises a type (`enum vtype`) and a
303 flag to indicate that `Vbool` is also permitted.
305 As there are, as yet, no distinct types that are compatible, there
306 isn't much subtlety in the analysis. When we have distinct number
307 types, this will become more interesting.
311 When analysis discovers an inconsistency it needs to report an error;
312 just refusing to run the code esure that the error doesn't cascade,
313 but by itself it isn't very useful. A clear understand of the sort of
314 error message that are useful will help guide the process of analysis.
316 At a simplistic level, the only sort of error that type analysis can
317 report is that the type of some construct doesn't match a contextual
318 requirement. For example, in `4 + "hello"` the addition provides a
319 contextual requirement for numbers, but `"hello"` is not a number. In
320 this particular example no further information is needed as the types
321 are obvious from local information. When a variable is involved that
322 isn't the case. It may be helpful to explain why the variable has a
323 particular type, by indicating the location where the type was set,
324 whether by declaration or usage.
326 Using a recursive-descent analysis we can easily detect a problem at
327 multiple locations. In "`hello:= "there"; 4 + hello`" the addition
328 will detect that one argument is not a number and the usage of `hello`
329 will detect that a number was wanted, but not provided. In this
330 (early) version of the language, we will generate error reports at
331 multiple locations, to the use of `hello` will report an error and
332 explain were the value was set, and the addition will report an error
333 and say why numbers are needed. To be able to report locations for
334 errors, each language element will need to record a file location
335 (line and column) and each variable will need to record the language
336 element where its type was set. For now we will assume that each line
337 of an error message indicates one location in the file, and up to 2
338 types. So we provide a `printf`-like function which takes a format, a
339 language (a `struct exec` which has not yet been introduced), and 2
340 types. "`$1`" reports the first type, "`$2`" reports the second. We
341 will need a function to print the location, once we know how that is
346 static void fput_loc(struct exec *loc, FILE *f);
348 ###### core functions
350 static void type_err(struct parse_context *c,
351 char *fmt, struct exec *loc,
352 enum vtype t1, enum vtype t2)
354 fprintf(stderr, "%s:", c->file_name);
355 fput_loc(loc, stderr);
356 for (; *fmt ; fmt++) {
363 case '%': fputc(*fmt, stderr); break;
364 default: fputc('?', stderr); break;
366 fputs(vtype_names[t1], stderr);
369 fputs(vtype_names[t2], stderr);
378 static void tok_err(struct parse_context *c, char *fmt, struct token *t)
380 fprintf(stderr, "%s:%d:%d: %s\n", c->file_name, t->line, t->col, fmt);
386 One last introductory step before detailing the language elements and
387 providing their four requirements is to establish the data structures
388 to store these elements.
390 There are two key objects that we need to work with: executable
391 elements which comprise the program, and values which the program
392 works with. Between these are the variables in their various scopes
393 which hold the values.
397 Values can be numbers, which we represent as multi-precision
398 fractions, strings, Booleans and labels. When analysing the program
399 we also need to allow for places where no value is meaningful
400 (`Vnone`) and where we don't know what type to expect yet (`Vunknown`
401 which can be anything and `Vnolabel` which can be anything except a
402 label). A 2 character 'tail' is included in each value as the scanner
403 wants to parse that from the end of numbers and we need somewhere to
404 put it. It is currently ignored but one day might allow for
405 e.g. "imaginary" numbers.
407 Values are never shared, they are always copied when used, and freed
408 when no longer needed.
410 When propagating type information around the program, we need to
411 determine if two types are compatible, where `Vunknown` is compatible
412 which anything, and `Vnolabel` is compatible with anything except a
413 label. A separate funtion to encode this rule will simplify some code
416 When assigning command line arguments to variable, we need to be able
417 to parse each type from a string.
425 myLDLIBS := libnumber.o libstring.o -lgmp
426 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
430 enum vtype {Vnolabel, Vunknown, Vnone, Vstr, Vnum, Vbool, Vlabel} vtype;
440 char *vtype_names[] = {"nolabel", "unknown", "none", "string",
441 "number", "Boolean", "label"};
444 static void free_value(struct value v)
449 case Vunknown: break;
450 case Vstr: free(v.str.txt); break;
451 case Vnum: mpq_clear(v.num); break;
457 static int vtype_compat(enum vtype require, enum vtype have, int bool_permitted)
459 if (bool_permitted && have == Vbool)
463 return have != Vlabel;
467 return have == Vunknown || require == have;
471 ###### value functions
473 static void val_init(struct value *val, enum vtype type)
479 case Vunknown: break;
481 mpq_init(val->num); break;
483 val->str.txt = malloc(1);
495 static struct value dup_value(struct value v)
502 case Vunknown: break;
511 mpq_set(rv.num, v.num);
514 rv.str.len = v.str.len;
515 rv.str.txt = malloc(rv.str.len);
516 memcpy(rv.str.txt, v.str.txt, v.str.len);
522 static int value_cmp(struct value left, struct value right)
525 if (left.vtype != right.vtype)
526 return left.vtype - right.vtype;
527 switch (left.vtype) {
528 case Vlabel: cmp = left.label == right.label ? 0 : 1; break;
529 case Vnum: cmp = mpq_cmp(left.num, right.num); break;
530 case Vstr: cmp = text_cmp(left.str, right.str); break;
531 case Vbool: cmp = left.bool - right.bool; break;
534 case Vunknown: cmp = 0;
539 static struct text text_join(struct text a, struct text b)
542 rv.len = a.len + b.len;
543 rv.txt = malloc(rv.len);
544 memcpy(rv.txt, a.txt, a.len);
545 memcpy(rv.txt+a.len, b.txt, b.len);
549 static void print_value(struct value v)
553 printf("*Unknown*"); break;
556 printf("*no-value*"); break;
558 printf("*label-%p*", v.label); break;
560 printf("%.*s", v.str.len, v.str.txt); break;
562 printf("%s", v.bool ? "True":"False"); break;
567 mpf_set_q(fl, v.num);
568 gmp_printf("%Fg", fl);
575 static int parse_value(struct value *vl, char *arg)
586 vl->str.len = strlen(arg);
587 vl->str.txt = malloc(vl->str.len);
588 memcpy(vl->str.txt, arg, vl->str.len);
595 tx.txt = arg; tx.len = strlen(tx.txt);
596 if (number_parse(vl->num, vl->tail, tx) == 0)
599 mpq_neg(vl->num, vl->num);
602 if (strcasecmp(arg, "true") == 0 ||
603 strcmp(arg, "1") == 0)
605 else if (strcasecmp(arg, "false") == 0 ||
606 strcmp(arg, "0") == 0)
609 printf("Bad bool: %s\n", arg);
619 Variables are scoped named values. We store the names in a linked
620 list of "bindings" sorted lexically, and use sequential search and
627 struct binding *next; // in lexical order
631 This linked list is stored in the parse context so that "reduce"
632 functions can find or add variables, and so the analysis phase can
633 ensure that every variable gets a type.
637 struct binding *varlist; // In lexical order
641 static struct binding *find_binding(struct parse_context *c, struct text s)
643 struct binding **l = &c->varlist;
648 (cmp = text_cmp((*l)->name, s)) < 0)
652 n = calloc(1, sizeof(*n));
659 Each name can be linked to multiple variables defined in different
660 scopes. Each scope starts where the name is declared and continues
661 until the end of the containing code block. Scopes of a given name
662 cannot nest, so a declaration while a name is in-scope is an error.
664 ###### binding fields
665 struct variable *var;
669 struct variable *previous;
671 struct binding *name;
672 struct exec *where_decl;// where name was declared
673 struct exec *where_set; // where type was set
677 While the naming seems strange, we include local constants in the
678 definition of variables. A name declared `var := value` can
679 subsequently be changed, but a name declared `var ::= value` cannot -
682 ###### variable fields
685 Scopes in parallel branches can be partially merged. More
686 specifically, if a given name is declared in both branches of an
687 if/else then it's scope is a candidate for merging. Similarly if
688 every branch of an exhaustive switch (e.g. has an "else" clause)
689 declares a given name, then the scopes from the branches are
690 candidates for merging.
692 Note that names declared inside a loop (which is only parallel to
693 itself) are never visible after the loop. Similarly names defined in
694 scopes which are not parallel, such as those started by `for` and
695 `switch`, are never visible after the scope. Only variable defined in
696 both `then` and `else` (including the implicit then after an `if`, and
697 excluding `then` used with `for`) and in all `case`s and `else` of a
698 `switch` or `while` can be visible beyond the `if`/`switch`/`while`.
700 Labels, which are a bit like variables, follow different rules.
701 Labels are not explicitly declared, but if an undeclared name appears
702 in a context where a label is legal, that effectively declares the
703 name as a label. The declaration remains in force (or in scope) at
704 least to the end of the immediately containing block and conditionally
705 in any larger containing block which does not declare the name in some
706 other way. Importantly, the conditional scope extension happens even
707 if the label is only used in parallel branch of a conditional -- when
708 used in one branch it is treated as having been declared in all
711 Merge candidates are tentatively visible beyond the end of the
712 branching statement which creates them. If the name is used, the
713 merge is affirmed and they become a single variable visible at the
714 outer layer. If not - if it is redeclared first - the merge lapses.
716 To track scopes we have an extra stack, implemented as a linked list,
717 which roughly parallels the parse stack and which is used exclusively
718 for scoping. When a new scope is opened, a new frame is pushed and
719 the child-count of the parent frame is incremented. This child-count
720 is used to distinguish between the first of a set of parallel scopes,
721 in which declared variables must not be in scope, and subsequent
722 branches, whether they must already be conditionally scoped.
724 To push a new frame *before* any code in the frame is parsed, we need a
725 grammar reduction. This is most easily achieved with a grammar
726 element which derives the empty string, and created the new scope when
727 it is recognized. This can be placed, for example, between a keyword
728 like "if" and the code following it.
732 struct scope *parent;
738 struct scope *scope_stack;
741 static void scope_pop(struct parse_context *c)
743 struct scope *s = c->scope_stack;
745 c->scope_stack = s->parent;
750 static void scope_push(struct parse_context *c)
752 struct scope *s = calloc(1, sizeof(*s));
754 c->scope_stack->child_count += 1;
755 s->parent = c->scope_stack;
763 OpenScope -> ${ scope_push(config2context(config)); }$
766 Each variable records a scope depth and is in one of four states:
768 - "in scope". This is the case between the declaration of the
769 variable and the end of the containing block, and also between
770 the usage with affirms a merge and the end of the block.
772 The scope depth is not greater than the current parse context scope
773 nest depth. When the block of that depth closes, the state will
774 change. To achieve this, all "in scope" variables are linked
775 together as a stack in nesting order.
777 - "pending". The "in scope" block has closed, but other parallel
778 scopes are still being processed. So far, every parallel block at
779 the same level that has closed has declared the name.
781 The scope depth is the depth of the last parallel block that
782 enclosed the declaration, and that has closed.
784 - "conditionally in scope". The "in scope" block and all parallel
785 scopes have closed, and no further mention of the name has been
786 seen. This state includes a secondary nest depth which records the
787 outermost scope seen since the variable became conditionally in
788 scope. If a use of the name is found, the variable becomes "in
789 scope" and that secondary depth becomes the recorded scope depth.
790 If the name is declared as a new variable, the old variable becomes
791 "out of scope" and the recorded scope depth stays unchanged.
793 - "out of scope". The variable is neither in scope nor conditionally
794 in scope. It is permanently out of scope now and can be removed from
795 the "in scope" stack.
798 ###### variable fields
799 int depth, min_depth;
800 enum { OutScope, PendingScope, CondScope, InScope } scope;
801 struct variable *in_scope;
805 struct variable *in_scope;
807 All variables with the same name are linked together using the
808 'previous' link. Those variable that have
809 been affirmatively merged all have a 'merged' pointer that points to
810 one primary variable - the most recently declared instance. When
811 merging variables, we need to also adjust the 'merged' pointer on any
812 other variables that had previously been merged with the one that will
813 no longer be primary.
815 ###### variable fields
816 struct variable *merged;
820 static void variable_merge(struct variable *primary, struct variable *secondary)
826 primary = primary->merged;
828 for (v = primary->previous; v; v=v->previous)
829 if (v == secondary || v == secondary->merged ||
830 v->merged == secondary ||
831 (v->merged && v->merged == secondary->merged)) {
839 while (context.varlist) {
840 struct binding *b = context.varlist;
841 struct variable *v = b->var;
842 context.varlist = b->next;
845 struct variable *t = v;
853 #### Manipulating Bindings
855 When a name is conditionally visible, a new declaration discards the
856 old binding - the condition lapses. Conversely a usage of the name
857 affirms the visibility and extends it to the end of the containing
858 block - i.e. the block that contains both the original declaration and
859 the latest usage. This is determined from `min_depth`. When a
860 conditionally visible variable gets affirmed like this, it is also
861 merged with other conditionally visible variables with the same name.
863 When we parse a variable declaration we either signal an error if the
864 name is currently bound, or create a new variable at the current nest
865 depth if the name is unbound or bound to a conditionally scoped or
866 pending-scope variable. If the previous variable was conditionally
867 scoped, it and its homonyms becomes out-of-scope.
869 When we parse a variable reference (including non-declarative
870 assignment) we signal an error if the name is not bound or is bound to
871 a pending-scope variable; update the scope if the name is bound to a
872 conditionally scoped variable; or just proceed normally if the named
873 variable is in scope.
875 When we exit a scope, any variables bound at this level are either
876 marked out of scope or pending-scoped, depending on whether the
877 scope was sequential or parallel.
879 When exiting a parallel scope we check if there are any variables that
880 were previously pending and are still visible. If there are, then
881 there weren't redeclared in the most recent scope, so they cannot be
882 merged and must become out-of-scope. If it is not the first of
883 parallel scopes (based on `child_count`), we check that there was a
884 previous binding that is still pending-scope. If there isn't, the new
885 variable must now be out-of-scope.
887 When exiting a sequential scope that immediately enclosed parallel
888 scopes, we need to resolve any pending-scope variables. If there was
889 no `else` clause, and we cannot determine that the `switch` was exhaustive,
890 we need to mark all pending-scope variable as out-of-scope. Otherwise
891 all pending-scope variables become conditionally scoped.
894 enum closetype { CloseSequential, CloseParallel, CloseElse };
898 static struct variable *var_decl(struct parse_context *c, struct text s)
900 struct binding *b = find_binding(c, s);
901 struct variable *v = b->var;
903 switch (v ? v->scope : OutScope) {
905 /* Caller will report the error */
909 v && v->scope == CondScope;
915 v = calloc(1, sizeof(*v));
916 v->previous = b->var;
919 v->min_depth = v->depth = c->scope_depth;
921 v->in_scope = c->in_scope;
923 val_init(&v->val, Vunknown);
927 static struct variable *var_ref(struct parse_context *c, struct text s)
929 struct binding *b = find_binding(c, s);
930 struct variable *v = b->var;
933 switch (v ? v->scope : OutScope) {
936 /* Signal an error - once that is possible */
939 /* All CondScope variables of this name need to be merged
942 v->depth = v->min_depth;
944 for (v2 = v->previous;
945 v2 && v2->scope == CondScope;
947 variable_merge(v, v2);
955 static void var_block_close(struct parse_context *c, enum closetype ct)
957 /* close of all variables that are in_scope */
958 struct variable *v, **vp, *v2;
961 for (vp = &c->in_scope;
962 v = *vp, v && v->depth > c->scope_depth && v->min_depth > c->scope_depth;
966 case CloseParallel: /* handle PendingScope */
970 if (c->scope_stack->child_count == 1)
971 v->scope = PendingScope;
972 else if (v->previous &&
973 v->previous->scope == PendingScope)
974 v->scope = PendingScope;
975 else if (v->val.vtype == Vlabel)
976 v->scope = PendingScope;
977 else if (v->name->var == v)
979 if (ct == CloseElse) {
980 /* All Pending variables with this name
981 * are now Conditional */
983 v2 && v2->scope == PendingScope;
985 v2->scope = CondScope;
990 v2 && v2->scope == PendingScope;
992 if (v2->val.vtype != Vlabel)
993 v2->scope = OutScope;
995 case OutScope: break;
998 case CloseSequential:
999 if (v->val.vtype == Vlabel)
1000 v->scope = PendingScope;
1003 v->scope = OutScope;
1006 /* There was no 'else', so we can only become
1007 * conditional if we know the cases were exhaustive,
1008 * and that doesn't mean anything yet.
1009 * So only labels become conditional..
1012 v2 && v2->scope == PendingScope;
1014 if (v2->val.vtype == Vlabel) {
1015 v2->scope = CondScope;
1016 v2->min_depth = c->scope_depth;
1018 v2->scope = OutScope;
1021 case OutScope: break;
1025 if (v->scope == OutScope)
1034 Executables can be lots of different things. In many cases an
1035 executable is just an operation combined with one or two other
1036 executables. This allows for expressions and lists etc. Other times
1037 an executable is something quite specific like a constant or variable
1038 name. So we define a `struct exec` to be a general executable with a
1039 type, and a `struct binode` which is a subclass of `exec` and forms a
1040 node in a binary tree and holding an operation. There will be other
1041 subclasses, and to access these we need to be able to `cast` the
1042 `exec` into the various other types.
1045 #define cast(structname, pointer) ({ \
1046 const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \
1047 if (__mptr && *__mptr != X##structname) abort(); \
1048 (struct structname *)( (char *)__mptr);})
1050 #define new(structname) ({ \
1051 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1052 __ptr->type = X##structname; \
1053 __ptr->line = -1; __ptr->column = -1; \
1056 #define new_pos(structname, token) ({ \
1057 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1058 __ptr->type = X##structname; \
1059 __ptr->line = token.line; __ptr->column = token.col; \
1068 enum exec_types type;
1076 struct exec *left, *right;
1079 ###### ast functions
1081 static int __fput_loc(struct exec *loc, FILE *f)
1083 if (loc->line >= 0) {
1084 fprintf(f, "%d:%d: ", loc->line, loc->column);
1087 if (loc->type == Xbinode)
1088 return __fput_loc(cast(binode,loc)->left, f) ||
1089 __fput_loc(cast(binode,loc)->right, f);
1092 static void fput_loc(struct exec *loc, FILE *f)
1094 if (!__fput_loc(loc, f))
1095 fprintf(f, "??:??: ");
1098 Each different type of `exec` node needs a number of functions
1099 defined, a bit like methods. We must be able to be able to free it,
1100 print it, analyse it and execute it. Once we have specific `exec`
1101 types we will need to parse them too. Let's take this a bit more
1106 The parser generator requires a `free_foo` function for each struct
1107 that stores attributes and they will be `exec`s of subtypes there-of.
1108 So we need `free_exec` which can handle all the subtypes, and we need
1111 ###### ast functions
1113 static void free_binode(struct binode *b)
1118 free_exec(b->right);
1122 ###### core functions
1123 static void free_exec(struct exec *e)
1132 ###### forward decls
1134 static void free_exec(struct exec *e);
1136 ###### free exec cases
1137 case Xbinode: free_binode(cast(binode, e)); break;
1141 Printing an `exec` requires that we know the current indent level for
1142 printing line-oriented components. As will become clear later, we
1143 also want to know what sort of bracketing to use.
1145 ###### ast functions
1147 static void do_indent(int i, char *str)
1154 ###### core functions
1155 static void print_binode(struct binode *b, int indent, int bracket)
1159 ## print binode cases
1163 static void print_exec(struct exec *e, int indent, int bracket)
1169 print_binode(cast(binode, e), indent, bracket); break;
1174 ###### forward decls
1176 static void print_exec(struct exec *e, int indent, int bracket);
1180 As discussed, analysis involves propagating type requirements around
1181 the program and looking for errors.
1183 So `propagate_types` is passed an expected type (being a `vtype`
1184 together with a `bool_permitted` flag) that the `exec` is expected to
1185 return, and returns the type that it does return, either of which can
1186 be `Vunknown`. An `ok` flag is passed by reference. It is set to `0`
1187 when an error is found, and `2` when any change is made. If it
1188 remains unchanged at `1`, then no more propagation is needed.
1190 ###### core functions
1192 static enum vtype propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1193 enum vtype type, int bool_permitted)
1200 switch (prog->type) {
1203 struct binode *b = cast(binode, prog);
1205 ## propagate binode cases
1209 ## propagate exec cases
1216 Interpreting an `exec` doesn't require anything but the `exec`. State
1217 is stored in variables and each variable will be directly linked from
1218 within the `exec` tree. The exception to this is the whole `program`
1219 which needs to look at command line arguments. The `program` will be
1220 interpreted separately.
1222 Each `exec` can return a value, which may be `Vnone` but shouldn't be `Vunknown`.
1224 ###### core functions
1226 static struct value interp_exec(struct exec *e)
1236 struct binode *b = cast(binode, e);
1237 struct value left, right;
1238 left.vtype = right.vtype = Vnone;
1240 ## interp binode cases
1242 free_value(left); free_value(right);
1245 ## interp exec cases
1250 ## Language elements
1252 Each language element needs to be parsed, printed, analysed,
1253 interpreted, and freed. There are several, so let's just start with
1254 the easy ones and work our way up.
1258 We have already met values as separate objects. When manifest
1259 constants appear in the program text that must result in an executable
1260 which has a constant value. So the `val` structure embeds a value in
1276 $0 = new_pos(val, $1);
1277 $0->val.vtype = Vbool;
1281 $0 = new_pos(val, $1);
1282 $0->val.vtype = Vbool;
1286 $0 = new_pos(val, $1);
1287 $0->val.vtype = Vnum;
1288 if (number_parse($0->val.num, $0->val.tail, $1.txt) == 0)
1289 mpq_init($0->val.num);
1290 if ($0->val.tail[0])
1291 tok_err(config2context(config), "error: unsupported number suffix.",
1295 $0 = new_pos(val, $1);
1296 $0->val.vtype = Vstr;
1297 string_parse(&$1, '\\', &$0->val.str, $0->val.tail);
1298 if ($0->val.tail[0])
1299 tok_err(config2context(config), "error: unsupported string suffix.",
1303 $0 = new_pos(val, $1);
1304 $0->val.vtype = Vstr;
1305 string_parse(&$1, '\\', &$0->val.str, $0->val.tail);
1306 if ($0->val.tail[0])
1307 tok_err(config2context(config), "error: unsupported string suffix.",
1311 ###### print exec cases
1314 struct val *v = cast(val, e);
1315 if (v->val.vtype == Vstr)
1317 print_value(v->val);
1318 if (v->val.vtype == Vstr)
1323 ###### propagate exec cases
1326 struct val *val = cast(val, prog);
1327 if (!vtype_compat(type, val->val.vtype, bool_permitted)) {
1328 type_err(c, "error: expected %1 found %2",
1329 prog, type, val->val.vtype);
1332 return val->val.vtype;
1335 ###### interp exec cases
1337 return dup_value(cast(val, e)->val);
1339 ###### ast functions
1340 static void free_val(struct val *v)
1348 ###### free exec cases
1349 case Xval: free_val(cast(val, e)); break;
1351 ###### ast functions
1352 // Move all nodes from 'b' to 'rv', reversing the order.
1353 // In 'b' 'left' is a list, and 'right' is the last node.
1354 // In 'rv', left' is the first node and 'right' is a list.
1355 static struct binode *reorder_bilist(struct binode *b)
1357 struct binode *rv = NULL;
1360 struct exec *t = b->right;
1364 b = cast(binode, b->left);
1374 Just as we used as `val` to wrap a value into an `exec`, we similarly
1375 need a `var` to wrap a `variable` into an exec. While each `val`
1376 contained a copy of the value, each `var` hold a link to the variable
1377 because it really is the same variable no matter where it appears.
1378 When a variable is used, we need to remember to follow the `->merged`
1379 link to find the primary instance.
1387 struct variable *var;
1393 VariableDecl -> IDENTIFIER := ${ {
1394 struct variable *v = var_decl(config2context(config), $1.txt);
1395 $0 = new_pos(var, $1);
1400 v = var_ref(config2context(config), $1.txt);
1402 type_err(config2context(config), "error: variable '%v' redeclared",
1404 type_err(config2context(config), "info: this is where '%v' was first declared",
1405 v->where_decl, Vnone, Vnone);
1408 | IDENTIFIER ::= ${ {
1409 struct variable *v = var_decl(config2context(config), $1.txt);
1410 $0 = new_pos(var, $1);
1416 v = var_ref(config2context(config), $1.txt);
1418 type_err(config2context(config), "error: variable '%v' redeclared",
1420 type_err(config2context(config), "info: this is where '%v' was first declared",
1421 v->where_decl, Vnone, Vnone);
1425 Variable -> IDENTIFIER ${ {
1426 struct variable *v = var_ref(config2context(config), $1.txt);
1427 $0 = new_pos(var, $1);
1429 /* This might be a label - allocate a var just in case */
1430 v = var_decl(config2context(config), $1.txt);
1432 val_init(&v->val, Vlabel);
1439 ###### print exec cases
1442 struct var *v = cast(var, e);
1444 struct binding *b = v->var->name;
1445 printf("%.*s", b->name.len, b->name.txt);
1452 if (loc->type == Xvar) {
1453 struct var *v = cast(var, loc);
1455 struct binding *b = v->var->name;
1456 fprintf(stderr, "%.*s", b->name.len, b->name.txt);
1458 fputs("???", stderr);
1460 fputs("NOTVAR", stderr);
1463 ###### propagate exec cases
1467 struct var *var = cast(var, prog);
1468 struct variable *v = var->var;
1470 type_err(c, "%d:BUG: no variable!!", prog, Vnone, Vnone);
1476 if (v->val.vtype == Vunknown) {
1477 if (type > Vunknown && *ok != 0) {
1478 val_init(&v->val, type);
1479 v->where_set = prog;
1484 if (!vtype_compat(type, v->val.vtype, bool_permitted)) {
1485 type_err(c, "error: expected %1 but variable '%v' is %2", prog,
1486 type, v->val.vtype);
1487 type_err(c, "info: this is where '%v' was set to %1", v->where_set,
1488 v->val.vtype, Vnone);
1491 if (type <= Vunknown)
1492 return v->val.vtype;
1496 ###### interp exec cases
1499 struct var *var = cast(var, e);
1500 struct variable *v = var->var;
1504 return dup_value(v->val);
1507 ###### ast functions
1509 static void free_var(struct var *v)
1514 ###### free exec cases
1515 case Xvar: free_var(cast(var, e)); break;
1517 ### Expressions: Boolean
1519 Our first user of the `binode` will be expressions, and particularly
1520 Boolean expressions. As I haven't implemented precedence in the
1521 parser generator yet, we need different names from each precedence
1522 level used by expressions. The outer most or lowest level precedence
1523 are Boolean `or` `and`, and `not` which form an `Expression` out of `BTerm`s
1534 Expression -> Expression or BTerm ${ {
1535 struct binode *b = new(binode);
1541 | BTerm ${ $0 = $<1; }$
1543 BTerm -> BTerm and BFact ${ {
1544 struct binode *b = new(binode);
1550 | BFact ${ $0 = $<1; }$
1552 BFact -> not BFact ${ {
1553 struct binode *b = new(binode);
1560 ###### print binode cases
1562 print_exec(b->left, -1, 0);
1564 print_exec(b->right, -1, 0);
1567 print_exec(b->left, -1, 0);
1569 print_exec(b->right, -1, 0);
1573 print_exec(b->right, -1, 0);
1576 ###### propagate binode cases
1580 /* both must be Vbool, result is Vbool */
1581 propagate_types(b->left, c, ok, Vbool, 0);
1582 propagate_types(b->right, c, ok, Vbool, 0);
1583 if (type != Vbool && type > Vunknown) {
1584 type_err(c, "error: %1 operation found where %2 expected", prog,
1590 ###### interp binode cases
1592 rv = interp_exec(b->left);
1593 right = interp_exec(b->right);
1594 rv.bool = rv.bool && right.bool;
1597 rv = interp_exec(b->left);
1598 right = interp_exec(b->right);
1599 rv.bool = rv.bool || right.bool;
1602 rv = interp_exec(b->right);
1606 ### Expressions: Comparison
1608 Of slightly higher precedence that Boolean expressions are
1610 A comparison takes arguments of any type, but the two types must be
1613 To simplify the parsing we introduce an `eop` which can return an
1614 expression operator.
1621 ###### ast functions
1622 static void free_eop(struct eop *e)
1637 | Expr CMPop Expr ${ {
1638 struct binode *b = new(binode);
1644 | Expr ${ $0 = $<1; }$
1649 CMPop -> < ${ $0.op = Less; }$
1650 | > ${ $0.op = Gtr; }$
1651 | <= ${ $0.op = LessEq; }$
1652 | >= ${ $0.op = GtrEq; }$
1653 | == ${ $0.op = Eql; }$
1654 | != ${ $0.op = NEql; }$
1656 ###### print binode cases
1664 print_exec(b->left, -1, 0);
1666 case Less: printf(" < "); break;
1667 case LessEq: printf(" <= "); break;
1668 case Gtr: printf(" > "); break;
1669 case GtrEq: printf(" >= "); break;
1670 case Eql: printf(" == "); break;
1671 case NEql: printf(" != "); break;
1674 print_exec(b->right, -1, 0);
1677 ###### propagate binode cases
1684 /* Both must match but not labels, result is Vbool */
1685 t = propagate_types(b->left, c, ok, Vnolabel, 0);
1687 propagate_types(b->right, c, ok, t, 0);
1689 t = propagate_types(b->right, c, ok, Vnolabel, 0);
1691 t = propagate_types(b->left, c, ok, t, 0);
1693 if (!vtype_compat(type, Vbool, 0)) {
1694 type_err(c, "error: Comparison returns %1 but %2 expected", prog,
1700 ###### interp binode cases
1709 left = interp_exec(b->left);
1710 right = interp_exec(b->right);
1711 cmp = value_cmp(left, right);
1714 case Less: rv.bool = cmp < 0; break;
1715 case LessEq: rv.bool = cmp <= 0; break;
1716 case Gtr: rv.bool = cmp > 0; break;
1717 case GtrEq: rv.bool = cmp >= 0; break;
1718 case Eql: rv.bool = cmp == 0; break;
1719 case NEql: rv.bool = cmp != 0; break;
1720 default: rv.bool = 0; break;
1725 ### Expressions: The rest
1727 The remaining expressions with the highest precedence are arithmetic
1728 and string concatenation. There are `Expr`, `Term`, and `Factor`.
1729 The `Factor` is where the `Value` and `Variable` that we already have
1732 `+` and `-` are both infix and prefix operations (where they are
1733 absolute value and negation). These have different operator names.
1735 We also have a 'Bracket' operator which records where parentheses were
1736 found. This make it easy to reproduce these when printing. Once
1737 precedence is handled better I might be able to discard this.
1749 Expr -> Expr Eop Term ${ {
1750 struct binode *b = new(binode);
1756 | Term ${ $0 = $<1; }$
1758 Term -> Term Top Factor ${ {
1759 struct binode *b = new(binode);
1765 | Factor ${ $0 = $<1; }$
1767 Factor -> ( Expression ) ${ {
1768 struct binode *b = new_pos(binode, $1);
1774 struct binode *b = new(binode);
1779 | Value ${ $0 = $<1; }$
1780 | Variable ${ $0 = $<1; }$
1783 Eop -> + ${ $0.op = Plus; }$
1784 | - ${ $0.op = Minus; }$
1786 Uop -> + ${ $0.op = Absolute; }$
1787 | - ${ $0.op = Negate; }$
1789 Top -> * ${ $0.op = Times; }$
1790 | / ${ $0.op = Divide; }$
1791 | ++ ${ $0.op = Concat; }$
1793 ###### print binode cases
1799 print_exec(b->left, indent, 0);
1801 case Plus: printf(" + "); break;
1802 case Minus: printf(" - "); break;
1803 case Times: printf(" * "); break;
1804 case Divide: printf(" / "); break;
1805 case Concat: printf(" ++ "); break;
1808 print_exec(b->right, indent, 0);
1812 print_exec(b->right, indent, 0);
1816 print_exec(b->right, indent, 0);
1820 print_exec(b->right, indent, 0);
1824 ###### propagate binode cases
1829 /* both must be numbers, result is Vnum */
1832 /* as propagate_types ignores a NULL,
1833 * unary ops fit here too */
1834 propagate_types(b->left, c, ok, Vnum, 0);
1835 propagate_types(b->right, c, ok, Vnum, 0);
1836 if (!vtype_compat(type, Vnum, 0)) {
1837 type_err(c, "error: Arithmetic returns %1 but %2 expected", prog,
1844 /* both must be Vstr, result is Vstr */
1845 propagate_types(b->left, c, ok, Vstr, 0);
1846 propagate_types(b->right, c, ok, Vstr, 0);
1847 if (!vtype_compat(type, Vstr, 0)) {
1848 type_err(c, "error: Concat returns %1 but %2 expected", prog,
1855 return propagate_types(b->right, c, ok, type, 0);
1857 ###### interp binode cases
1860 rv = interp_exec(b->left);
1861 right = interp_exec(b->right);
1862 mpq_add(rv.num, rv.num, right.num);
1865 rv = interp_exec(b->left);
1866 right = interp_exec(b->right);
1867 mpq_sub(rv.num, rv.num, right.num);
1870 rv = interp_exec(b->left);
1871 right = interp_exec(b->right);
1872 mpq_mul(rv.num, rv.num, right.num);
1875 rv = interp_exec(b->left);
1876 right = interp_exec(b->right);
1877 mpq_div(rv.num, rv.num, right.num);
1880 rv = interp_exec(b->right);
1881 mpq_neg(rv.num, rv.num);
1884 rv = interp_exec(b->right);
1885 mpq_abs(rv.num, rv.num);
1888 rv = interp_exec(b->right);
1891 left = interp_exec(b->left);
1892 right = interp_exec(b->right);
1894 rv.str = text_join(left.str, right.str);
1897 ### Blocks, Statements, and Statement lists.
1899 Now that we have expressions out of the way we need to turn to
1900 statements. There are simple statements and more complex statements.
1901 Simple statements do not contain newlines, complex statements do.
1903 Statements often come in sequences and we have corresponding simple
1904 statement lists and complex statement lists.
1905 The former comprise only simple statements separated by semicolons.
1906 The later comprise complex statements and simple statement lists. They are
1907 separated by newlines. Thus the semicolon is only used to separate
1908 simple statements on the one line. This may be overly restrictive,
1909 but I'm not sure I every want a complex statement to share a line with
1912 Note that a simple statement list can still use multiple lines if
1913 subsequent lines are indented, so
1915 ###### Example: wrapped simple statement list
1920 is a single simple statement list. This might allow room for
1921 confusion, so I'm not set on it yet.
1923 A simple statement list needs no extra syntax. A complex statement
1924 list has two syntactic forms. It can be enclosed in braces (much like
1925 C blocks), or it can be introduced by a colon and continue until an
1926 unindented newline (much like Python blocks). With this extra syntax
1927 it is referred to as a block.
1929 Note that a block does not have to include any newlines if it only
1930 contains simple statements. So both of:
1932 if condition: a=b; d=f
1934 if condition { a=b; print f }
1938 In either case the list is constructed from a `binode` list with
1939 `Block` as the operator. When parsing the list it is most convenient
1940 to append to the end, so a list is a list and a statement. When using
1941 the list it is more convenient to consider a list to be a statement
1942 and a list. So we need a function to re-order a list.
1943 `reorder_bilist` serves this purpose.
1945 The only stand-alone statement we introduce at this stage is `pass`
1946 which does nothing and is represented as a `NULL` pointer in a `Block`
1966 Block -> Open Statementlist Close ${ $0 = $<2; }$
1967 | Open Newlines Statementlist Close ${ $0 = $<3; }$
1968 | Open SimpleStatements } ${ $0 = reorder_bilist($<2); }$
1969 | Open Newlines SimpleStatements } ${ $0 = reorder_bilist($<3); }$
1970 | : Statementlist ${ $0 = $<2; }$
1971 | : SimpleStatements ${ $0 = reorder_bilist($<2); }$
1973 Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<1); }$
1975 ComplexStatements -> ComplexStatements ComplexStatement ${
1981 | ComplexStatements NEWLINE ${ $0 = $<1; }$
1982 | ComplexStatement ${
1990 ComplexStatement -> SimpleStatements NEWLINE ${
1991 $0 = reorder_bilist($<1);
1993 ## ComplexStatement Grammar
1996 SimpleStatements -> SimpleStatements ; SimpleStatement ${
2002 | SimpleStatement ${
2008 | SimpleStatements ; ${ $0 = $<1; }$
2010 SimpleStatement -> pass ${ $0 = NULL; }$
2011 ## SimpleStatement Grammar
2013 ###### print binode cases
2017 if (b->left == NULL)
2020 print_exec(b->left, indent, 0);
2023 print_exec(b->right, indent, 0);
2026 // block, one per line
2027 if (b->left == NULL)
2028 do_indent(indent, "pass\n");
2030 print_exec(b->left, indent, bracket);
2032 print_exec(b->right, indent, bracket);
2036 ###### propagate binode cases
2039 /* If any statement returns something other then Vnone
2040 * or Vbool then all such must return same type.
2041 * As each statement may be Vnone or something else,
2042 * we must always pass Vunknown down, otherwise an incorrect
2043 * error might occur. We never return Vnone unless it is
2048 for (e = b; e; e = cast(binode, e->right)) {
2049 t = propagate_types(e->left, c, ok, Vunknown, bool_permitted);
2050 if (bool_permitted && t == Vbool)
2052 if (t != Vunknown && t != Vnone && t != Vbool) {
2053 if (type == Vunknown)
2055 else if (t != type) {
2056 type_err(c, "error: expected %1, found %2",
2065 ###### interp binode cases
2067 while (rv.vtype == Vnone &&
2070 rv = interp_exec(b->left);
2071 b = cast(binode, b->right);
2075 ### The Print statement
2077 `print` is a simple statement that takes a comma-separated list of
2078 expressions and prints the values separated by spaces and terminated
2079 by a newline. No control of formatting is possible.
2081 `print` faces the same list-ordering issue as blocks, and uses the
2087 ###### SimpleStatement Grammar
2089 | print ExpressionList ${
2090 $0 = reorder_bilist($<2);
2092 | print ExpressionList , ${
2097 $0 = reorder_bilist($0);
2108 ExpressionList -> ExpressionList , Expression ${
2121 ###### print binode cases
2124 do_indent(indent, "print");
2128 print_exec(b->left, -1, 0);
2132 b = cast(binode, b->right);
2138 ###### propagate binode cases
2141 /* don't care but all must be consistent */
2142 propagate_types(b->left, c, ok, Vnolabel, 0);
2143 propagate_types(b->right, c, ok, Vnolabel, 0);
2146 ###### interp binode cases
2152 for ( ; b; b = cast(binode, b->right))
2156 left = interp_exec(b->left);
2169 ###### Assignment statement
2171 An assignment will assign a value to a variable, providing it hasn't
2172 be declared as a constant. The analysis phase ensures that the type
2173 will be correct so the interpreter just needs to perform the
2174 calculation. There is a form of assignment which declares a new
2175 variable as well as assigning a value. If a name is assigned before
2176 it is declared, and error will be raised as the name is created as
2177 `Vlabel` and it is illegal to assign to such names.
2183 ###### SimpleStatement Grammar
2184 | Variable = Expression ${ {
2185 struct var *v = cast(var, $1);
2191 if (v->var && !v->var->constant) {
2195 | VariableDecl Expression ${
2202 ###### print binode cases
2205 do_indent(indent, "");
2206 print_exec(b->left, indent, 0);
2208 print_exec(b->right, indent, 0);
2214 do_indent(indent, "");
2215 print_exec(b->left, indent, 0);
2216 if (cast(var, b->left)->var->constant)
2220 print_exec(b->right, indent, 0);
2225 ###### propagate binode cases
2229 /* Both must match and not be labels, result is Vnone */
2230 t = propagate_types(b->left, c, ok, Vnolabel, 0);
2232 if (propagate_types(b->right, c, ok, t, 0) != t)
2233 if (b->left->type == Xvar)
2234 type_err(c, "info: variable '%v' was set as %1 here.",
2235 cast(var, b->left)->var->where_set, t, Vnone);
2237 t = propagate_types(b->right, c, ok, Vnolabel, 0);
2239 propagate_types(b->left, c, ok, t, 0);
2245 ###### interp binode cases
2250 struct variable *v = cast(var, b->left)->var;
2253 right = interp_exec(b->right);
2256 right.vtype = Vunknown;
2260 ### The `use` statement
2262 The `use` statement is the last "simple" statement. It is needed when
2263 the condition in a conditional statement is a block. `use` works much
2264 like `return` in C, but only completes the `condition`, not the whole
2270 ###### SimpleStatement Grammar
2272 $0 = new_pos(binode, $1);
2277 ###### print binode cases
2280 do_indent(indent, "use ");
2281 print_exec(b->right, -1, 0);
2286 ###### propagate binode cases
2289 /* result matches value */
2290 return propagate_types(b->right, c, ok, type, 0);
2292 ###### interp binode cases
2295 rv = interp_exec(b->right);
2298 ### The Conditional Statement
2300 This is the biggy and currently the only complex statement. This
2301 subsumes `if`, `while`, `do/while`, `switch`, and some parts of `for`.
2302 It is comprised of a number of parts, all of which are optional though
2303 set combinations apply. Each part is (usually) a key word (`then` is
2304 sometimes optional) followed by either an expression of a code block,
2305 except the `casepart` which is a "key word and an expression" followed
2306 by a code block. The code-block option is valid for all parts and,
2307 where an expression is also allowed, the code block can use the `use`
2308 statement to report a value. If the code block does no report a value
2309 the effect is similar to reporting `False`.
2311 The `else` and `case` parts, as well as `then` when combined with
2312 `if`, can contain a `use` statement which will apply to some
2313 containing conditional statement. `for` parts, `do` parts and `then`
2314 parts used with `for` can never contain a `use`, except in some
2315 subordinate conditional statement.
2317 If there is a `forpart`, it is executed first, only once.
2318 If there is a `dopart`, then it is executed repeatedly providing
2319 always that the `condpart` or `cond`, if present, does not return a non-True
2320 value. `condpart` can fail to return any value if it simply executes
2321 to completion. This is treated the same as returning `True`.
2323 If there is a `thenpart` it will be executed whenever the `condpart`
2324 or `cond` returns True (or does not return any value), but this will happen
2325 *after* `dopart` (when present).
2327 If `elsepart` is present it will be executed at most once when the
2328 condition returns `False` or some value that isn't `True` and isn't
2329 matched by any `casepart`. If there are any `casepart`s, they will be
2330 executed when the condition returns a matching value.
2332 The particular sorts of values allowed in case parts has not yet been
2333 determined in the language design, so nothing is prohibited.
2335 The various blocks in this complex statement potentially provide scope
2336 for variables as described earlier. Each such block must include the
2337 "OpenScope" nonterminal before parsing the block, and must call
2338 `var_block_close()` when closing the block.
2340 The code following "`if`", "`switch`" and "`for`" does not get its own
2341 scope, but is in a scope covering the whole statement, so names
2342 declared there cannot be redeclared elsewhere. Similarly the
2343 condition following "`while`" is in a scope the covers the body
2344 ("`do`" part) of the loop, and which does not allow conditional scope
2345 extension. Code following "`then`" (both looping and non-looping),
2346 "`else`" and "`case`" each get their own local scope.
2348 The type requirements on the code block in a `whilepart` are quite
2349 unusal. It is allowed to return a value of some identifiable type, in
2350 which case the loop abort and an appropriate `casepart` is run, or it
2351 can return a Boolean, in which case the loop either continues to the
2352 `dopart` (on `True`) or aborts and runs the `elsepart` (on `False`).
2353 This is different both from the `ifpart` code block which is expected to
2354 return a Boolean, or the `switchpart` code block which is expected to
2355 return the same type as the casepart values. The correct analysis of
2356 the type of the `whilepart` code block is the reason for the
2357 `bool_permitted` flag which is passed to `propagate_types()`.
2359 The `cond_statement` cannot fit into a `binode` so a new `exec` is
2368 struct exec *action;
2369 struct casepart *next;
2371 struct cond_statement {
2373 struct exec *forpart, *condpart, *dopart, *thenpart, *elsepart;
2374 struct casepart *casepart;
2377 ###### ast functions
2379 static void free_casepart(struct casepart *cp)
2383 free_exec(cp->value);
2384 free_exec(cp->action);
2391 static void free_cond_statement(struct cond_statement *s)
2395 free_exec(s->forpart);
2396 free_exec(s->condpart);
2397 free_exec(s->dopart);
2398 free_exec(s->thenpart);
2399 free_exec(s->elsepart);
2400 free_casepart(s->casepart);
2404 ###### free exec cases
2405 case Xcond_statement: free_cond_statement(cast(cond_statement, e)); break;
2407 ###### ComplexStatement Grammar
2408 | CondStatement ${ $0 = $<1; }$
2413 // both ForThen and Whilepart open scopes, and CondSuffix only
2414 // closes one - so in the first branch here we have another to close.
2415 CondStatement -> ForThen WhilePart CondSuffix ${
2417 $0->forpart = $1.forpart; $1.forpart = NULL;
2418 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
2419 $0->condpart = $2.condpart; $2.condpart = NULL;
2420 $0->dopart = $2.dopart; $2.dopart = NULL;
2421 var_block_close(config2context(config), CloseSequential);
2423 | WhilePart CondSuffix ${
2425 $0->condpart = $1.condpart; $1.condpart = NULL;
2426 $0->dopart = $1.dopart; $1.dopart = NULL;
2428 | SwitchPart CondSuffix ${
2432 | IfPart IfSuffix ${
2434 $0->condpart = $1.condpart; $1.condpart = NULL;
2435 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
2436 // This is where we close an "if" statement
2437 var_block_close(config2context(config), CloseSequential);
2440 CondSuffix -> IfSuffix ${
2442 // This is where we close scope of the whole
2443 // "for" or "while" statement
2444 var_block_close(config2context(config), CloseSequential);
2446 | CasePart CondSuffix ${
2448 $1->next = $0->casepart;
2453 CasePart -> Newlines case Expression OpenScope Block ${
2454 $0 = calloc(1,sizeof(struct casepart));
2457 var_block_close(config2context(config), CloseParallel);
2459 | case Expression OpenScope Block ${
2460 $0 = calloc(1,sizeof(struct casepart));
2463 var_block_close(config2context(config), CloseParallel);
2467 IfSuffix -> Newlines ${ $0 = new(cond_statement); }$
2468 | Newlines else OpenScope Block ${
2469 $0 = new(cond_statement);
2471 var_block_close(config2context(config), CloseElse);
2473 | else OpenScope Block ${
2474 $0 = new(cond_statement);
2476 var_block_close(config2context(config), CloseElse);
2478 | Newlines else OpenScope CondStatement ${
2479 $0 = new(cond_statement);
2481 var_block_close(config2context(config), CloseElse);
2483 | else OpenScope CondStatement ${
2484 $0 = new(cond_statement);
2486 var_block_close(config2context(config), CloseElse);
2491 // These scopes are closed in CondSuffix
2492 ForPart -> for OpenScope SimpleStatements ${
2493 $0 = reorder_bilist($<3);
2495 | for OpenScope Block ${
2499 ThenPart -> then OpenScope SimpleStatements ${
2500 $0 = reorder_bilist($<3);
2501 var_block_close(config2context(config), CloseSequential);
2503 | then OpenScope Block ${
2505 var_block_close(config2context(config), CloseSequential);
2508 ThenPartNL -> ThenPart OptNL ${
2512 // This scope is closed in CondSuffix
2513 WhileHead -> while OpenScope Block ${
2518 ForThen -> ForPart OptNL ThenPartNL ${
2526 // This scope is closed in CondSuffix
2527 WhilePart -> while OpenScope Expression Block ${
2528 $0.type = Xcond_statement;
2532 | WhileHead OptNL do Block ${
2533 $0.type = Xcond_statement;
2538 IfPart -> if OpenScope Expression OpenScope Block ${
2539 $0.type = Xcond_statement;
2542 var_block_close(config2context(config), CloseParallel);
2544 | if OpenScope Block OptNL then OpenScope Block ${
2545 $0.type = Xcond_statement;
2548 var_block_close(config2context(config), CloseParallel);
2552 // This scope is closed in CondSuffix
2553 SwitchPart -> switch OpenScope Expression ${
2556 | switch OpenScope Block ${
2560 ###### print exec cases
2562 case Xcond_statement:
2564 struct cond_statement *cs = cast(cond_statement, e);
2565 struct casepart *cp;
2567 do_indent(indent, "for");
2568 if (bracket) printf(" {\n"); else printf(":\n");
2569 print_exec(cs->forpart, indent+1, bracket);
2572 do_indent(indent, "} then {\n");
2574 do_indent(indent, "then:\n");
2575 print_exec(cs->thenpart, indent+1, bracket);
2577 if (bracket) do_indent(indent, "}\n");
2581 if (cs->condpart && cs->condpart->type == Xbinode &&
2582 cast(binode, cs->condpart)->op == Block) {
2584 do_indent(indent, "while {\n");
2586 do_indent(indent, "while:\n");
2587 print_exec(cs->condpart, indent+1, bracket);
2589 do_indent(indent, "} do {\n");
2591 do_indent(indent, "do:\n");
2592 print_exec(cs->dopart, indent+1, bracket);
2594 do_indent(indent, "}\n");
2596 do_indent(indent, "while ");
2597 print_exec(cs->condpart, 0, bracket);
2602 print_exec(cs->dopart, indent+1, bracket);
2604 do_indent(indent, "}\n");
2609 do_indent(indent, "switch");
2611 do_indent(indent, "if");
2612 if (cs->condpart && cs->condpart->type == Xbinode &&
2613 cast(binode, cs->condpart)->op == Block) {
2618 print_exec(cs->condpart, indent+1, bracket);
2620 do_indent(indent, "}\n");
2622 do_indent(indent, "then:\n");
2623 print_exec(cs->thenpart, indent+1, bracket);
2627 print_exec(cs->condpart, 0, bracket);
2633 print_exec(cs->thenpart, indent+1, bracket);
2635 do_indent(indent, "}\n");
2640 for (cp = cs->casepart; cp; cp = cp->next) {
2641 do_indent(indent, "case ");
2642 print_exec(cp->value, -1, 0);
2647 print_exec(cp->action, indent+1, bracket);
2649 do_indent(indent, "}\n");
2652 do_indent(indent, "else");
2657 print_exec(cs->elsepart, indent+1, bracket);
2659 do_indent(indent, "}\n");
2664 ###### propagate exec cases
2665 case Xcond_statement:
2667 // forpart and dopart must return Vnone
2668 // thenpart must return Vnone if there is a dopart,
2669 // otherwise it is like elsepart.
2671 // be bool if there is not casepart
2672 // match casepart->values if there is a switchpart
2673 // either be bool or match casepart->value if there
2675 // elsepart, casepart->action must match there return type
2676 // expected of this statement.
2677 struct cond_statement *cs = cast(cond_statement, prog);
2678 struct casepart *cp;
2680 t = propagate_types(cs->forpart, c, ok, Vnone, 0);
2681 if (!vtype_compat(Vnone, t, 0))
2683 t = propagate_types(cs->dopart, c, ok, Vnone, 0);
2684 if (!vtype_compat(Vnone, t, 0))
2687 t = propagate_types(cs->thenpart, c, ok, Vnone, 0);
2688 if (!vtype_compat(Vnone, t, 0))
2691 if (cs->casepart == NULL)
2692 propagate_types(cs->condpart, c, ok, Vbool, 0);
2694 /* Condpart must match case values, with bool permitted */
2696 for (cp = cs->casepart;
2697 cp && (t == Vunknown); cp = cp->next)
2698 t = propagate_types(cp->value, c, ok, Vunknown, 0);
2699 if (t == Vunknown && cs->condpart)
2700 t = propagate_types(cs->condpart, c, ok, Vunknown, 1);
2701 // Now we have a type (I hope) push it down
2702 if (t != Vunknown) {
2703 for (cp = cs->casepart; cp; cp = cp->next)
2704 propagate_types(cp->value, c, ok, t, 0);
2705 propagate_types(cs->condpart, c, ok, t, 1);
2708 // (if)then, else, and case parts must return expected type.
2709 if (!cs->dopart && type == Vunknown)
2710 type = propagate_types(cs->thenpart, c, ok, Vunknown, bool_permitted);
2711 if (type == Vunknown)
2712 type = propagate_types(cs->elsepart, c, ok, Vunknown, bool_permitted);
2713 for (cp = cs->casepart;
2714 cp && type == Vunknown;
2716 type = propagate_types(cp->action, c, ok, Vunknown, bool_permitted);
2717 if (type > Vunknown) {
2719 propagate_types(cs->thenpart, c, ok, type, bool_permitted);
2720 propagate_types(cs->elsepart, c, ok, type, bool_permitted);
2721 for (cp = cs->casepart; cp ; cp = cp->next)
2722 propagate_types(cp->action, c, ok, type, bool_permitted);
2728 ###### interp exec cases
2729 case Xcond_statement:
2731 struct value v, cnd;
2732 struct casepart *cp;
2733 struct cond_statement *c = cast(cond_statement, e);
2735 interp_exec(c->forpart);
2738 cnd = interp_exec(c->condpart);
2741 if (!(cnd.vtype == Vnone ||
2742 (cnd.vtype == Vbool && cnd.bool != 0)))
2746 interp_exec(c->dopart);
2749 v = interp_exec(c->thenpart);
2750 if (v.vtype != Vnone || !c->dopart)
2754 } while (c->dopart);
2756 for (cp = c->casepart; cp; cp = cp->next) {
2757 v = interp_exec(cp->value);
2758 if (value_cmp(v, cnd) == 0) {
2761 return interp_exec(cp->action);
2767 return interp_exec(c->elsepart);
2772 ### Finally the whole program.
2774 Somewhat reminiscent of Pascal a (current) Ocean program starts with
2775 the keyword "program" and a list of variable names which are assigned
2776 values from command line arguments. Following this is a `block` which
2777 is the code to execute.
2779 As this is the top level, several things are handled a bit
2781 The whole program is not interpreted by `interp_exec` as that isn't
2782 passed the argument list which the program requires. Similarly type
2783 analysis is a bit more interesting at this level.
2788 ###### Parser: grammar
2791 Program -> program OpenScope Varlist Block OptNL ${
2794 $0->left = reorder_bilist($<3);
2796 var_block_close(config2context(config), CloseSequential);
2797 if (config2context(config)->scope_stack) abort();
2800 tok_err(config2context(config),
2801 "error: unhandled parse error.", &$1);
2804 Varlist -> Varlist ArgDecl ${
2813 ArgDecl -> IDENTIFIER ${ {
2814 struct variable *v = var_decl(config2context(config), $1.txt);
2821 ###### print binode cases
2823 do_indent(indent, "program");
2824 for (b2 = cast(binode, b->left); b2; b2 = cast(binode, b2->right)) {
2826 print_exec(b2->left, 0, 0);
2832 print_exec(b->right, indent+1, bracket);
2834 do_indent(indent, "}\n");
2837 ###### propagate binode cases
2838 case Program: abort();
2840 ###### core functions
2842 static int analyse_prog(struct exec *prog, struct parse_context *c)
2844 struct binode *b = cast(binode, prog);
2851 propagate_types(b->right, c, &ok, Vnone, 0);
2856 for (b = cast(binode, b->left); b; b = cast(binode, b->right)) {
2857 struct var *v = cast(var, b->left);
2858 if (v->var->val.vtype == Vunknown) {
2859 v->var->where_set = b;
2860 val_init(&v->var->val, Vstr);
2863 b = cast(binode, prog);
2866 propagate_types(b->right, c, &ok, Vnone, 0);
2871 /* Make sure everything is still consistent */
2872 propagate_types(b->right, c, &ok, Vnone, 0);
2876 static void interp_prog(struct exec *prog, char **argv)
2878 struct binode *p = cast(binode, prog);
2884 al = cast(binode, p->left);
2886 struct var *v = cast(var, al->left);
2887 struct value *vl = &v->var->val;
2889 if (argv[0] == NULL) {
2890 printf("Not enough args\n");
2893 al = cast(binode, al->right);
2895 if (!parse_value(vl, argv[0]))
2899 v = interp_exec(p->right);
2903 ###### interp binode cases
2904 case Program: abort();
2906 ## And now to test it out.
2908 Having a language requires having a "hello world" program. I'll
2909 provide a little more than that: a program that prints "Hello world"
2910 finds the GCD of two numbers, prints the first few elements of
2911 Fibonacci, and performs a binary search for a number.
2913 ###### File: oceani.mk
2916 @echo "===== TEST ====="
2917 ./oceani --section "test: hello" oceani.mdc 55 33
2922 print "Hello World, what lovely oceans you have!"
2923 /* When a variable is defined in both branches of an 'if',
2924 * and used afterwards, the variables are merged.
2930 print "Is", A, "bigger than", B,"? ", bigger
2931 /* If a variable is not used after the 'if', no
2932 * merge happens, so types can be different
2936 print A, "is more than twice", B, "?", double
2939 print "double", A, "is only", double
2948 print "GCD of", A, "and", B,"is", a
2950 print a, "is not positive, cannot calculate GCD"
2952 print b, "is not positive, cannot calculate GCD"
2957 print "Fibonacci:", f1,f2,
2958 then togo = togo - 1
2966 /* Binary search... */
2971 mid := (lo + hi) / 2
2983 print "Yay, I found", target
2985 print "Closest I found was", mid