1 # Ocean Interpreter - Stoney Creek version
3 Ocean is intended to be an compiled language, so this interpreter is
4 not targeted at being the final product. It is, rather, an intermediate
5 stage, and fills that role in two distinct ways.
7 Firstly, it exists as a platform to experiment with the early language
8 design. An interpreter is easy to write and easy to get working, so
9 the barrier for entry is lower if I aim to start with an interpreter.
11 Secondly, the plan for the Ocean compiler is to write it in the
12 [Ocean language](http://ocean-lang.org). To achieve this we naturally
13 need some sort of boot-strap process and this interpreter - written in
14 portable C - will fill that role. It will be used to bootstrap the
17 Two features that are not needed to fill either of these roles are
18 performance and completeness. The interpreter only needs to be fast
19 enough to run small test programs and occasionally to run the compiler
20 on itself. It only needs to be complete enough to test aspects of the
21 design which are developed before the compiler is working, and to run
22 the compiler on itself. Any features not used by the compiler when
23 compiling itself are superfluous. They may be included anyway, but
26 Nonetheless, the interpreter should end up being reasonably complete,
27 and any performance bottlenecks which appear and are easily fixed, will
32 This second version of the interpreter exists to test out the
33 structured statement providing conditions and iteration, and simple
34 variable scoping. Clearly we need some minimal other functionality so
35 that values can be tested and instructions iterated over. All that
36 functionality is clearly not normative at this stage (not that
37 anything is **really** normative yet) and will change, so early test
38 code will certainly break in later versions.
40 The under-test parts of the language are:
42 - conditional/looping structured statements
43 - the `use` statement which is needed for that
44 - Variable binding using ":=" and "::=", and assignment using "=".
46 Elements which are present to make a usable language are:
48 - "blocks" of multiple statements.
49 - `pass`: a statement which does nothing.
50 - expressions: `+`, `-`, `*`, `/` can apply to numbers and `++` can
51 catenate strings. `and`, `or`, `not` manipulate Booleans, and
52 normal comparison operators can work on all three types.
53 - `print`: will print the values in a list of expressions.
54 - `program`: is given a list of identifiers to initialize from
59 Versions of the interpreter which obviously do not support a complete
60 language will be named after creeks and streams. This one is Stoney
63 Once we have something reasonably resembling a complete language, the
64 names of rivers will be used.
65 Early versions of the compiler will be named after seas. Major
66 releases of the compiler will be named after oceans. Hopefully I will
67 be finished once I get to the Pacific Ocean release.
71 As well as parsing and executing a program, the interpreter can print
72 out the program from the parsed internal structure. This is useful
73 for validating the parsing.
74 So the main requirements of the interpreter are:
76 - Parse the program, possible with tracing
77 - Analyse the parsed program to ensure consistency
81 This is all performed by a single C program extracted with
84 There will be two formats for printing the program: a default and one
85 that uses bracketing. So a `--bracket` command line option is needed
86 for that. Normally the first code section found is used, however an
87 alternate section can be requested so that a file (such as this one)
88 can contain multiple programs This is effected with the `--section`
91 ###### File: oceani.mk
93 myCFLAGS := -Wall -g -fplan9-extensions
94 CFLAGS := $(filter-out $(myCFLAGS),$(CFLAGS)) $(myCFLAGS)
95 myLDLIBS:= libparser.o libscanner.o libmdcode.o -licuuc
96 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
98 all :: $(LDLIBS) oceani
99 oceani.c oceani.h : oceani.mdc parsergen
100 ./parsergen -o oceani --LALR --tag Parser oceani.mdc
101 oceani.mk: oceani.mdc md2c
104 oceani: oceani.o $(LDLIBS)
105 $(CC) $(CFLAGS) -o oceani oceani.o $(LDLIBS)
107 ###### Parser: header
110 struct parse_context {
111 struct token_config config;
119 #define container_of(ptr, type, member) ({ \
120 const typeof( ((type *)0)->member ) *__mptr = (ptr); \
121 (type *)( (char *)__mptr - offsetof(type,member) );})
123 #define config2context(_conf) container_of(_conf, struct parse_context, \
132 #include <sys/mman.h>
151 static char Usage[] = "Usage: oceani --trace --print --noexec --brackets"
152 "--section=SectionName prog.ocn\n";
153 static const struct option long_options[] = {
154 {"trace", 0, NULL, 't'},
155 {"print", 0, NULL, 'p'},
156 {"noexec", 0, NULL, 'n'},
157 {"brackets", 0, NULL, 'b'},
158 {"section", 1, NULL, 's'},
161 const char *options = "tpnbs";
162 int main(int argc, char *argv[])
168 char *section = NULL;
169 struct parse_context context = {
171 .ignored = (1 << TK_line_comment)
172 | (1 << TK_block_comment),
173 .number_chars = ".,_+-",
178 int doprint=0, dotrace=0, doexec=1, brackets=0;
181 while ((opt = getopt_long(argc, argv, options, long_options, NULL))
184 case 't': dotrace=1; break;
185 case 'p': doprint=1; break;
186 case 'n': doexec=0; break;
187 case 'b': brackets=1; break;
188 case 's': section = optarg; break;
189 default: fprintf(stderr, Usage);
193 if (optind >= argc) {
194 fprintf(stderr, "oceani: no input file given\n");
197 fd = open(argv[optind], O_RDONLY);
199 fprintf(stderr, "oceani: cannot open %s\n", argv[optind]);
202 context.file_name = argv[optind];
203 len = lseek(fd, 0, 2);
204 file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
205 s = code_extract(file, file+len, NULL);
207 fprintf(stderr, "oceani: could not find any code in %s\n",
213 for (ss = s; ss; ss = ss->next) {
214 struct text sec = ss->section;
215 if (sec.len == strlen(section) &&
216 strncmp(sec.txt, section, sec.len) == 0)
220 prog = parse_oceani(ss->code, &context.config,
221 dotrace ? stderr : NULL);
223 fprintf(stderr, "oceani: cannot find section %s\n",
228 prog = parse_oceani(s->code, &context.config,
229 dotrace ? stderr : NULL);
231 fprintf(stderr, "oceani: fatal parser error.\n");
232 context.parse_error = 1;
235 print_exec(*prog, 0, brackets);
236 if (prog && doexec && !context.parse_error) {
237 if (!analyse_prog(*prog, &context)) {
238 fprintf(stderr, "oceani: type error in program - not running.\n");
241 interp_prog(*prog, argv+optind+1);
248 struct section *t = s->next;
254 exit(context.parse_error ? 1 : 0);
259 These four requirements of parse, analyse, print, interpret apply to
260 each language element individually so that is how most of the code
263 Three of the four are fairly self explanatory. The one that requires
264 a little explanation is the analysis step.
266 The current language design does not require (or even allow) the types
267 of variables to be declared, but they must still have a single type.
268 Different operations impose different requirements on the variables,
269 for example addition requires both arguments to be numeric, and
270 assignment requires the variable on the left to have the same type as
271 the expression on the right.
273 Analysis involves propagating these type requirements around and
274 consequently setting the type of each variable. If any requirements
275 are violated (e.g. a string is compared with a number) or if a
276 variable needs to have two different types, then an error is raised
277 and the program will not run.
279 If the same variable is declared in both branchs of an 'if/else', or
280 in all cases of a 'switch' then the multiple instances may be merged
281 into just one variable if the variable is references after the
282 conditional statement. When this happens, the types must naturally be
283 consistent across all the branches. When the variable is not used
284 outside the if, the variables in the different branches are distinct
285 and can be of different types.
287 Determining the types of all variables early is important for
288 processing command line arguments. These can be assigned to any type
289 of variable, but we must first know the correct type so any required
290 conversion can happen. If a variable is associated with a command
291 line argument but no type can be interpreted (e.g. the variable is
292 only ever used in a `print` statement), then the type is set to
295 Undeclared names may only appear in "use" statements and "case" expressions.
296 These names are given a type of "label" and a unique value.
297 This allows them to fill the role of a name in an enumerated type, which
298 is useful for testing the `switch` statement.
300 As we will see, the condition part of a `while` statement can return
301 either a Boolean or some other type. This requires that the expect
302 type that gets passed around comprises a type (`enum vtype`) and a
303 flag to indicate that `Vbool` is also permitted.
305 As there are, as yet, no distinct types that are compatible, there
306 isn't much subtlety in the analysis. When we have distinct number
307 types, this will become more interesting.
311 When analysis discovers an inconsistency it needs to report an error;
312 just refusing to run the code ensures that the error doesn't cascade,
313 but by itself it isn't very useful. A clear understand of the sort of
314 error message that are useful will help guide the process of analysis.
316 At a simplistic level, the only sort of error that type analysis can
317 report is that the type of some construct doesn't match a contextual
318 requirement. For example, in `4 + "hello"` the addition provides a
319 contextual requirement for numbers, but `"hello"` is not a number. In
320 this particular example no further information is needed as the types
321 are obvious from local information. When a variable is involved that
322 isn't the case. It may be helpful to explain why the variable has a
323 particular type, by indicating the location where the type was set,
324 whether by declaration or usage.
326 Using a recursive-descent analysis we can easily detect a problem at
327 multiple locations. In "`hello:= "there"; 4 + hello`" the addition
328 will detect that one argument is not a number and the usage of `hello`
329 will detect that a number was wanted, but not provided. In this
330 (early) version of the language, we will generate error reports at
331 multiple locations, so the use of `hello` will report an error and
332 explain were the value was set, and the addition will report an error
333 and say why numbers are needed. To be able to report locations for
334 errors, each language element will need to record a file location
335 (line and column) and each variable will need to record the language
336 element where its type was set. For now we will assume that each line
337 of an error message indicates one location in the file, and up to 2
338 types. So we provide a `printf`-like function which takes a format, a
339 language (a `struct exec` which has not yet been introduced), and 2
340 types. "`$1`" reports the first type, "`$2`" reports the second. We
341 will need a function to print the location, once we know how that is
346 static void fput_loc(struct exec *loc, FILE *f);
348 ###### core functions
350 static void type_err(struct parse_context *c,
351 char *fmt, struct exec *loc,
352 enum vtype t1, enum vtype t2)
354 fprintf(stderr, "%s:", c->file_name);
355 fput_loc(loc, stderr);
356 for (; *fmt ; fmt++) {
363 case '%': fputc(*fmt, stderr); break;
364 default: fputc('?', stderr); break;
366 fputs(vtype_names[t1], stderr);
369 fputs(vtype_names[t2], stderr);
378 static void tok_err(struct parse_context *c, char *fmt, struct token *t)
380 fprintf(stderr, "%s:%d:%d: %s\n", c->file_name, t->line, t->col, fmt);
386 One last introductory step before detailing the language elements and
387 providing their four requirements is to establish the data structures
388 to store these elements.
390 There are two key objects that we need to work with: executable
391 elements which comprise the program, and values which the program
392 works with. Between these are the variables in their various scopes
393 which hold the values.
397 Values can be numbers, which we represent as multi-precision
398 fractions, strings, Booleans and labels. When analysing the program
399 we also need to allow for places where no value is meaningful
400 (`Vnone`) and where we don't know what type to expect yet (`Vunknown`
401 which can be anything and `Vnolabel` which can be anything except a
404 Values are never shared, they are always copied when used, and freed
405 when no longer needed.
407 When propagating type information around the program, we need to
408 determine if two types are compatible, where `Vunknown` is compatible
409 with anything, and `Vnolabel` is compatible with anything except a
410 label. A separate function to encode this rule will simplify some code
413 There is an extra complication that this function needs to handle,
414 which is described later when the Conditional Statement is introduced.
415 In certain cases where a particular type is generally expected, a
416 Boolean is also alway permitted. To handle those cases, we explicitly tell
417 `vtype_compat()` is a Boolean is permitted.
419 When assigning command line arguments to variables, we need to be able
420 to parse each type from a string.
428 myLDLIBS := libnumber.o libstring.o -lgmp
429 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
433 enum vtype {Vnolabel, Vunknown, Vnone, Vstr, Vnum, Vbool, Vlabel} vtype;
442 char *vtype_names[] = {"nolabel", "unknown", "none", "string",
443 "number", "Boolean", "label"};
446 static void free_value(struct value v)
451 case Vunknown: break;
452 case Vstr: free(v.str.txt); break;
453 case Vnum: mpq_clear(v.num); break;
459 static int vtype_compat(enum vtype require, enum vtype have, int bool_permitted)
461 if (bool_permitted && have == Vbool)
465 return have != Vlabel;
469 return have == Vunknown || require == have;
473 ###### value functions
475 static void val_init(struct value *val, enum vtype type)
481 case Vunknown: break;
483 mpq_init(val->num); break;
485 val->str.txt = malloc(1);
497 static struct value dup_value(struct value v)
504 case Vunknown: break;
513 mpq_set(rv.num, v.num);
516 rv.str.len = v.str.len;
517 rv.str.txt = malloc(rv.str.len);
518 memcpy(rv.str.txt, v.str.txt, v.str.len);
524 static int value_cmp(struct value left, struct value right)
527 if (left.vtype != right.vtype)
528 return left.vtype - right.vtype;
529 switch (left.vtype) {
530 case Vlabel: cmp = left.label == right.label ? 0 : 1; break;
531 case Vnum: cmp = mpq_cmp(left.num, right.num); break;
532 case Vstr: cmp = text_cmp(left.str, right.str); break;
533 case Vbool: cmp = left.bool - right.bool; break;
536 case Vunknown: cmp = 0;
541 static struct text text_join(struct text a, struct text b)
544 rv.len = a.len + b.len;
545 rv.txt = malloc(rv.len);
546 memcpy(rv.txt, a.txt, a.len);
547 memcpy(rv.txt+a.len, b.txt, b.len);
551 static void print_value(struct value v)
555 printf("*Unknown*"); break;
558 printf("*no-value*"); break;
560 printf("*label-%p*", v.label); break;
562 printf("%.*s", v.str.len, v.str.txt); break;
564 printf("%s", v.bool ? "True":"False"); break;
569 mpf_set_q(fl, v.num);
570 gmp_printf("%Fg", fl);
577 static int parse_value(struct value *vl, char *arg)
590 vl->str.len = strlen(arg);
591 vl->str.txt = malloc(vl->str.len);
592 memcpy(vl->str.txt, arg, vl->str.len);
599 tx.txt = arg; tx.len = strlen(tx.txt);
600 if (number_parse(vl->num, tail, tx) == 0)
603 mpq_neg(vl->num, vl->num);
605 printf("Unsupported suffix: %s\n", arg);
610 if (strcasecmp(arg, "true") == 0 ||
611 strcmp(arg, "1") == 0)
613 else if (strcasecmp(arg, "false") == 0 ||
614 strcmp(arg, "0") == 0)
617 printf("Bad bool: %s\n", arg);
627 Variables are scoped named values. We store the names in a linked
628 list of "bindings" sorted lexically, and use sequential search and
635 struct binding *next; // in lexical order
639 This linked list is stored in the parse context so that "reduce"
640 functions can find or add variables, and so the analysis phase can
641 ensure that every variable gets a type.
645 struct binding *varlist; // In lexical order
649 static struct binding *find_binding(struct parse_context *c, struct text s)
651 struct binding **l = &c->varlist;
656 (cmp = text_cmp((*l)->name, s)) < 0)
660 n = calloc(1, sizeof(*n));
667 Each name can be linked to multiple variables defined in different
668 scopes. Each scope starts where the name is declared and continues
669 until the end of the containing code block. Scopes of a given name
670 cannot nest, so a declaration while a name is in-scope is an error.
672 ###### binding fields
673 struct variable *var;
677 struct variable *previous;
679 struct binding *name;
680 struct exec *where_decl;// where name was declared
681 struct exec *where_set; // where type was set
685 While the naming seems strange, we include local constants in the
686 definition of variables. A name declared `var := value` can
687 subsequently be changed, but a name declared `var ::= value` cannot -
690 ###### variable fields
693 Scopes in parallel branches can be partially merged. More
694 specifically, if a given name is declared in both branches of an
695 if/else then it's scope is a candidate for merging. Similarly if
696 every branch of an exhaustive switch (e.g. has an "else" clause)
697 declares a given name, then the scopes from the branches are
698 candidates for merging.
700 Note that names declared inside a loop (which is only parallel to
701 itself) are never visible after the loop. Similarly names defined in
702 scopes which are not parallel, such as those started by `for` and
703 `switch`, are never visible after the scope. Only variables defined in
704 both `then` and `else` (including the implicit then after an `if`, and
705 excluding `then` used with `for`) and in all `case`s and `else` of a
706 `switch` or `while` can be visible beyond the `if`/`switch`/`while`.
708 Labels, which are a bit like variables, follow different rules.
709 Labels are not explicitly declared, but if an undeclared name appears
710 in a context where a label is legal, that effectively declares the
711 name as a label. The declaration remains in force (or in scope) at
712 least to the end of the immediately containing block and conditionally
713 in any larger containing block which does not declare the name in some
714 other way. Importantly, the conditional scope extension happens even
715 if the label is only used in one parallel branch of a conditional --
716 when used in one branch it is treated as having been declared in all
719 Merge candidates are tentatively visible beyond the end of the
720 branching statement which creates them. If the name is used, the
721 merge is affirmed and they become a single variable visible at the
722 outer layer. If not - if it is redeclared first - the merge lapses.
724 To track scopes we have an extra stack, implemented as a linked list,
725 which roughly parallels the parse stack and which is used exclusively
726 for scoping. When a new scope is opened, a new frame is pushed and
727 the child-count of the parent frame is incremented. This child-count
728 is used to distinguish between the first of a set of parallel scopes,
729 in which declared variables must not be in scope, and subsequent
730 branches, whether they must already be conditionally scoped.
732 To push a new frame *before* any code in the frame is parsed, we need a
733 grammar reduction. This is most easily achieved with a grammar
734 element which derives the empty string, and creates the new scope when
735 it is recognized. This can be placed, for example, between a keyword
736 like "if" and the code following it.
740 struct scope *parent;
746 struct scope *scope_stack;
749 static void scope_pop(struct parse_context *c)
751 struct scope *s = c->scope_stack;
753 c->scope_stack = s->parent;
758 static void scope_push(struct parse_context *c)
760 struct scope *s = calloc(1, sizeof(*s));
762 c->scope_stack->child_count += 1;
763 s->parent = c->scope_stack;
771 OpenScope -> ${ scope_push(config2context(config)); }$
774 Each variable records a scope depth and is in one of four states:
776 - "in scope". This is the case between the declaration of the
777 variable and the end of the containing block, and also between
778 the usage with affirms a merge and the end of that block.
780 The scope depth is not greater than the current parse context scope
781 nest depth. When the block of that depth closes, the state will
782 change. To achieve this, all "in scope" variables are linked
783 together as a stack in nesting order.
785 - "pending". The "in scope" block has closed, but other parallel
786 scopes are still being processed. So far, every parallel block at
787 the same level that has closed has declared the name.
789 The scope depth is the depth of the last parallel block that
790 enclosed the declaration, and that has closed.
792 - "conditionally in scope". The "in scope" block and all parallel
793 scopes have closed, and no further mention of the name has been
794 seen. This state includes a secondary nest depth which records the
795 outermost scope seen since the variable became conditionally in
796 scope. If a use of the name is found, the variable becomes "in
797 scope" and that secondary depth becomes the recorded scope depth.
798 If the name is declared as a new variable, the old variable becomes
799 "out of scope" and the recorded scope depth stays unchanged.
801 - "out of scope". The variable is neither in scope nor conditionally
802 in scope. It is permanently out of scope now and can be removed from
803 the "in scope" stack.
806 ###### variable fields
807 int depth, min_depth;
808 enum { OutScope, PendingScope, CondScope, InScope } scope;
809 struct variable *in_scope;
813 struct variable *in_scope;
815 All variables with the same name are linked together using the
816 'previous' link. Those variable that have
817 been affirmatively merged all have a 'merged' pointer that points to
818 one primary variable - the most recently declared instance. When
819 merging variables, we need to also adjust the 'merged' pointer on any
820 other variables that had previously been merged with the one that will
821 no longer be primary.
823 ###### variable fields
824 struct variable *merged;
828 static void variable_merge(struct variable *primary, struct variable *secondary)
834 primary = primary->merged;
836 for (v = primary->previous; v; v=v->previous)
837 if (v == secondary || v == secondary->merged ||
838 v->merged == secondary ||
839 (v->merged && v->merged == secondary->merged)) {
847 while (context.varlist) {
848 struct binding *b = context.varlist;
849 struct variable *v = b->var;
850 context.varlist = b->next;
853 struct variable *t = v;
861 #### Manipulating Bindings
863 When a name is conditionally visible, a new declaration discards the
864 old binding - the condition lapses. Conversely a usage of the name
865 affirms the visibility and extends it to the end of the containing
866 block - i.e. the block that contains both the original declaration and
867 the latest usage. This is determined from `min_depth`. When a
868 conditionally visible variable gets affirmed like this, it is also
869 merged with other conditionally visible variables with the same name.
871 When we parse a variable declaration we either signal an error if the
872 name is currently bound, or create a new variable at the current nest
873 depth if the name is unbound or bound to a conditionally scoped or
874 pending-scope variable. If the previous variable was conditionally
875 scoped, it and its homonyms becomes out-of-scope.
877 When we parse a variable reference (including non-declarative
878 assignment) we signal an error if the name is not bound or is bound to
879 a pending-scope variable; update the scope if the name is bound to a
880 conditionally scoped variable; or just proceed normally if the named
881 variable is in scope.
883 When we exit a scope, any variables bound at this level are either
884 marked out of scope or pending-scoped, depending on whether the
885 scope was sequential or parallel.
887 When exiting a parallel scope we check if there are any variables that
888 were previously pending and are still visible. If there are, then
889 there weren't redeclared in the most recent scope, so they cannot be
890 merged and must become out-of-scope. If it is not the first of
891 parallel scopes (based on `child_count`), we check that there was a
892 previous binding that is still pending-scope. If there isn't, the new
893 variable must now be out-of-scope.
895 When exiting a sequential scope that immediately enclosed parallel
896 scopes, we need to resolve any pending-scope variables. If there was
897 no `else` clause, and we cannot determine that the `switch` was exhaustive,
898 we need to mark all pending-scope variable as out-of-scope. Otherwise
899 all pending-scope variables become conditionally scoped.
902 enum closetype { CloseSequential, CloseParallel, CloseElse };
906 static struct variable *var_decl(struct parse_context *c, struct text s)
908 struct binding *b = find_binding(c, s);
909 struct variable *v = b->var;
911 switch (v ? v->scope : OutScope) {
913 /* Caller will report the error */
917 v && v->scope == CondScope;
923 v = calloc(1, sizeof(*v));
924 v->previous = b->var;
927 v->min_depth = v->depth = c->scope_depth;
929 v->in_scope = c->in_scope;
931 val_init(&v->val, Vunknown);
935 static struct variable *var_ref(struct parse_context *c, struct text s)
937 struct binding *b = find_binding(c, s);
938 struct variable *v = b->var;
941 switch (v ? v->scope : OutScope) {
944 /* Signal an error - once that is possible */
947 /* All CondScope variables of this name need to be merged
950 v->depth = v->min_depth;
952 for (v2 = v->previous;
953 v2 && v2->scope == CondScope;
955 variable_merge(v, v2);
963 static void var_block_close(struct parse_context *c, enum closetype ct)
965 /* close of all variables that are in_scope */
966 struct variable *v, **vp, *v2;
969 for (vp = &c->in_scope;
970 v = *vp, v && v->depth > c->scope_depth && v->min_depth > c->scope_depth;
974 case CloseParallel: /* handle PendingScope */
978 if (c->scope_stack->child_count == 1)
979 v->scope = PendingScope;
980 else if (v->previous &&
981 v->previous->scope == PendingScope)
982 v->scope = PendingScope;
983 else if (v->val.vtype == Vlabel)
984 v->scope = PendingScope;
985 else if (v->name->var == v)
987 if (ct == CloseElse) {
988 /* All Pending variables with this name
989 * are now Conditional */
991 v2 && v2->scope == PendingScope;
993 v2->scope = CondScope;
998 v2 && v2->scope == PendingScope;
1000 if (v2->val.vtype != Vlabel)
1001 v2->scope = OutScope;
1003 case OutScope: break;
1006 case CloseSequential:
1007 if (v->val.vtype == Vlabel)
1008 v->scope = PendingScope;
1011 v->scope = OutScope;
1014 /* There was no 'else', so we can only become
1015 * conditional if we know the cases were exhaustive,
1016 * and that doesn't mean anything yet.
1017 * So only labels become conditional..
1020 v2 && v2->scope == PendingScope;
1022 if (v2->val.vtype == Vlabel) {
1023 v2->scope = CondScope;
1024 v2->min_depth = c->scope_depth;
1026 v2->scope = OutScope;
1029 case OutScope: break;
1033 if (v->scope == OutScope)
1042 Executables can be lots of different things. In many cases an
1043 executable is just an operation combined with one or two other
1044 executables. This allows for expressions and lists etc. Other times
1045 an executable is something quite specific like a constant or variable
1046 name. So we define a `struct exec` to be a general executable with a
1047 type, and a `struct binode` which is a subclass of `exec`, forms a
1048 node in a binary tree, and holds an operation. There will be other
1049 subclasses, and to access these we need to be able to `cast` the
1050 `exec` into the various other types.
1053 #define cast(structname, pointer) ({ \
1054 const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \
1055 if (__mptr && *__mptr != X##structname) abort(); \
1056 (struct structname *)( (char *)__mptr);})
1058 #define new(structname) ({ \
1059 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1060 __ptr->type = X##structname; \
1061 __ptr->line = -1; __ptr->column = -1; \
1064 #define new_pos(structname, token) ({ \
1065 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1066 __ptr->type = X##structname; \
1067 __ptr->line = token.line; __ptr->column = token.col; \
1076 enum exec_types type;
1084 struct exec *left, *right;
1087 ###### ast functions
1089 static int __fput_loc(struct exec *loc, FILE *f)
1091 if (loc->line >= 0) {
1092 fprintf(f, "%d:%d: ", loc->line, loc->column);
1095 if (loc->type == Xbinode)
1096 return __fput_loc(cast(binode,loc)->left, f) ||
1097 __fput_loc(cast(binode,loc)->right, f);
1100 static void fput_loc(struct exec *loc, FILE *f)
1102 if (!__fput_loc(loc, f))
1103 fprintf(f, "??:??: ");
1106 Each different type of `exec` node needs a number of functions
1107 defined, a bit like methods. We must be able to be able to free it,
1108 print it, analyse it and execute it. Once we have specific `exec`
1109 types we will need to parse them too. Let's take this a bit more
1114 The parser generator requires a `free_foo` function for each struct
1115 that stores attributes and they will be `exec`s and subtypes there-of.
1116 So we need `free_exec` which can handle all the subtypes, and we need
1119 ###### ast functions
1121 static void free_binode(struct binode *b)
1126 free_exec(b->right);
1130 ###### core functions
1131 static void free_exec(struct exec *e)
1140 ###### forward decls
1142 static void free_exec(struct exec *e);
1144 ###### free exec cases
1145 case Xbinode: free_binode(cast(binode, e)); break;
1149 Printing an `exec` requires that we know the current indent level for
1150 printing line-oriented components. As will become clear later, we
1151 also want to know what sort of bracketing to use.
1153 ###### ast functions
1155 static void do_indent(int i, char *str)
1162 ###### core functions
1163 static void print_binode(struct binode *b, int indent, int bracket)
1167 ## print binode cases
1171 static void print_exec(struct exec *e, int indent, int bracket)
1177 print_binode(cast(binode, e), indent, bracket); break;
1182 ###### forward decls
1184 static void print_exec(struct exec *e, int indent, int bracket);
1188 As discussed, analysis involves propagating type requirements around
1189 the program and looking for errors.
1191 So `propagate_types` is passed an expected type (being a `vtype`
1192 together with a `bool_permitted` flag) that the `exec` is expected to
1193 return, and returns the type that it does return, either of which can
1194 be `Vunknown`. An `ok` flag is passed by reference. It is set to `0`
1195 when an error is found, and `2` when any change is made. If it
1196 remains unchanged at `1`, then no more propagation is needed.
1198 ###### core functions
1200 static enum vtype propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1201 enum vtype type, int bool_permitted)
1208 switch (prog->type) {
1211 struct binode *b = cast(binode, prog);
1213 ## propagate binode cases
1217 ## propagate exec cases
1224 Interpreting an `exec` doesn't require anything but the `exec`. State
1225 is stored in variables and each variable will be directly linked from
1226 within the `exec` tree. The exception to this is the whole `program`
1227 which needs to look at command line arguments. The `program` will be
1228 interpreted separately.
1230 Each `exec` can return a value, which may be `Vnone` but shouldn't be `Vunknown`.
1232 ###### core functions
1234 static struct value interp_exec(struct exec *e)
1244 struct binode *b = cast(binode, e);
1245 struct value left, right;
1246 left.vtype = right.vtype = Vnone;
1248 ## interp binode cases
1250 free_value(left); free_value(right);
1253 ## interp exec cases
1258 ## Language elements
1260 Each language element needs to be parsed, printed, analysed,
1261 interpreted, and freed. There are several, so let's just start with
1262 the easy ones and work our way up.
1266 We have already met values as separate objects. When manifest
1267 constants appear in the program text, that must result in an executable
1268 which has a constant value. So the `val` structure embeds a value in
1284 $0 = new_pos(val, $1);
1285 $0->val.vtype = Vbool;
1289 $0 = new_pos(val, $1);
1290 $0->val.vtype = Vbool;
1294 $0 = new_pos(val, $1);
1295 $0->val.vtype = Vnum;
1298 if (number_parse($0->val.num, tail, $1.txt) == 0)
1299 mpq_init($0->val.num);
1301 tok_err(config2context(config), "error: unsupported number suffix.",
1306 $0 = new_pos(val, $1);
1307 $0->val.vtype = Vstr;
1310 string_parse(&$1, '\\', &$0->val.str, tail);
1312 tok_err(config2context(config), "error: unsupported string suffix.",
1317 $0 = new_pos(val, $1);
1318 $0->val.vtype = Vstr;
1321 string_parse(&$1, '\\', &$0->val.str, tail);
1323 tok_err(config2context(config), "error: unsupported string suffix.",
1328 ###### print exec cases
1331 struct val *v = cast(val, e);
1332 if (v->val.vtype == Vstr)
1334 print_value(v->val);
1335 if (v->val.vtype == Vstr)
1340 ###### propagate exec cases
1343 struct val *val = cast(val, prog);
1344 if (!vtype_compat(type, val->val.vtype, bool_permitted)) {
1345 type_err(c, "error: expected %1 found %2",
1346 prog, type, val->val.vtype);
1349 return val->val.vtype;
1352 ###### interp exec cases
1354 return dup_value(cast(val, e)->val);
1356 ###### ast functions
1357 static void free_val(struct val *v)
1365 ###### free exec cases
1366 case Xval: free_val(cast(val, e)); break;
1368 ###### ast functions
1369 // Move all nodes from 'b' to 'rv', reversing the order.
1370 // In 'b' 'left' is a list, and 'right' is the last node.
1371 // In 'rv', left' is the first node and 'right' is a list.
1372 static struct binode *reorder_bilist(struct binode *b)
1374 struct binode *rv = NULL;
1377 struct exec *t = b->right;
1381 b = cast(binode, b->left);
1391 Just as we used a `val` to wrap a value into an `exec`, we similarly
1392 need a `var` to wrap a `variable` into an exec. While each `val`
1393 contained a copy of the value, each `var` hold a link to the variable
1394 because it really is the same variable no matter where it appears.
1395 When a variable is used, we need to remember to follow the `->merged`
1396 link to find the primary instance.
1404 struct variable *var;
1410 VariableDecl -> IDENTIFIER := ${ {
1411 struct variable *v = var_decl(config2context(config), $1.txt);
1412 $0 = new_pos(var, $1);
1417 v = var_ref(config2context(config), $1.txt);
1419 type_err(config2context(config), "error: variable '%v' redeclared",
1421 type_err(config2context(config), "info: this is where '%v' was first declared",
1422 v->where_decl, Vnone, Vnone);
1425 | IDENTIFIER ::= ${ {
1426 struct variable *v = var_decl(config2context(config), $1.txt);
1427 $0 = new_pos(var, $1);
1433 v = var_ref(config2context(config), $1.txt);
1435 type_err(config2context(config), "error: variable '%v' redeclared",
1437 type_err(config2context(config), "info: this is where '%v' was first declared",
1438 v->where_decl, Vnone, Vnone);
1442 Variable -> IDENTIFIER ${ {
1443 struct variable *v = var_ref(config2context(config), $1.txt);
1444 $0 = new_pos(var, $1);
1446 /* This might be a label - allocate a var just in case */
1447 v = var_decl(config2context(config), $1.txt);
1449 val_init(&v->val, Vlabel);
1456 ###### print exec cases
1459 struct var *v = cast(var, e);
1461 struct binding *b = v->var->name;
1462 printf("%.*s", b->name.len, b->name.txt);
1469 if (loc->type == Xvar) {
1470 struct var *v = cast(var, loc);
1472 struct binding *b = v->var->name;
1473 fprintf(stderr, "%.*s", b->name.len, b->name.txt);
1475 fputs("???", stderr);
1477 fputs("NOTVAR", stderr);
1480 ###### propagate exec cases
1484 struct var *var = cast(var, prog);
1485 struct variable *v = var->var;
1487 type_err(c, "%d:BUG: no variable!!", prog, Vnone, Vnone);
1493 if (v->val.vtype == Vunknown) {
1494 if (type > Vunknown && *ok != 0) {
1495 val_init(&v->val, type);
1496 v->where_set = prog;
1501 if (!vtype_compat(type, v->val.vtype, bool_permitted)) {
1502 type_err(c, "error: expected %1 but variable '%v' is %2", prog,
1503 type, v->val.vtype);
1504 type_err(c, "info: this is where '%v' was set to %1", v->where_set,
1505 v->val.vtype, Vnone);
1508 if (type <= Vunknown)
1509 return v->val.vtype;
1513 ###### interp exec cases
1516 struct var *var = cast(var, e);
1517 struct variable *v = var->var;
1521 return dup_value(v->val);
1524 ###### ast functions
1526 static void free_var(struct var *v)
1531 ###### free exec cases
1532 case Xvar: free_var(cast(var, e)); break;
1534 ### Expressions: Boolean
1536 Our first user of the `binode` will be expressions, and particularly
1537 Boolean expressions. As I haven't implemented precedence in the
1538 parser generator yet, we need different names from each precedence
1539 level used by expressions. The outer most or lowest level precedence
1540 are Boolean `or` `and`, and `not` which form an `Expression` out of `BTerm`s
1551 Expression -> Expression or BTerm ${ {
1552 struct binode *b = new(binode);
1558 | BTerm ${ $0 = $<1; }$
1560 BTerm -> BTerm and BFact ${ {
1561 struct binode *b = new(binode);
1567 | BFact ${ $0 = $<1; }$
1569 BFact -> not BFact ${ {
1570 struct binode *b = new(binode);
1577 ###### print binode cases
1579 print_exec(b->left, -1, 0);
1581 print_exec(b->right, -1, 0);
1584 print_exec(b->left, -1, 0);
1586 print_exec(b->right, -1, 0);
1590 print_exec(b->right, -1, 0);
1593 ###### propagate binode cases
1597 /* both must be Vbool, result is Vbool */
1598 propagate_types(b->left, c, ok, Vbool, 0);
1599 propagate_types(b->right, c, ok, Vbool, 0);
1600 if (type != Vbool && type > Vunknown) {
1601 type_err(c, "error: %1 operation found where %2 expected", prog,
1607 ###### interp binode cases
1609 rv = interp_exec(b->left);
1610 right = interp_exec(b->right);
1611 rv.bool = rv.bool && right.bool;
1614 rv = interp_exec(b->left);
1615 right = interp_exec(b->right);
1616 rv.bool = rv.bool || right.bool;
1619 rv = interp_exec(b->right);
1623 ### Expressions: Comparison
1625 Of slightly higher precedence that Boolean expressions are
1627 A comparison takes arguments of any type, but the two types must be
1630 To simplify the parsing we introduce an `eop` which can record an
1631 expression operator.
1638 ###### ast functions
1639 static void free_eop(struct eop *e)
1654 | Expr CMPop Expr ${ {
1655 struct binode *b = new(binode);
1661 | Expr ${ $0 = $<1; }$
1666 CMPop -> < ${ $0.op = Less; }$
1667 | > ${ $0.op = Gtr; }$
1668 | <= ${ $0.op = LessEq; }$
1669 | >= ${ $0.op = GtrEq; }$
1670 | == ${ $0.op = Eql; }$
1671 | != ${ $0.op = NEql; }$
1673 ###### print binode cases
1681 print_exec(b->left, -1, 0);
1683 case Less: printf(" < "); break;
1684 case LessEq: printf(" <= "); break;
1685 case Gtr: printf(" > "); break;
1686 case GtrEq: printf(" >= "); break;
1687 case Eql: printf(" == "); break;
1688 case NEql: printf(" != "); break;
1691 print_exec(b->right, -1, 0);
1694 ###### propagate binode cases
1701 /* Both must match but not labels, result is Vbool */
1702 t = propagate_types(b->left, c, ok, Vnolabel, 0);
1704 propagate_types(b->right, c, ok, t, 0);
1706 t = propagate_types(b->right, c, ok, Vnolabel, 0);
1708 t = propagate_types(b->left, c, ok, t, 0);
1710 if (!vtype_compat(type, Vbool, 0)) {
1711 type_err(c, "error: Comparison returns %1 but %2 expected", prog,
1717 ###### interp binode cases
1726 left = interp_exec(b->left);
1727 right = interp_exec(b->right);
1728 cmp = value_cmp(left, right);
1731 case Less: rv.bool = cmp < 0; break;
1732 case LessEq: rv.bool = cmp <= 0; break;
1733 case Gtr: rv.bool = cmp > 0; break;
1734 case GtrEq: rv.bool = cmp >= 0; break;
1735 case Eql: rv.bool = cmp == 0; break;
1736 case NEql: rv.bool = cmp != 0; break;
1737 default: rv.bool = 0; break;
1742 ### Expressions: The rest
1744 The remaining expressions with the highest precedence are arithmetic
1745 and string concatenation. They are `Expr`, `Term`, and `Factor`.
1746 The `Factor` is where the `Value` and `Variable` that we already have
1749 `+` and `-` are both infix and prefix operations (where they are
1750 absolute value and negation). These have different operator names.
1752 We also have a 'Bracket' operator which records where parentheses were
1753 found. This make it easy to reproduce these when printing. Once
1754 precedence is handled better I might be able to discard this.
1766 Expr -> Expr Eop Term ${ {
1767 struct binode *b = new(binode);
1773 | Term ${ $0 = $<1; }$
1775 Term -> Term Top Factor ${ {
1776 struct binode *b = new(binode);
1782 | Factor ${ $0 = $<1; }$
1784 Factor -> ( Expression ) ${ {
1785 struct binode *b = new_pos(binode, $1);
1791 struct binode *b = new(binode);
1796 | Value ${ $0 = $<1; }$
1797 | Variable ${ $0 = $<1; }$
1800 Eop -> + ${ $0.op = Plus; }$
1801 | - ${ $0.op = Minus; }$
1803 Uop -> + ${ $0.op = Absolute; }$
1804 | - ${ $0.op = Negate; }$
1806 Top -> * ${ $0.op = Times; }$
1807 | / ${ $0.op = Divide; }$
1808 | ++ ${ $0.op = Concat; }$
1810 ###### print binode cases
1816 print_exec(b->left, indent, 0);
1818 case Plus: printf(" + "); break;
1819 case Minus: printf(" - "); break;
1820 case Times: printf(" * "); break;
1821 case Divide: printf(" / "); break;
1822 case Concat: printf(" ++ "); break;
1825 print_exec(b->right, indent, 0);
1829 print_exec(b->right, indent, 0);
1833 print_exec(b->right, indent, 0);
1837 print_exec(b->right, indent, 0);
1841 ###### propagate binode cases
1846 /* both must be numbers, result is Vnum */
1849 /* as propagate_types ignores a NULL,
1850 * unary ops fit here too */
1851 propagate_types(b->left, c, ok, Vnum, 0);
1852 propagate_types(b->right, c, ok, Vnum, 0);
1853 if (!vtype_compat(type, Vnum, 0)) {
1854 type_err(c, "error: Arithmetic returns %1 but %2 expected", prog,
1861 /* both must be Vstr, result is Vstr */
1862 propagate_types(b->left, c, ok, Vstr, 0);
1863 propagate_types(b->right, c, ok, Vstr, 0);
1864 if (!vtype_compat(type, Vstr, 0)) {
1865 type_err(c, "error: Concat returns %1 but %2 expected", prog,
1872 return propagate_types(b->right, c, ok, type, 0);
1874 ###### interp binode cases
1877 rv = interp_exec(b->left);
1878 right = interp_exec(b->right);
1879 mpq_add(rv.num, rv.num, right.num);
1882 rv = interp_exec(b->left);
1883 right = interp_exec(b->right);
1884 mpq_sub(rv.num, rv.num, right.num);
1887 rv = interp_exec(b->left);
1888 right = interp_exec(b->right);
1889 mpq_mul(rv.num, rv.num, right.num);
1892 rv = interp_exec(b->left);
1893 right = interp_exec(b->right);
1894 mpq_div(rv.num, rv.num, right.num);
1897 rv = interp_exec(b->right);
1898 mpq_neg(rv.num, rv.num);
1901 rv = interp_exec(b->right);
1902 mpq_abs(rv.num, rv.num);
1905 rv = interp_exec(b->right);
1908 left = interp_exec(b->left);
1909 right = interp_exec(b->right);
1911 rv.str = text_join(left.str, right.str);
1914 ### Blocks, Statements, and Statement lists.
1916 Now that we have expressions out of the way we need to turn to
1917 statements. There are simple statements and more complex statements.
1918 Simple statements do not contain newlines, complex statements do.
1920 Statements often come in sequences and we have corresponding simple
1921 statement lists and complex statement lists.
1922 The former comprise only simple statements separated by semicolons.
1923 The later comprise complex statements and simple statement lists. They are
1924 separated by newlines. Thus the semicolon is only used to separate
1925 simple statements on the one line. This may be overly restrictive,
1926 but I'm not sure I ever want a complex statement to share a line with
1929 Note that a simple statement list can still use multiple lines if
1930 subsequent lines are indented, so
1932 ###### Example: wrapped simple statement list
1937 is a single simple statement list. This might allow room for
1938 confusion, so I'm not set on it yet.
1940 A simple statement list needs no extra syntax. A complex statement
1941 list has two syntactic forms. It can be enclosed in braces (much like
1942 C blocks), or it can be introduced by a colon and continue until an
1943 unindented newline (much like Python blocks). With this extra syntax
1944 it is referred to as a block.
1946 Note that a block does not have to include any newlines if it only
1947 contains simple statements. So both of:
1949 if condition: a=b; d=f
1951 if condition { a=b; print f }
1955 In either case the list is constructed from a `binode` list with
1956 `Block` as the operator. When parsing the list it is most convenient
1957 to append to the end, so a list is a list and a statement. When using
1958 the list it is more convenient to consider a list to be a statement
1959 and a list. So we need a function to re-order a list.
1960 `reorder_bilist` serves this purpose.
1962 The only stand-alone statement we introduce at this stage is `pass`
1963 which does nothing and is represented as a `NULL` pointer in a `Block`
1983 Block -> Open Statementlist Close ${ $0 = $<2; }$
1984 | Open Newlines Statementlist Close ${ $0 = $<3; }$
1985 | Open SimpleStatements } ${ $0 = reorder_bilist($<2); }$
1986 | Open Newlines SimpleStatements } ${ $0 = reorder_bilist($<3); }$
1987 | : Statementlist ${ $0 = $<2; }$
1988 | : SimpleStatements ${ $0 = reorder_bilist($<2); }$
1990 Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<1); }$
1992 ComplexStatements -> ComplexStatements ComplexStatement ${
1998 | ComplexStatements NEWLINE ${ $0 = $<1; }$
1999 | ComplexStatement ${
2007 ComplexStatement -> SimpleStatements NEWLINE ${
2008 $0 = reorder_bilist($<1);
2010 ## ComplexStatement Grammar
2013 SimpleStatements -> SimpleStatements ; SimpleStatement ${
2019 | SimpleStatement ${
2025 | SimpleStatements ; ${ $0 = $<1; }$
2027 SimpleStatement -> pass ${ $0 = NULL; }$
2028 ## SimpleStatement Grammar
2030 ###### print binode cases
2034 if (b->left == NULL)
2037 print_exec(b->left, indent, 0);
2040 print_exec(b->right, indent, 0);
2043 // block, one per line
2044 if (b->left == NULL)
2045 do_indent(indent, "pass\n");
2047 print_exec(b->left, indent, bracket);
2049 print_exec(b->right, indent, bracket);
2053 ###### propagate binode cases
2056 /* If any statement returns something other then Vnone
2057 * or Vbool then all such must return same type.
2058 * As each statement may be Vnone or something else,
2059 * we must always pass Vunknown down, otherwise an incorrect
2060 * error might occur. We never return Vnone unless it is
2065 for (e = b; e; e = cast(binode, e->right)) {
2066 t = propagate_types(e->left, c, ok, Vunknown, bool_permitted);
2067 if (bool_permitted && t == Vbool)
2069 if (t != Vunknown && t != Vnone && t != Vbool) {
2070 if (type == Vunknown)
2072 else if (t != type) {
2073 type_err(c, "error: expected %1, found %2",
2082 ###### interp binode cases
2084 while (rv.vtype == Vnone &&
2087 rv = interp_exec(b->left);
2088 b = cast(binode, b->right);
2092 ### The Print statement
2094 `print` is a simple statement that takes a comma-separated list of
2095 expressions and prints the values separated by spaces and terminated
2096 by a newline. No control of formatting is possible.
2098 `print` faces the same list-ordering issue as blocks, and uses the
2104 ###### SimpleStatement Grammar
2106 | print ExpressionList ${
2107 $0 = reorder_bilist($<2);
2109 | print ExpressionList , ${
2114 $0 = reorder_bilist($0);
2125 ExpressionList -> ExpressionList , Expression ${
2138 ###### print binode cases
2141 do_indent(indent, "print");
2145 print_exec(b->left, -1, 0);
2149 b = cast(binode, b->right);
2155 ###### propagate binode cases
2158 /* don't care but all must be consistent */
2159 propagate_types(b->left, c, ok, Vnolabel, 0);
2160 propagate_types(b->right, c, ok, Vnolabel, 0);
2163 ###### interp binode cases
2169 for ( ; b; b = cast(binode, b->right))
2173 left = interp_exec(b->left);
2186 ###### Assignment statement
2188 An assignment will assign a value to a variable, providing it hasn't
2189 be declared as a constant. The analysis phase ensures that the type
2190 will be correct so the interpreter just needs to perform the
2191 calculation. There is a form of assignment which declares a new
2192 variable as well as assigning a value. If a name is assigned before
2193 it is declared, and error will be raised as the name is created as
2194 `Vlabel` and it is illegal to assign to such names.
2200 ###### SimpleStatement Grammar
2201 | Variable = Expression ${ {
2202 struct var *v = cast(var, $1);
2208 if (v->var && !v->var->constant) {
2212 | VariableDecl Expression ${
2219 ###### print binode cases
2222 do_indent(indent, "");
2223 print_exec(b->left, indent, 0);
2225 print_exec(b->right, indent, 0);
2231 do_indent(indent, "");
2232 print_exec(b->left, indent, 0);
2233 if (cast(var, b->left)->var->constant)
2237 print_exec(b->right, indent, 0);
2242 ###### propagate binode cases
2246 /* Both must match and not be labels, result is Vnone */
2247 t = propagate_types(b->left, c, ok, Vnolabel, 0);
2249 if (propagate_types(b->right, c, ok, t, 0) != t)
2250 if (b->left->type == Xvar)
2251 type_err(c, "info: variable '%v' was set as %1 here.",
2252 cast(var, b->left)->var->where_set, t, Vnone);
2254 t = propagate_types(b->right, c, ok, Vnolabel, 0);
2256 propagate_types(b->left, c, ok, t, 0);
2262 ###### interp binode cases
2267 struct variable *v = cast(var, b->left)->var;
2270 right = interp_exec(b->right);
2273 right.vtype = Vunknown;
2277 ### The `use` statement
2279 The `use` statement is the last "simple" statement. It is needed when
2280 the condition in a conditional statement is a block. `use` works much
2281 like `return` in C, but only completes the `condition`, not the whole
2287 ###### SimpleStatement Grammar
2289 $0 = new_pos(binode, $1);
2294 ###### print binode cases
2297 do_indent(indent, "use ");
2298 print_exec(b->right, -1, 0);
2303 ###### propagate binode cases
2306 /* result matches value */
2307 return propagate_types(b->right, c, ok, type, 0);
2309 ###### interp binode cases
2312 rv = interp_exec(b->right);
2315 ### The Conditional Statement
2317 This is the biggy and currently the only complex statement. This
2318 subsumes `if`, `while`, `do/while`, `switch`, and some parts of `for`.
2319 It is comprised of a number of parts, all of which are optional though
2320 set combinations apply. Each part is (usually) a key word (`then` is
2321 sometimes optional) followed by either an expression or a code block,
2322 except the `casepart` which is a "key word and an expression" followed
2323 by a code block. The code-block option is valid for all parts and,
2324 where an expression is also allowed, the code block can use the `use`
2325 statement to report a value. If the code block does not report a value
2326 the effect is similar to reporting `True`.
2328 The `else` and `case` parts, as well as `then` when combined with
2329 `if`, can contain a `use` statement which will apply to some
2330 containing conditional statement. `for` parts, `do` parts and `then`
2331 parts used with `for` can never contain a `use`, except in some
2332 subordinate conditional statement.
2334 If there is a `forpart`, it is executed first, only once.
2335 If there is a `dopart`, then it is executed repeatedly providing
2336 always that the `condpart` or `cond`, if present, does not return a non-True
2337 value. `condpart` can fail to return any value if it simply executes
2338 to completion. This is treated the same as returning `True`.
2340 If there is a `thenpart` it will be executed whenever the `condpart`
2341 or `cond` returns True (or does not return any value), but this will happen
2342 *after* `dopart` (when present).
2344 If `elsepart` is present it will be executed at most once when the
2345 condition returns `False` or some value that isn't `True` and isn't
2346 matched by any `casepart`. If there are any `casepart`s, they will be
2347 executed when the condition returns a matching value.
2349 The particular sorts of values allowed in case parts has not yet been
2350 determined in the language design, so nothing is prohibited.
2352 The various blocks in this complex statement potentially provide scope
2353 for variables as described earlier. Each such block must include the
2354 "OpenScope" nonterminal before parsing the block, and must call
2355 `var_block_close()` when closing the block.
2357 The code following "`if`", "`switch`" and "`for`" does not get its own
2358 scope, but is in a scope covering the whole statement, so names
2359 declared there cannot be redeclared elsewhere. Similarly the
2360 condition following "`while`" is in a scope the covers the body
2361 ("`do`" part) of the loop, and which does not allow conditional scope
2362 extension. Code following "`then`" (both looping and non-looping),
2363 "`else`" and "`case`" each get their own local scope.
2365 The type requirements on the code block in a `whilepart` are quite
2366 unusal. It is allowed to return a value of some identifiable type, in
2367 which case the loop aborts and an appropriate `casepart` is run, or it
2368 can return a Boolean, in which case the loop either continues to the
2369 `dopart` (on `True`) or aborts and runs the `elsepart` (on `False`).
2370 This is different both from the `ifpart` code block which is expected to
2371 return a Boolean, or the `switchpart` code block which is expected to
2372 return the same type as the casepart values. The correct analysis of
2373 the type of the `whilepart` code block is the reason for the
2374 `bool_permitted` flag which is passed to `propagate_types()`.
2376 The `cond_statement` cannot fit into a `binode` so a new `exec` is
2385 struct exec *action;
2386 struct casepart *next;
2388 struct cond_statement {
2390 struct exec *forpart, *condpart, *dopart, *thenpart, *elsepart;
2391 struct casepart *casepart;
2394 ###### ast functions
2396 static void free_casepart(struct casepart *cp)
2400 free_exec(cp->value);
2401 free_exec(cp->action);
2408 static void free_cond_statement(struct cond_statement *s)
2412 free_exec(s->forpart);
2413 free_exec(s->condpart);
2414 free_exec(s->dopart);
2415 free_exec(s->thenpart);
2416 free_exec(s->elsepart);
2417 free_casepart(s->casepart);
2421 ###### free exec cases
2422 case Xcond_statement: free_cond_statement(cast(cond_statement, e)); break;
2424 ###### ComplexStatement Grammar
2425 | CondStatement ${ $0 = $<1; }$
2430 // both ForThen and Whilepart open scopes, and CondSuffix only
2431 // closes one - so in the first branch here we have another to close.
2432 CondStatement -> ForThen WhilePart CondSuffix ${
2434 $0->forpart = $1.forpart; $1.forpart = NULL;
2435 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
2436 $0->condpart = $2.condpart; $2.condpart = NULL;
2437 $0->dopart = $2.dopart; $2.dopart = NULL;
2438 var_block_close(config2context(config), CloseSequential);
2440 | WhilePart CondSuffix ${
2442 $0->condpart = $1.condpart; $1.condpart = NULL;
2443 $0->dopart = $1.dopart; $1.dopart = NULL;
2445 | SwitchPart CondSuffix ${
2449 | IfPart IfSuffix ${
2451 $0->condpart = $1.condpart; $1.condpart = NULL;
2452 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
2453 // This is where we close an "if" statement
2454 var_block_close(config2context(config), CloseSequential);
2457 CondSuffix -> IfSuffix ${
2459 // This is where we close scope of the whole
2460 // "for" or "while" statement
2461 var_block_close(config2context(config), CloseSequential);
2463 | CasePart CondSuffix ${
2465 $1->next = $0->casepart;
2470 CasePart -> Newlines case Expression OpenScope Block ${
2471 $0 = calloc(1,sizeof(struct casepart));
2474 var_block_close(config2context(config), CloseParallel);
2476 | case Expression OpenScope Block ${
2477 $0 = calloc(1,sizeof(struct casepart));
2480 var_block_close(config2context(config), CloseParallel);
2484 IfSuffix -> Newlines ${ $0 = new(cond_statement); }$
2485 | Newlines else OpenScope Block ${
2486 $0 = new(cond_statement);
2488 var_block_close(config2context(config), CloseElse);
2490 | else OpenScope Block ${
2491 $0 = new(cond_statement);
2493 var_block_close(config2context(config), CloseElse);
2495 | Newlines else OpenScope CondStatement ${
2496 $0 = new(cond_statement);
2498 var_block_close(config2context(config), CloseElse);
2500 | else OpenScope CondStatement ${
2501 $0 = new(cond_statement);
2503 var_block_close(config2context(config), CloseElse);
2508 // These scopes are closed in CondSuffix
2509 ForPart -> for OpenScope SimpleStatements ${
2510 $0 = reorder_bilist($<3);
2512 | for OpenScope Block ${
2516 ThenPart -> then OpenScope SimpleStatements ${
2517 $0 = reorder_bilist($<3);
2518 var_block_close(config2context(config), CloseSequential);
2520 | then OpenScope Block ${
2522 var_block_close(config2context(config), CloseSequential);
2525 ThenPartNL -> ThenPart OptNL ${
2529 // This scope is closed in CondSuffix
2530 WhileHead -> while OpenScope Block ${
2535 ForThen -> ForPart OptNL ThenPartNL ${
2543 // This scope is closed in CondSuffix
2544 WhilePart -> while OpenScope Expression Block ${
2545 $0.type = Xcond_statement;
2549 | WhileHead OptNL do Block ${
2550 $0.type = Xcond_statement;
2555 IfPart -> if OpenScope Expression OpenScope Block ${
2556 $0.type = Xcond_statement;
2559 var_block_close(config2context(config), CloseParallel);
2561 | if OpenScope Block OptNL then OpenScope Block ${
2562 $0.type = Xcond_statement;
2565 var_block_close(config2context(config), CloseParallel);
2569 // This scope is closed in CondSuffix
2570 SwitchPart -> switch OpenScope Expression ${
2573 | switch OpenScope Block ${
2577 ###### print exec cases
2579 case Xcond_statement:
2581 struct cond_statement *cs = cast(cond_statement, e);
2582 struct casepart *cp;
2584 do_indent(indent, "for");
2585 if (bracket) printf(" {\n"); else printf(":\n");
2586 print_exec(cs->forpart, indent+1, bracket);
2589 do_indent(indent, "} then {\n");
2591 do_indent(indent, "then:\n");
2592 print_exec(cs->thenpart, indent+1, bracket);
2594 if (bracket) do_indent(indent, "}\n");
2598 if (cs->condpart && cs->condpart->type == Xbinode &&
2599 cast(binode, cs->condpart)->op == Block) {
2601 do_indent(indent, "while {\n");
2603 do_indent(indent, "while:\n");
2604 print_exec(cs->condpart, indent+1, bracket);
2606 do_indent(indent, "} do {\n");
2608 do_indent(indent, "do:\n");
2609 print_exec(cs->dopart, indent+1, bracket);
2611 do_indent(indent, "}\n");
2613 do_indent(indent, "while ");
2614 print_exec(cs->condpart, 0, bracket);
2619 print_exec(cs->dopart, indent+1, bracket);
2621 do_indent(indent, "}\n");
2626 do_indent(indent, "switch");
2628 do_indent(indent, "if");
2629 if (cs->condpart && cs->condpart->type == Xbinode &&
2630 cast(binode, cs->condpart)->op == Block) {
2635 print_exec(cs->condpart, indent+1, bracket);
2637 do_indent(indent, "}\n");
2639 do_indent(indent, "then:\n");
2640 print_exec(cs->thenpart, indent+1, bracket);
2644 print_exec(cs->condpart, 0, bracket);
2650 print_exec(cs->thenpart, indent+1, bracket);
2652 do_indent(indent, "}\n");
2657 for (cp = cs->casepart; cp; cp = cp->next) {
2658 do_indent(indent, "case ");
2659 print_exec(cp->value, -1, 0);
2664 print_exec(cp->action, indent+1, bracket);
2666 do_indent(indent, "}\n");
2669 do_indent(indent, "else");
2674 print_exec(cs->elsepart, indent+1, bracket);
2676 do_indent(indent, "}\n");
2681 ###### propagate exec cases
2682 case Xcond_statement:
2684 // forpart and dopart must return Vnone
2685 // thenpart must return Vnone if there is a dopart,
2686 // otherwise it is like elsepart.
2688 // be bool if there is not casepart
2689 // match casepart->values if there is a switchpart
2690 // either be bool or match casepart->value if there
2692 // elsepart, casepart->action must match there return type
2693 // expected of this statement.
2694 struct cond_statement *cs = cast(cond_statement, prog);
2695 struct casepart *cp;
2697 t = propagate_types(cs->forpart, c, ok, Vnone, 0);
2698 if (!vtype_compat(Vnone, t, 0))
2700 t = propagate_types(cs->dopart, c, ok, Vnone, 0);
2701 if (!vtype_compat(Vnone, t, 0))
2704 t = propagate_types(cs->thenpart, c, ok, Vnone, 0);
2705 if (!vtype_compat(Vnone, t, 0))
2708 if (cs->casepart == NULL)
2709 propagate_types(cs->condpart, c, ok, Vbool, 0);
2711 /* Condpart must match case values, with bool permitted */
2713 for (cp = cs->casepart;
2714 cp && (t == Vunknown); cp = cp->next)
2715 t = propagate_types(cp->value, c, ok, Vunknown, 0);
2716 if (t == Vunknown && cs->condpart)
2717 t = propagate_types(cs->condpart, c, ok, Vunknown, 1);
2718 // Now we have a type (I hope) push it down
2719 if (t != Vunknown) {
2720 for (cp = cs->casepart; cp; cp = cp->next)
2721 propagate_types(cp->value, c, ok, t, 0);
2722 propagate_types(cs->condpart, c, ok, t, 1);
2725 // (if)then, else, and case parts must return expected type.
2726 if (!cs->dopart && type == Vunknown)
2727 type = propagate_types(cs->thenpart, c, ok, Vunknown, bool_permitted);
2728 if (type == Vunknown)
2729 type = propagate_types(cs->elsepart, c, ok, Vunknown, bool_permitted);
2730 for (cp = cs->casepart;
2731 cp && type == Vunknown;
2733 type = propagate_types(cp->action, c, ok, Vunknown, bool_permitted);
2734 if (type > Vunknown) {
2736 propagate_types(cs->thenpart, c, ok, type, bool_permitted);
2737 propagate_types(cs->elsepart, c, ok, type, bool_permitted);
2738 for (cp = cs->casepart; cp ; cp = cp->next)
2739 propagate_types(cp->action, c, ok, type, bool_permitted);
2745 ###### interp exec cases
2746 case Xcond_statement:
2748 struct value v, cnd;
2749 struct casepart *cp;
2750 struct cond_statement *c = cast(cond_statement, e);
2753 interp_exec(c->forpart);
2756 cnd = interp_exec(c->condpart);
2759 if (!(cnd.vtype == Vnone ||
2760 (cnd.vtype == Vbool && cnd.bool != 0)))
2762 // cnd is Vnone or Vbool, doesn't need to be freed
2764 interp_exec(c->dopart);
2767 v = interp_exec(c->thenpart);
2768 if (v.vtype != Vnone || !c->dopart)
2772 } while (c->dopart);
2774 for (cp = c->casepart; cp; cp = cp->next) {
2775 v = interp_exec(cp->value);
2776 if (value_cmp(v, cnd) == 0) {
2779 return interp_exec(cp->action);
2785 return interp_exec(c->elsepart);
2790 ### Finally the whole program.
2792 Somewhat reminiscent of Pascal a (current) Ocean program starts with
2793 the keyword "program" and a list of variable names which are assigned
2794 values from command line arguments. Following this is a `block` which
2795 is the code to execute.
2797 As this is the top level, several things are handled a bit
2799 The whole program is not interpreted by `interp_exec` as that isn't
2800 passed the argument list which the program requires. Similarly type
2801 analysis is a bit more interesting at this level.
2806 ###### Parser: grammar
2809 Program -> program OpenScope Varlist Block OptNL ${
2812 $0->left = reorder_bilist($<3);
2814 var_block_close(config2context(config), CloseSequential);
2815 if (config2context(config)->scope_stack) abort();
2818 tok_err(config2context(config),
2819 "error: unhandled parse error.", &$1);
2822 Varlist -> Varlist ArgDecl ${
2831 ArgDecl -> IDENTIFIER ${ {
2832 struct variable *v = var_decl(config2context(config), $1.txt);
2839 ###### print binode cases
2841 do_indent(indent, "program");
2842 for (b2 = cast(binode, b->left); b2; b2 = cast(binode, b2->right)) {
2844 print_exec(b2->left, 0, 0);
2850 print_exec(b->right, indent+1, bracket);
2852 do_indent(indent, "}\n");
2855 ###### propagate binode cases
2856 case Program: abort();
2858 ###### core functions
2860 static int analyse_prog(struct exec *prog, struct parse_context *c)
2862 struct binode *b = cast(binode, prog);
2869 propagate_types(b->right, c, &ok, Vnone, 0);
2874 for (b = cast(binode, b->left); b; b = cast(binode, b->right)) {
2875 struct var *v = cast(var, b->left);
2876 if (v->var->val.vtype == Vunknown) {
2877 v->var->where_set = b;
2878 val_init(&v->var->val, Vstr);
2881 b = cast(binode, prog);
2884 propagate_types(b->right, c, &ok, Vnone, 0);
2889 /* Make sure everything is still consistent */
2890 propagate_types(b->right, c, &ok, Vnone, 0);
2894 static void interp_prog(struct exec *prog, char **argv)
2896 struct binode *p = cast(binode, prog);
2902 al = cast(binode, p->left);
2904 struct var *v = cast(var, al->left);
2905 struct value *vl = &v->var->val;
2907 if (argv[0] == NULL) {
2908 printf("Not enough args\n");
2911 al = cast(binode, al->right);
2913 if (!parse_value(vl, argv[0]))
2917 v = interp_exec(p->right);
2921 ###### interp binode cases
2922 case Program: abort();
2924 ## And now to test it out.
2926 Having a language requires having a "hello world" program. I'll
2927 provide a little more than that: a program that prints "Hello world"
2928 finds the GCD of two numbers, prints the first few elements of
2929 Fibonacci, and performs a binary search for a number.
2931 ###### File: oceani.mk
2934 @echo "===== TEST ====="
2935 ./oceani --section "test: hello" oceani.mdc 55 33
2940 print "Hello World, what lovely oceans you have!"
2941 /* When a variable is defined in both branches of an 'if',
2942 * and used afterwards, the variables are merged.
2948 print "Is", A, "bigger than", B,"? ", bigger
2949 /* If a variable is not used after the 'if', no
2950 * merge happens, so types can be different
2954 print A, "is more than twice", B, "?", double
2957 print "double", A, "is only", double
2966 print "GCD of", A, "and", B,"is", a
2968 print a, "is not positive, cannot calculate GCD"
2970 print b, "is not positive, cannot calculate GCD"
2975 print "Fibonacci:", f1,f2,
2976 then togo = togo - 1
2984 /* Binary search... */
2989 mid := (lo + hi) / 2
3001 print "Yay, I found", target
3003 print "Closest I found was", mid