1 # Ocean Interpreter - Stoney Creek version
3 Ocean is intended to be an compiled language, so this interpreter is
4 not targeted at being the final product. It is, rather, an intermediate
5 stage, and fills that role in two distinct ways.
7 Firstly, it exists as a platform to experiment with the early language
8 design. An interpreter is easy to write and easy to get working, so
9 the barrier for entry is lower if I aim to start with an interpreter.
11 Secondly, the plan for the Ocean compiler is to write it in the
12 [Ocean language](http://ocean-lang.org). To achieve this we naturally
13 need some sort of boot-strap process and this interpreter - written in
14 portable C - will fill that role. It will be used to bootstrap the
17 Two features that are not needed to fill either of these roles are
18 performance and completeness. The interpreter only needs to be fast
19 enough to run small test programs and occasionally to run the compiler
20 on itself. It only needs to be complete enough to test aspects of the
21 design which are developed before the compiler is working, and to run
22 the compiler on itself. Any features not used by the compiler when
23 compiling itself are superfluous. They may be included anyway, but
26 Nonetheless, the interpreter should end up being reasonably complete,
27 and any performance bottlenecks which appear and are easily fixed, will
32 This second version of the interpreter exists to test out the
33 structured statement providing conditions and iteration, and simple
34 variable scoping. Clearly we need some minimal other functionality so
35 that values can be tested and instructions iterated over. All that
36 functionality is clearly not normative at this stage (not that
37 anything is **really** normative yet) and will change, so early test
38 code will certainly break in later versions.
40 The under-test parts of the language are:
42 - conditional/looping structured statements
43 - the `use` statement which is needed for that
44 - Variable binding using ":=" and "::=", and assignment using "=".
46 Elements which are present to make a usable language are:
48 - "blocks" of multiple statements.
49 - `pass`: a statement which does nothing.
50 - expressions: `+`, `-`, `*`, `/` can apply to numbers and `++` can
51 catenate strings. `and`, `or`, `not` manipulate Booleans, and
52 normal comparison operators can work on all three types.
53 - `print`: will print the values in a list of expressions.
54 - `program`: is given a list of identifiers to initialize from
59 Versions of the interpreter which obviously do not support a complete
60 language will be named after creeks and streams. This one is Stoney
63 Once we have something reasonably resembling a complete language, the
64 names of rivers will be used.
65 Early versions of the compiler will be named after seas. Major
66 releases of the compiler will be named after oceans. Hopefully I will
67 be finished once I get to the Pacific Ocean release.
71 As well as parsing and executing a program, the interpreter can print
72 out the program from the parsed internal structure. This is useful
73 for validating the parsing.
74 So the main requirements of the interpreter are:
76 - Parse the program, possible with tracing
77 - Analyse the parsed program to ensure consistency
81 This is all performed by a single C program extracted with
84 There will be two formats for printing the program: a default and one
85 that uses bracketing. So a `--bracket` command line option is needed
86 for that. Normally the first code section found is used, however an
87 alternate section can be requested so that a file (such as this one)
88 can contain multiple programs This is effected with the `--section`
91 ###### File: oceani.mk
93 myCFLAGS := -Wall -g -fplan9-extensions
94 CFLAGS := $(filter-out $(myCFLAGS),$(CFLAGS)) $(myCFLAGS)
95 myLDLIBS:= libparser.o libscanner.o libmdcode.o -licuuc
96 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
98 all :: $(LDLIBS) oceani
99 oceani.c oceani.h : oceani.mdc parsergen
100 ./parsergen -o oceani --LALR --tag Parser oceani.mdc
101 oceani.mk: oceani.mdc md2c
104 oceani: oceani.o $(LDLIBS)
105 $(CC) $(CFLAGS) -o oceani oceani.o $(LDLIBS)
107 ###### Parser: header
110 struct parse_context {
111 struct token_config config;
119 #define container_of(ptr, type, member) ({ \
120 const typeof( ((type *)0)->member ) *__mptr = (ptr); \
121 (type *)( (char *)__mptr - offsetof(type,member) );})
123 #define config2context(_conf) container_of(_conf, struct parse_context, \
132 #include <sys/mman.h>
151 static char Usage[] = "Usage: oceani --trace --print --noexec --brackets"
152 "--section=SectionName prog.ocn\n";
153 static const struct option long_options[] = {
154 {"trace", 0, NULL, 't'},
155 {"print", 0, NULL, 'p'},
156 {"noexec", 0, NULL, 'n'},
157 {"brackets", 0, NULL, 'b'},
158 {"section", 1, NULL, 's'},
161 const char *options = "tpnbs";
162 int main(int argc, char *argv[])
168 char *section = NULL;
169 struct parse_context context = {
171 .ignored = (1 << TK_line_comment)
172 | (1 << TK_block_comment),
173 .number_chars = ".,_+-",
178 int doprint=0, dotrace=0, doexec=1, brackets=0;
181 while ((opt = getopt_long(argc, argv, options, long_options, NULL))
184 case 't': dotrace=1; break;
185 case 'p': doprint=1; break;
186 case 'n': doexec=0; break;
187 case 'b': brackets=1; break;
188 case 's': section = optarg; break;
189 default: fprintf(stderr, Usage);
193 if (optind >= argc) {
194 fprintf(stderr, "oceani: no input file given\n");
197 fd = open(argv[optind], O_RDONLY);
199 fprintf(stderr, "oceani: cannot open %s\n", argv[optind]);
202 context.file_name = argv[optind];
203 len = lseek(fd, 0, 2);
204 file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
205 s = code_extract(file, file+len, NULL);
207 fprintf(stderr, "oceani: could not find any code in %s\n",
213 for (ss = s; ss; ss = ss->next) {
214 struct text sec = ss->section;
215 if (sec.len == strlen(section) &&
216 strncmp(sec.txt, section, sec.len) == 0)
220 prog = parse_oceani(ss->code, &context.config,
221 dotrace ? stderr : NULL);
223 fprintf(stderr, "oceani: cannot find section %s\n",
228 prog = parse_oceani(s->code, &context.config,
229 dotrace ? stderr : NULL);
231 fprintf(stderr, "oceani: fatal parser error.\n");
232 context.parse_error = 1;
235 print_exec(*prog, 0, brackets);
236 if (prog && doexec && !context.parse_error) {
237 if (!analyse_prog(*prog, &context)) {
238 fprintf(stderr, "oceani: type error in program - not running.\n");
241 interp_prog(*prog, argv+optind+1);
248 struct section *t = s->next;
254 exit(context.parse_error ? 1 : 0);
259 These four requirements of parse, analyse, print, interpret apply to
260 each language element individually so that is how most of the code
263 Three of the four are fairly self explanatory. The one that requires
264 a little explanation is the analysis step.
266 The current language design does not require (or even allow) the types
267 of variables to be declared, but they must still have a single type.
268 Different operations impose different requirements on the variables,
269 for example addition requires both arguments to be numeric, and
270 assignment requires the variable on the left to have the same type as
271 the expression on the right.
273 Analysis involves propagating these type requirements around and
274 consequently setting the type of each variable. If any requirements
275 are violated (e.g. a string is compared with a number) or if a
276 variable needs to have two different types, then an error is raised
277 and the program will not run.
279 If the same variable is declared in both branchs of an 'if/else', or
280 in all cases of a 'switch' then the multiple instances may be merged
281 into just one variable if the variable is references after the
282 conditional statement. When this happens, the types must naturally be
283 consistent across all the branches. When the variable is not used
284 outside the if, the variables in the different branches are distinct
285 and can be of different types.
287 Determining the types of all variables early is important for
288 processing command line arguments. These can be assigned to any type
289 of variable, but we must first know the correct type so any required
290 conversion can happen. If a variable is associated with a command
291 line argument but no type can be interpreted (e.g. the variable is
292 only ever used in a `print` statement), then the type is set to
295 Undeclared names may only appear in "use" statements and "case" expressions.
296 These names are given a type of "label" and a unique value.
297 This allows them to fill the role of a name in an enumerated type, which
298 is useful for testing the `switch` statement.
300 As we will see, the condition part of a `while` statement can return
301 either a Boolean or some other type. This requires that the expect
302 type that gets passed around comprises a type (`enum vtype`) and a
303 flag to indicate that `Vbool` is also permitted.
305 As there are, as yet, no distinct types that are compatible, there
306 isn't much subtlety in the analysis. When we have distinct number
307 types, this will become more interesting.
311 When analysis discovers an inconsistency it needs to report an error;
312 just refusing to run the code esure that the error doesn't cascade,
313 but by itself it isn't very useful. A clear understand of the sort of
314 error message that are useful will help guide the process of analysis.
316 At a simplistic level, the only sort of error that type analysis can
317 report is that the type of some construct doesn't match a contextual
318 requirement. For example, in `4 + "hello"` the addition provides a
319 contextual requirement for numbers, but `"hello"` is not a number. In
320 this particular example no further information is needed as the types
321 are obvious from local information. When a variable is involved that
322 isn't the case. It may be helpful to explain why the variable has a
323 particular type, by indicating the location where the type was set,
324 whether by declaration or usage.
326 Using a recursive-descent analysis we can easily detect a problem at
327 multiple locations. In "`hello:= "there"; 4 + hello`" the addition
328 will detect that one argument is not a number and the usage of `hello`
329 will detect that a number was wanted, but not provided. In this
330 (early) version of the language, we will generate error reports at
331 multiple locations, to the use of `hello` will report an error and
332 explain were the value was set, and the addition will report an error
333 and say why numbers are needed. To be able to report locations for
334 errors, each language element will need to record a file location
335 (line and column) and each variable will need to record the language
336 element where its type was set. For now we will assume that each line
337 of an error message indicates one location in the file, and up to 2
338 types. So we provide a `printf`-like function which takes a format, a
339 language (a `struct exec` which has not yet been introduced), and 2
340 types. "`$1`" reports the first type, "`$2`" reports the second. We
341 will need a function to print the location, once we know how that is
346 static void fput_loc(struct exec *loc, FILE *f);
348 ###### core functions
350 static void type_err(struct parse_context *c,
351 char *fmt, struct exec *loc,
352 enum vtype t1, enum vtype t2)
354 fprintf(stderr, "%s:", c->file_name);
355 fput_loc(loc, stderr);
356 for (; *fmt ; fmt++) {
363 case '%': fputc(*fmt, stderr); break;
364 default: fputc('?', stderr); break;
366 fputs(vtype_names[t1], stderr);
369 fputs(vtype_names[t2], stderr);
380 One last introductory step before detailing the language elements and
381 providing their four requirements is to establish the data structures
382 to store these elements.
384 There are two key objects that we need to work with: executable
385 elements which comprise the program, and values which the program
386 works with. Between these are the variables in their various scopes
387 which hold the values.
391 Values can be numbers, which we represent as multi-precision
392 fractions, strings, Booleans and labels. When analysing the program
393 we also need to allow for places where no value is meaningful
394 (`Vnone`) and where we don't know what type to expect yet (`Vunknown`
395 which can be anything and `Vnolabel` which can be anything except a
396 label). A 2 character 'tail' is included in each value as the scanner
397 wants to parse that from the end of numbers and we need somewhere to
398 put it. It is currently ignored but one day might allow for
399 e.g. "imaginary" numbers.
401 Values are never shared, they are always copied when used, and freed
402 when no longer needed.
404 When propagating type information around the program, we need to
405 determine if two types are compatible, where `Vunknown` is compatible
406 which anything, and `Vnolabel` is compatible with anything except a
407 label. A separate funtion to encode this rule will simplify some code
410 When assigning command line arguments to variable, we need to be able
411 to parse each type from a string.
419 myLDLIBS := libnumber.o libstring.o -lgmp
420 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
424 enum vtype {Vnolabel, Vunknown, Vnone, Vstr, Vnum, Vbool, Vlabel} vtype;
434 char *vtype_names[] = {"nolabel", "unknown", "none", "string",
435 "number", "Boolean", "label"};
438 static void free_value(struct value v)
443 case Vunknown: break;
444 case Vstr: free(v.str.txt); break;
445 case Vnum: mpq_clear(v.num); break;
451 static int vtype_compat(enum vtype require, enum vtype have, int bool_permitted)
453 if (bool_permitted && have == Vbool)
457 return have != Vlabel;
461 return have == Vunknown || require == have;
465 ###### value functions
467 static void val_init(struct value *val, enum vtype type)
473 case Vunknown: break;
475 mpq_init(val->num); break;
477 val->str.txt = malloc(1);
489 static struct value dup_value(struct value v)
496 case Vunknown: break;
505 mpq_set(rv.num, v.num);
508 rv.str.len = v.str.len;
509 rv.str.txt = malloc(rv.str.len);
510 memcpy(rv.str.txt, v.str.txt, v.str.len);
516 static int value_cmp(struct value left, struct value right)
519 if (left.vtype != right.vtype)
520 return left.vtype - right.vtype;
521 switch (left.vtype) {
522 case Vlabel: cmp = left.label == right.label ? 0 : 1; break;
523 case Vnum: cmp = mpq_cmp(left.num, right.num); break;
524 case Vstr: cmp = text_cmp(left.str, right.str); break;
525 case Vbool: cmp = left.bool - right.bool; break;
528 case Vunknown: cmp = 0;
533 static struct text text_join(struct text a, struct text b)
536 rv.len = a.len + b.len;
537 rv.txt = malloc(rv.len);
538 memcpy(rv.txt, a.txt, a.len);
539 memcpy(rv.txt+a.len, b.txt, b.len);
543 static void print_value(struct value v)
547 printf("*Unknown*"); break;
550 printf("*no-value*"); break;
552 printf("*label-%p*", v.label); break;
554 printf("%.*s", v.str.len, v.str.txt); break;
556 printf("%s", v.bool ? "True":"False"); break;
561 mpf_set_q(fl, v.num);
562 gmp_printf("%Fg", fl);
569 static int parse_value(struct value *vl, char *arg)
580 vl->str.len = strlen(arg);
581 vl->str.txt = malloc(vl->str.len);
582 memcpy(vl->str.txt, arg, vl->str.len);
589 tx.txt = arg; tx.len = strlen(tx.txt);
590 if (number_parse(vl->num, vl->tail, tx) == 0)
593 mpq_neg(vl->num, vl->num);
596 if (strcasecmp(arg, "true") == 0 ||
597 strcmp(arg, "1") == 0)
599 else if (strcasecmp(arg, "false") == 0 ||
600 strcmp(arg, "0") == 0)
603 printf("Bad bool: %s\n", arg);
613 Variables are scoped named values. We store the names in a linked
614 list of "bindings" sorted lexically, and use sequential search and
621 struct binding *next; // in lexical order
625 This linked list is stored in the parse context so that "reduce"
626 functions can find or add variables, and so the analysis phase can
627 ensure that every variable gets a type.
631 struct binding *varlist; // In lexical order
635 static struct binding *find_binding(struct parse_context *c, struct text s)
637 struct binding **l = &c->varlist;
642 (cmp = text_cmp((*l)->name, s)) < 0)
646 n = calloc(1, sizeof(*n));
653 Each name can be linked to multiple variables defined in different
654 scopes. Each scope starts where the name is declared and continues
655 until the end of the containing code block. Scopes of a given name
656 cannot nest, so a declaration while a name is in-scope is an error.
658 ###### binding fields
659 struct variable *var;
663 struct variable *previous;
665 struct binding *name;
666 struct exec *where_set; // where type was set
670 While the naming seems strange, we include local constants in the
671 definition of variables. A name declared `var := value` can
672 subsequently be changed, but a name declared `var ::= value` cannot -
675 ###### variable fields
678 Scopes in parallel branches can be partially merged. More
679 specifically, if a given name is declared in both branches of an
680 if/else then it's scope is a candidate for merging. Similarly if
681 every branch of an exhaustive switch (e.g. has an "else" clause)
682 declares a given name, then the scopes from the branches are
683 candidates for merging.
685 Note that names declared inside a loop (which is only parallel to
686 itself) are never visible after the loop. Similarly names defined in
687 scopes which are not parallel, such as those started by `for` and
688 `switch`, are never visible after the scope. Only variable defined in
689 both `then` and `else` (including the implicit then after an `if`, and
690 excluding `then` used with `for`) and in all `case`s and `else` of a
691 `switch` or `while` can be visible beyond the `if`/`switch`/`while`.
693 Labels, which are a bit like variables, follow different rules.
694 Labels are not explicitly declared, but if an undeclared name appears
695 in a context where a label is legal, that effectively declares the
696 name as a label. The declaration remains in force (or in scope) at
697 least to the end of the immediately containing block and conditionally
698 in any larger containing block which does not declare the name in some
699 other way. Importantly, the conditional scope extension happens even
700 if the label is only used in parallel branch of a conditional -- when
701 used in one branch it is treated as having been declared in all
704 Merge candidates are tentatively visible beyond the end of the
705 branching statement which creates them. If the name is used, the
706 merge is affirmed and they become a single variable visible at the
707 outer layer. If not - if it is redeclared first - the merge lapses.
709 To track scopes we have an extra stack, implemented as a linked list,
710 which roughly parallels the parse stack and which is used exclusively
711 for scoping. When a new scope is opened, a new frame is pushed and
712 the child-count of the parent frame is incremented. This child-count
713 is used to distinguish between the first of a set of parallel scopes,
714 in which declared variables must not be in scope, and subsequent
715 branches, whether they must already be conditionally scoped.
717 To push a new frame *before* any code in the frame is parsed, we need a
718 grammar reduction. This is most easily achieved with a grammar
719 element which derives the empty string, and created the new scope when
720 it is recognized. This can be placed, for example, between a keyword
721 like "if" and the code following it.
725 struct scope *parent;
731 struct scope *scope_stack;
734 static void scope_pop(struct parse_context *c)
736 struct scope *s = c->scope_stack;
738 c->scope_stack = s->parent;
743 static void scope_push(struct parse_context *c)
745 struct scope *s = calloc(1, sizeof(*s));
747 c->scope_stack->child_count += 1;
748 s->parent = c->scope_stack;
756 OpenScope -> ${ scope_push(config2context(config)); }$
759 Each variable records a scope depth and is in one of four states:
761 - "in scope". This is the case between the declaration of the
762 variable and the end of the containing block, and also between
763 the usage with affirms a merge and the end of the block.
765 The scope depth is not greater than the current parse context scope
766 nest depth. When the block of that depth closes, the state will
767 change. To achieve this, all "in scope" variables are linked
768 together as a stack in nesting order.
770 - "pending". The "in scope" block has closed, but other parallel
771 scopes are still being processed. So far, every parallel block at
772 the same level that has closed has declared the name.
774 The scope depth is the depth of the last parallel block that
775 enclosed the declaration, and that has closed.
777 - "conditionally in scope". The "in scope" block and all parallel
778 scopes have closed, and no further mention of the name has been
779 seen. This state includes a secondary nest depth which records the
780 outermost scope seen since the variable became conditionally in
781 scope. If a use of the name is found, the variable becomes "in
782 scope" and that secondary depth becomes the recorded scope depth.
783 If the name is declared as a new variable, the old variable becomes
784 "out of scope" and the recorded scope depth stays unchanged.
786 - "out of scope". The variable is neither in scope nor conditionally
787 in scope. It is permanently out of scope now and can be removed from
788 the "in scope" stack.
791 ###### variable fields
792 int depth, min_depth;
793 enum { OutScope, PendingScope, CondScope, InScope } scope;
794 struct variable *in_scope;
798 struct variable *in_scope;
800 All variables with the same name are linked together using the
801 'previous' link. Those variable that have
802 been affirmatively merged all have a 'merged' pointer that points to
803 one primary variable - the most recently declared instance. When
804 merging variables, we need to also adjust the 'merged' pointer on any
805 other variables that had previously been merged with the one that will
806 no longer be primary.
808 ###### variable fields
809 struct variable *merged;
813 static void variable_merge(struct variable *primary, struct variable *secondary)
819 primary = primary->merged;
821 for (v = primary->previous; v; v=v->previous)
822 if (v == secondary || v == secondary->merged ||
823 v->merged == secondary ||
824 (v->merged && v->merged == secondary->merged)) {
832 while (context.varlist) {
833 struct binding *b = context.varlist;
834 struct variable *v = b->var;
835 context.varlist = b->next;
838 struct variable *t = v;
846 #### Manipulating Bindings
848 When a name is conditionally visible, a new declaration discards the
849 old binding - the condition lapses. Conversely a usage of the name
850 affirms the visibility and extends it to the end of the containing
851 block - i.e. the block that contains both the original declaration and
852 the latest usage. This is determined from `min_depth`. When a
853 conditionally visible variable gets affirmed like this, it is also
854 merged with other conditionally visible variables with the same name.
856 When we parse a variable declaration we either signal an error if the
857 name is currently bound, or create a new variable at the current nest
858 depth if the name is unbound or bound to a conditionally scoped or
859 pending-scope variable. If the previous variable was conditionally
860 scoped, it and its homonyms becomes out-of-scope.
862 When we parse a variable reference (including non-declarative
863 assignment) we signal an error if the name is not bound or is bound to
864 a pending-scope variable; update the scope if the name is bound to a
865 conditionally scoped variable; or just proceed normally if the named
866 variable is in scope.
868 When we exit a scope, any variables bound at this level are either
869 marked out of scope or pending-scoped, depending on whether the
870 scope was sequential or parallel.
872 When exiting a parallel scope we check if there are any variables that
873 were previously pending and are still visible. If there are, then
874 there weren't redeclared in the most recent scope, so they cannot be
875 merged and must become out-of-scope. If it is not the first of
876 parallel scopes (based on `child_count`), we check that there was a
877 previous binding that is still pending-scope. If there isn't, the new
878 variable must now be out-of-scope.
880 When exiting a sequential scope that immediately enclosed parallel
881 scopes, we need to resolve any pending-scope variables. If there was
882 no `else` clause, and we cannot determine that the `switch` was exhaustive,
883 we need to mark all pending-scope variable as out-of-scope. Otherwise
884 all pending-scope variables become conditionally scoped.
887 enum closetype { CloseSequential, CloseParallel, CloseElse };
891 static struct variable *var_decl(struct parse_context *c, struct text s)
893 struct binding *b = find_binding(c, s);
894 struct variable *v = b->var;
896 switch (v ? v->scope : OutScope) {
898 /* Signal error ... once I build error signalling support */
902 v && v->scope == CondScope;
908 v = calloc(1, sizeof(*v));
909 v->previous = b->var;
912 v->min_depth = v->depth = c->scope_depth;
914 v->in_scope = c->in_scope;
916 val_init(&v->val, Vunknown);
920 static struct variable *var_ref(struct parse_context *c, struct text s)
922 struct binding *b = find_binding(c, s);
923 struct variable *v = b->var;
926 switch (v ? v->scope : OutScope) {
929 /* Signal an error - once that is possible */
932 /* All CondScope variables of this name need to be merged
935 v->depth = v->min_depth;
937 for (v2 = v->previous;
938 v2 && v2->scope == CondScope;
940 variable_merge(v, v2);
948 static void var_block_close(struct parse_context *c, enum closetype ct)
950 /* close of all variables that are in_scope */
951 struct variable *v, **vp, *v2;
954 for (vp = &c->in_scope;
955 v = *vp, v && v->depth > c->scope_depth && v->min_depth > c->scope_depth;
959 case CloseParallel: /* handle PendingScope */
963 if (c->scope_stack->child_count == 1)
964 v->scope = PendingScope;
965 else if (v->previous &&
966 v->previous->scope == PendingScope)
967 v->scope = PendingScope;
968 else if (v->val.vtype == Vlabel)
969 v->scope = PendingScope;
970 else if (v->name->var == v)
972 if (ct == CloseElse) {
973 /* All Pending variables with this name
974 * are now Conditional */
976 v2 && v2->scope == PendingScope;
978 v2->scope = CondScope;
983 v2 && v2->scope == PendingScope;
985 if (v2->val.vtype != Vlabel)
986 v2->scope = OutScope;
988 case OutScope: break;
991 case CloseSequential:
992 if (v->val.vtype == Vlabel)
993 v->scope = PendingScope;
999 /* There was no 'else', so we can only become
1000 * conditional if we know the cases were exhaustive,
1001 * and that doesn't mean anything yet.
1002 * So only labels become conditional..
1005 v2 && v2->scope == PendingScope;
1007 if (v2->val.vtype == Vlabel) {
1008 v2->scope = CondScope;
1009 v2->min_depth = c->scope_depth;
1011 v2->scope = OutScope;
1014 case OutScope: break;
1018 if (v->scope == OutScope)
1027 Executables can be lots of different things. In many cases an
1028 executable is just an operation combined with one or two other
1029 executables. This allows for expressions and lists etc. Other times
1030 an executable is something quite specific like a constant or variable
1031 name. So we define a `struct exec` to be a general executable with a
1032 type, and a `struct binode` which is a subclass of `exec` and forms a
1033 node in a binary tree and holding an operation. There will be other
1034 subclasses, and to access these we need to be able to `cast` the
1035 `exec` into the various other types.
1038 #define cast(structname, pointer) ({ \
1039 const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \
1040 if (__mptr && *__mptr != X##structname) abort(); \
1041 (struct structname *)( (char *)__mptr);})
1043 #define new(structname) ({ \
1044 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1045 __ptr->type = X##structname; \
1046 __ptr->line = -1; __ptr->column = -1; \
1049 #define new_pos(structname, token) ({ \
1050 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1051 __ptr->type = X##structname; \
1052 __ptr->line = token.line; __ptr->column = token.col; \
1061 enum exec_types type;
1069 struct exec *left, *right;
1072 ###### ast functions
1074 static int __fput_loc(struct exec *loc, FILE *f)
1076 if (loc->line >= 0) {
1077 fprintf(f, "%d:%d: ", loc->line, loc->column);
1080 if (loc->type == Xbinode)
1081 return __fput_loc(cast(binode,loc)->left, f) ||
1082 __fput_loc(cast(binode,loc)->right, f);
1085 static void fput_loc(struct exec *loc, FILE *f)
1087 if (!__fput_loc(loc, f))
1088 fprintf(f, "??:??: ");
1091 Each different type of `exec` node needs a number of functions
1092 defined, a bit like methods. We must be able to be able to free it,
1093 print it, analyse it and execute it. Once we have specific `exec`
1094 types we will need to parse them too. Let's take this a bit more
1099 The parser generator requires a `free_foo` function for each struct
1100 that stores attributes and they will be `exec`s of subtypes there-of.
1101 So we need `free_exec` which can handle all the subtypes, and we need
1104 ###### ast functions
1106 static void free_binode(struct binode *b)
1111 free_exec(b->right);
1115 ###### core functions
1116 static void free_exec(struct exec *e)
1125 ###### forward decls
1127 static void free_exec(struct exec *e);
1129 ###### free exec cases
1130 case Xbinode: free_binode(cast(binode, e)); break;
1134 Printing an `exec` requires that we know the current indent level for
1135 printing line-oriented components. As will become clear later, we
1136 also want to know what sort of bracketing to use.
1138 ###### ast functions
1140 static void do_indent(int i, char *str)
1147 ###### core functions
1148 static void print_binode(struct binode *b, int indent, int bracket)
1152 ## print binode cases
1156 static void print_exec(struct exec *e, int indent, int bracket)
1162 print_binode(cast(binode, e), indent, bracket); break;
1167 ###### forward decls
1169 static void print_exec(struct exec *e, int indent, int bracket);
1173 As discussed, analysis involves propagating type requirements around
1174 the program and looking for errors.
1176 So `propagate_types` is passed an expected type (being a `vtype`
1177 together with a `bool_permitted` flag) that the `exec` is expected to
1178 return, and returns the type that it does return, either of which can
1179 be `Vunknown`. An `ok` flag is passed by reference. It is set to `0`
1180 when an error is found, and `2` when any change is made. If it
1181 remains unchanged at `1`, then no more propagation is needed.
1183 ###### core functions
1185 static enum vtype propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1186 enum vtype type, int bool_permitted)
1193 switch (prog->type) {
1196 struct binode *b = cast(binode, prog);
1198 ## propagate binode cases
1202 ## propagate exec cases
1209 Interpreting an `exec` doesn't require anything but the `exec`. State
1210 is stored in variables and each variable will be directly linked from
1211 within the `exec` tree. The exception to this is the whole `program`
1212 which needs to look at command line arguments. The `program` will be
1213 interpreted separately.
1215 Each `exec` can return a value, which may be `Vnone` but shouldn't be `Vunknown`.
1217 ###### core functions
1219 static struct value interp_exec(struct exec *e)
1229 struct binode *b = cast(binode, e);
1230 struct value left, right;
1231 left.vtype = right.vtype = Vnone;
1233 ## interp binode cases
1235 free_value(left); free_value(right);
1238 ## interp exec cases
1243 ## Language elements
1245 Each language element needs to be parsed, printed, analysed,
1246 interpreted, and freed. There are several, so let's just start with
1247 the easy ones and work our way up.
1251 We have already met values as separate objects. When manifest
1252 constants appear in the program text that must result in an executable
1253 which has a constant value. So the `val` structure embeds a value in
1269 $0 = new_pos(val, $1);
1270 $0->val.vtype = Vbool;
1274 $0 = new_pos(val, $1);
1275 $0->val.vtype = Vbool;
1279 $0 = new_pos(val, $1);
1280 $0->val.vtype = Vnum;
1281 if (number_parse($0->val.num, $0->val.tail, $1.txt) == 0)
1282 mpq_init($0->val.num);
1285 $0 = new_pos(val, $1);
1286 $0->val.vtype = Vstr;
1287 string_parse(&$1, '\\', &$0->val.str, $0->val.tail);
1290 $0 = new_pos(val, $1);
1291 $0->val.vtype = Vstr;
1292 string_parse(&$1, '\\', &$0->val.str, $0->val.tail);
1295 ###### print exec cases
1298 struct val *v = cast(val, e);
1299 if (v->val.vtype == Vstr)
1301 print_value(v->val);
1302 if (v->val.vtype == Vstr)
1307 ###### propagate exec cases
1310 struct val *val = cast(val, prog);
1311 if (!vtype_compat(type, val->val.vtype, bool_permitted)) {
1312 type_err(c, "error: expected %1 found %2",
1313 prog, type, val->val.vtype);
1316 return val->val.vtype;
1319 ###### interp exec cases
1321 return dup_value(cast(val, e)->val);
1323 ###### ast functions
1324 static void free_val(struct val *v)
1332 ###### free exec cases
1333 case Xval: free_val(cast(val, e)); break;
1335 ###### ast functions
1336 // Move all nodes from 'b' to 'rv', reversing the order.
1337 // In 'b' 'left' is a list, and 'right' is the last node.
1338 // In 'rv', left' is the first node and 'right' is a list.
1339 static struct binode *reorder_bilist(struct binode *b)
1341 struct binode *rv = NULL;
1344 struct exec *t = b->right;
1348 b = cast(binode, b->left);
1358 Just as we used as `val` to wrap a value into an `exec`, we similarly
1359 need a `var` to wrap a `variable` into an exec. While each `val`
1360 contained a copy of the value, each `var` hold a link to the variable
1361 because it really is the same variable no matter where it appears.
1362 When a variable is used, we need to remember to follow the `->merged`
1363 link to find the primary instance.
1371 struct variable *var;
1377 VariableDecl -> IDENTIFIER := ${ {
1378 struct variable *v = var_decl(config2context(config), $1.txt);
1379 $0 = new_pos(var, $1);
1382 | IDENTIFIER ::= ${ {
1383 struct variable *v = var_decl(config2context(config), $1.txt);
1385 $0 = new_pos(var, $1);
1389 Variable -> IDENTIFIER ${ {
1390 struct variable *v = var_ref(config2context(config), $1.txt);
1391 $0 = new_pos(var, $1);
1393 /* This might be a label - allocate a var just in case */
1394 v = var_decl(config2context(config), $1.txt);
1396 val_init(&v->val, Vlabel);
1403 ###### print exec cases
1406 struct var *v = cast(var, e);
1408 struct binding *b = v->var->name;
1409 printf("%.*s", b->name.len, b->name.txt);
1416 if (loc->type == Xvar) {
1417 struct var *v = cast(var, loc);
1419 struct binding *b = v->var->name;
1420 fprintf(stderr, "%.*s", b->name.len, b->name.txt);
1422 fputs("???", stderr);
1424 fputs("NOTVAR", stderr);
1427 ###### propagate exec cases
1431 struct var *var = cast(var, prog);
1432 struct variable *v = var->var;
1434 type_err(c, "%d:BUG: no variable!!", prog, Vnone, Vnone);
1440 if (v->val.vtype == Vunknown) {
1441 if (type > Vunknown && *ok != 0) {
1442 val_init(&v->val, type);
1443 v->where_set = prog;
1448 if (!vtype_compat(type, v->val.vtype, bool_permitted)) {
1449 type_err(c, "error: expected %1 but variable %v is %2", prog,
1450 type, v->val.vtype);
1451 type_err(c, "info: this is where %v was set to %1", v->where_set,
1452 v->val.vtype, Vnone);
1455 if (type <= Vunknown)
1456 return v->val.vtype;
1460 ###### interp exec cases
1463 struct var *var = cast(var, e);
1464 struct variable *v = var->var;
1468 return dup_value(v->val);
1471 ###### ast functions
1473 static void free_var(struct var *v)
1478 ###### free exec cases
1479 case Xvar: free_var(cast(var, e)); break;
1481 ### Expressions: Boolean
1483 Our first user of the `binode` will be expressions, and particularly
1484 Boolean expressions. As I haven't implemented precedence in the
1485 parser generator yet, we need different names from each precedence
1486 level used by expressions. The outer most or lowest level precedence
1487 are Boolean `or` `and`, and `not` which form an `Expression` out of `BTerm`s
1498 Expression -> Expression or BTerm ${ {
1499 struct binode *b = new(binode);
1505 | BTerm ${ $0 = $<1; }$
1507 BTerm -> BTerm and BFact ${ {
1508 struct binode *b = new(binode);
1514 | BFact ${ $0 = $<1; }$
1516 BFact -> not BFact ${ {
1517 struct binode *b = new(binode);
1524 ###### print binode cases
1526 print_exec(b->left, -1, 0);
1528 print_exec(b->right, -1, 0);
1531 print_exec(b->left, -1, 0);
1533 print_exec(b->right, -1, 0);
1537 print_exec(b->right, -1, 0);
1540 ###### propagate binode cases
1544 /* both must be Vbool, result is Vbool */
1545 propagate_types(b->left, c, ok, Vbool, 0);
1546 propagate_types(b->right, c, ok, Vbool, 0);
1547 if (type != Vbool && type > Vunknown) {
1548 type_err(c, "error: %1 operation found where %2 expected", prog,
1554 ###### interp binode cases
1556 rv = interp_exec(b->left);
1557 right = interp_exec(b->right);
1558 rv.bool = rv.bool && right.bool;
1561 rv = interp_exec(b->left);
1562 right = interp_exec(b->right);
1563 rv.bool = rv.bool || right.bool;
1566 rv = interp_exec(b->right);
1570 ### Expressions: Comparison
1572 Of slightly higher precedence that Boolean expressions are
1574 A comparison takes arguments of any type, but the two types must be
1577 To simplify the parsing we introduce an `eop` which can return an
1578 expression operator.
1585 ###### ast functions
1586 static void free_eop(struct eop *e)
1601 | Expr CMPop Expr ${ {
1602 struct binode *b = new(binode);
1608 | Expr ${ $0 = $<1; }$
1613 CMPop -> < ${ $0.op = Less; }$
1614 | > ${ $0.op = Gtr; }$
1615 | <= ${ $0.op = LessEq; }$
1616 | >= ${ $0.op = GtrEq; }$
1617 | == ${ $0.op = Eql; }$
1618 | != ${ $0.op = NEql; }$
1620 ###### print binode cases
1628 print_exec(b->left, -1, 0);
1630 case Less: printf(" < "); break;
1631 case LessEq: printf(" <= "); break;
1632 case Gtr: printf(" > "); break;
1633 case GtrEq: printf(" >= "); break;
1634 case Eql: printf(" == "); break;
1635 case NEql: printf(" != "); break;
1638 print_exec(b->right, -1, 0);
1641 ###### propagate binode cases
1648 /* Both must match but not labels, result is Vbool */
1649 t = propagate_types(b->left, c, ok, Vnolabel, 0);
1651 propagate_types(b->right, c, ok, t, 0);
1653 t = propagate_types(b->right, c, ok, Vnolabel, 0);
1655 t = propagate_types(b->left, c, ok, t, 0);
1657 if (!vtype_compat(type, Vbool, 0)) {
1658 type_err(c, "error: Comparison returns %1 but %2 expected", prog,
1664 ###### interp binode cases
1673 left = interp_exec(b->left);
1674 right = interp_exec(b->right);
1675 cmp = value_cmp(left, right);
1678 case Less: rv.bool = cmp < 0; break;
1679 case LessEq: rv.bool = cmp <= 0; break;
1680 case Gtr: rv.bool = cmp > 0; break;
1681 case GtrEq: rv.bool = cmp >= 0; break;
1682 case Eql: rv.bool = cmp == 0; break;
1683 case NEql: rv.bool = cmp != 0; break;
1684 default: rv.bool = 0; break;
1689 ### Expressions: The rest
1691 The remaining expressions with the highest precedence are arithmetic
1692 and string concatenation. There are `Expr`, `Term`, and `Factor`.
1693 The `Factor` is where the `Value` and `Variable` that we already have
1696 `+` and `-` are both infix and prefix operations (where they are
1697 absolute value and negation). These have different operator names.
1699 We also have a 'Bracket' operator which records where parentheses were
1700 found. This make it easy to reproduce these when printing. Once
1701 precedence is handled better I might be able to discard this.
1713 Expr -> Expr Eop Term ${ {
1714 struct binode *b = new(binode);
1720 | Term ${ $0 = $<1; }$
1722 Term -> Term Top Factor ${ {
1723 struct binode *b = new(binode);
1729 | Factor ${ $0 = $<1; }$
1731 Factor -> ( Expression ) ${ {
1732 struct binode *b = new_pos(binode, $1);
1738 struct binode *b = new(binode);
1743 | Value ${ $0 = $<1; }$
1744 | Variable ${ $0 = $<1; }$
1747 Eop -> + ${ $0.op = Plus; }$
1748 | - ${ $0.op = Minus; }$
1750 Uop -> + ${ $0.op = Absolute; }$
1751 | - ${ $0.op = Negate; }$
1753 Top -> * ${ $0.op = Times; }$
1754 | / ${ $0.op = Divide; }$
1755 | ++ ${ $0.op = Concat; }$
1757 ###### print binode cases
1763 print_exec(b->left, indent, 0);
1765 case Plus: printf(" + "); break;
1766 case Minus: printf(" - "); break;
1767 case Times: printf(" * "); break;
1768 case Divide: printf(" / "); break;
1769 case Concat: printf(" ++ "); break;
1772 print_exec(b->right, indent, 0);
1776 print_exec(b->right, indent, 0);
1780 print_exec(b->right, indent, 0);
1784 print_exec(b->right, indent, 0);
1788 ###### propagate binode cases
1793 /* both must be numbers, result is Vnum */
1796 /* as propagate_types ignores a NULL,
1797 * unary ops fit here too */
1798 propagate_types(b->left, c, ok, Vnum, 0);
1799 propagate_types(b->right, c, ok, Vnum, 0);
1800 if (!vtype_compat(type, Vnum, 0)) {
1801 type_err(c, "error: Arithmetic returns %1 but %2 expected", prog,
1808 /* both must be Vstr, result is Vstr */
1809 propagate_types(b->left, c, ok, Vstr, 0);
1810 propagate_types(b->right, c, ok, Vstr, 0);
1811 if (!vtype_compat(type, Vstr, 0)) {
1812 type_err(c, "error: Concat returns %1 but %2 expected", prog,
1819 return propagate_types(b->right, c, ok, type, 0);
1821 ###### interp binode cases
1824 rv = interp_exec(b->left);
1825 right = interp_exec(b->right);
1826 mpq_add(rv.num, rv.num, right.num);
1829 rv = interp_exec(b->left);
1830 right = interp_exec(b->right);
1831 mpq_sub(rv.num, rv.num, right.num);
1834 rv = interp_exec(b->left);
1835 right = interp_exec(b->right);
1836 mpq_mul(rv.num, rv.num, right.num);
1839 rv = interp_exec(b->left);
1840 right = interp_exec(b->right);
1841 mpq_div(rv.num, rv.num, right.num);
1844 rv = interp_exec(b->right);
1845 mpq_neg(rv.num, rv.num);
1848 rv = interp_exec(b->right);
1849 mpq_abs(rv.num, rv.num);
1852 rv = interp_exec(b->right);
1855 left = interp_exec(b->left);
1856 right = interp_exec(b->right);
1858 rv.str = text_join(left.str, right.str);
1861 ### Blocks, Statements, and Statement lists.
1863 Now that we have expressions out of the way we need to turn to
1864 statements. There are simple statements and more complex statements.
1865 Simple statements do not contain newlines, complex statements do.
1867 Statements often come in sequences and we have corresponding simple
1868 statement lists and complex statement lists.
1869 The former comprise only simple statements separated by semicolons.
1870 The later comprise complex statements and simple statement lists. They are
1871 separated by newlines. Thus the semicolon is only used to separate
1872 simple statements on the one line. This may be overly restrictive,
1873 but I'm not sure I every want a complex statement to share a line with
1876 Note that a simple statement list can still use multiple lines if
1877 subsequent lines are indented, so
1879 ###### Example: wrapped simple statement list
1884 is a single simple statement list. This might allow room for
1885 confusion, so I'm not set on it yet.
1887 A simple statement list needs no extra syntax. A complex statement
1888 list has two syntactic forms. It can be enclosed in braces (much like
1889 C blocks), or it can be introduced by a colon and continue until an
1890 unindented newline (much like Python blocks). With this extra syntax
1891 it is referred to as a block.
1893 Note that a block does not have to include any newlines if it only
1894 contains simple statements. So both of:
1896 if condition: a=b; d=f
1898 if condition { a=b; print f }
1902 In either case the list is constructed from a `binode` list with
1903 `Block` as the operator. When parsing the list it is most convenient
1904 to append to the end, so a list is a list and a statement. When using
1905 the list it is more convenient to consider a list to be a statement
1906 and a list. So we need a function to re-order a list.
1907 `reorder_bilist` serves this purpose.
1909 The only stand-alone statement we introduce at this stage is `pass`
1910 which does nothing and is represented as a `NULL` pointer in a `Block`
1930 Block -> Open Statementlist Close ${ $0 = $<2; }$
1931 | Open Newlines Statementlist Close ${ $0 = $<3; }$
1932 | Open SimpleStatements } ${ $0 = reorder_bilist($<2); }$
1933 | Open Newlines SimpleStatements } ${ $0 = reorder_bilist($<3); }$
1934 | : Statementlist ${ $0 = $<2; }$
1935 | : SimpleStatements ${ $0 = reorder_bilist($<2); }$
1937 Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<1); }$
1939 ComplexStatements -> ComplexStatements ComplexStatement ${
1945 | ComplexStatements NEWLINE ${ $0 = $<1; }$
1946 | ComplexStatement ${
1954 ComplexStatement -> SimpleStatements NEWLINE ${
1955 $0 = reorder_bilist($<1);
1957 ## ComplexStatement Grammar
1960 SimpleStatements -> SimpleStatements ; SimpleStatement ${
1966 | SimpleStatement ${
1972 | SimpleStatements ; ${ $0 = $<1; }$
1974 SimpleStatement -> pass ${ $0 = NULL; }$
1975 ## SimpleStatement Grammar
1977 ###### print binode cases
1981 if (b->left == NULL)
1984 print_exec(b->left, indent, 0);
1987 print_exec(b->right, indent, 0);
1990 // block, one per line
1991 if (b->left == NULL)
1992 do_indent(indent, "pass\n");
1994 print_exec(b->left, indent, bracket);
1996 print_exec(b->right, indent, bracket);
2000 ###### propagate binode cases
2003 /* If any statement returns something other then Vnone
2004 * or Vbool then all such must return same type.
2005 * As each statement may be Vnone or something else,
2006 * we must always pass Vunknown down, otherwise an incorrect
2007 * error might occur. We never return Vnone unless it is
2012 for (e = b; e; e = cast(binode, e->right)) {
2013 t = propagate_types(e->left, c, ok, Vunknown, bool_permitted);
2014 if (bool_permitted && t == Vbool)
2016 if (t != Vunknown && t != Vnone && t != Vbool) {
2017 if (type == Vunknown)
2019 else if (t != type) {
2020 type_err(c, "error: expected %1, found %2",
2029 ###### interp binode cases
2031 while (rv.vtype == Vnone &&
2034 rv = interp_exec(b->left);
2035 b = cast(binode, b->right);
2039 ### The Print statement
2041 `print` is a simple statement that takes a comma-separated list of
2042 expressions and prints the values separated by spaces and terminated
2043 by a newline. No control of formatting is possible.
2045 `print` faces the same list-ordering issue as blocks, and uses the
2051 ###### SimpleStatement Grammar
2053 | print ExpressionList ${
2054 $0 = reorder_bilist($<2);
2056 | print ExpressionList , ${
2061 $0 = reorder_bilist($0);
2072 ExpressionList -> ExpressionList , Expression ${
2085 ###### print binode cases
2088 do_indent(indent, "print");
2092 print_exec(b->left, -1, 0);
2096 b = cast(binode, b->right);
2102 ###### propagate binode cases
2105 /* don't care but all must be consistent */
2106 propagate_types(b->left, c, ok, Vnolabel, 0);
2107 propagate_types(b->right, c, ok, Vnolabel, 0);
2110 ###### interp binode cases
2116 for ( ; b; b = cast(binode, b->right))
2120 left = interp_exec(b->left);
2133 ###### Assignment statement
2135 An assignment will assign a value to a variable, providing it hasn't
2136 be declared as a constant. The analysis phase ensures that the type
2137 will be correct so the interpreter just needs to perform the
2138 calculation. There is a form of assignment which declares a new
2139 variable as well as assigning a value. If a name is assigned before
2140 it is declared, and error will be raised as the name is created as
2141 `Vlabel` and it is illegal to assign to such names.
2147 ###### SimpleStatement Grammar
2148 | Variable = Expression ${ {
2149 struct var *v = cast(var, $1);
2155 if (v->var && !v->var->constant) {
2159 | VariableDecl Expression ${
2166 ###### print binode cases
2169 do_indent(indent, "");
2170 print_exec(b->left, indent, 0);
2172 print_exec(b->right, indent, 0);
2178 do_indent(indent, "");
2179 print_exec(b->left, indent, 0);
2180 if (cast(var, b->left)->var->constant)
2184 print_exec(b->right, indent, 0);
2189 ###### propagate binode cases
2193 /* Both must match and not be labels, result is Vnone */
2194 t = propagate_types(b->left, c, ok, Vnolabel, 0);
2196 if (propagate_types(b->right, c, ok, t, 0) != t)
2197 if (b->left->type == Xvar)
2198 type_err(c, "info: variable %v was set as %1 here.",
2199 cast(var, b->left)->var->where_set, t, Vnone);
2201 t = propagate_types(b->right, c, ok, Vnolabel, 0);
2203 propagate_types(b->left, c, ok, t, 0);
2209 ###### interp binode cases
2214 struct variable *v = cast(var, b->left)->var;
2217 right = interp_exec(b->right);
2220 right.vtype = Vunknown;
2224 ### The `use` statement
2226 The `use` statement is the last "simple" statement. It is needed when
2227 the condition in a conditional statement is a block. `use` works much
2228 like `return` in C, but only completes the `condition`, not the whole
2234 ###### SimpleStatement Grammar
2236 $0 = new_pos(binode, $1);
2241 ###### print binode cases
2244 do_indent(indent, "use ");
2245 print_exec(b->right, -1, 0);
2250 ###### propagate binode cases
2253 /* result matches value */
2254 return propagate_types(b->right, c, ok, type, 0);
2256 ###### interp binode cases
2259 rv = interp_exec(b->right);
2262 ### The Conditional Statement
2264 This is the biggy and currently the only complex statement. This
2265 subsumes `if`, `while`, `do/while`, `switch`, and some parts of `for`.
2266 It is comprised of a number of parts, all of which are optional though
2267 set combinations apply. Each part is (usually) a key word (`then` is
2268 sometimes optional) followed by either an expression of a code block,
2269 except the `casepart` which is a "key word and an expression" followed
2270 by a code block. The code-block option is valid for all parts and,
2271 where an expression is also allowed, the code block can use the `use`
2272 statement to report a value. If the code block does no report a value
2273 the effect is similar to reporting `False`.
2275 The `else` and `case` parts, as well as `then` when combined with
2276 `if`, can contain a `use` statement which will apply to some
2277 containing conditional statement. `for` parts, `do` parts and `then`
2278 parts used with `for` can never contain a `use`, except in some
2279 subordinate conditional statement.
2281 If there is a `forpart`, it is executed first, only once.
2282 If there is a `dopart`, then it is executed repeatedly providing
2283 always that the `condpart` or `cond`, if present, does not return a non-True
2284 value. `condpart` can fail to return any value if it simply executes
2285 to completion. This is treated the same as returning `True`.
2287 If there is a `thenpart` it will be executed whenever the `condpart`
2288 or `cond` returns True (or does not return any value), but this will happen
2289 *after* `dopart` (when present).
2291 If `elsepart` is present it will be executed at most once when the
2292 condition returns `False` or some value that isn't `True` and isn't
2293 matched by any `casepart`. If there are any `casepart`s, they will be
2294 executed when the condition returns a matching value.
2296 The particular sorts of values allowed in case parts has not yet been
2297 determined in the language design, so nothing is prohibited.
2299 The various blocks in this complex statement potentially provide scope
2300 for variables as described earlier. Each such block must include the
2301 "OpenScope" nonterminal before parsing the block, and must call
2302 `var_block_close()` when closing the block.
2304 The code following "`if`", "`switch`" and "`for`" does not get its own
2305 scope, but is in a scope covering the whole statement, so names
2306 declared there cannot be redeclared elsewhere. Similarly the
2307 condition following "`while`" is in a scope the covers the body
2308 ("`do`" part) of the loop, and which does not allow conditional scope
2309 extension. Code following "`then`" (both looping and non-looping),
2310 "`else`" and "`case`" each get their own local scope.
2312 The type requirements on the code block in a `whilepart` are quite
2313 unusal. It is allowed to return a value of some identifiable type, in
2314 which case the loop abort and an appropriate `casepart` is run, or it
2315 can return a Boolean, in which case the loop either continues to the
2316 `dopart` (on `True`) or aborts and runs the `elsepart` (on `False`).
2317 This is different both from the `ifpart` code block which is expected to
2318 return a Boolean, or the `switchpart` code block which is expected to
2319 return the same type as the casepart values. The correct analysis of
2320 the type of the `whilepart` code block is the reason for the
2321 `bool_permitted` flag which is passed to `propagate_types()`.
2323 The `cond_statement` cannot fit into a `binode` so a new `exec` is
2332 struct exec *action;
2333 struct casepart *next;
2335 struct cond_statement {
2337 struct exec *forpart, *condpart, *dopart, *thenpart, *elsepart;
2338 struct casepart *casepart;
2341 ###### ast functions
2343 static void free_casepart(struct casepart *cp)
2347 free_exec(cp->value);
2348 free_exec(cp->action);
2355 static void free_cond_statement(struct cond_statement *s)
2359 free_exec(s->forpart);
2360 free_exec(s->condpart);
2361 free_exec(s->dopart);
2362 free_exec(s->thenpart);
2363 free_exec(s->elsepart);
2364 free_casepart(s->casepart);
2368 ###### free exec cases
2369 case Xcond_statement: free_cond_statement(cast(cond_statement, e)); break;
2371 ###### ComplexStatement Grammar
2372 | CondStatement ${ $0 = $<1; }$
2377 // both ForThen and Whilepart open scopes, and CondSuffix only
2378 // closes one - so in the first branch here we have another to close.
2379 CondStatement -> ForThen WhilePart CondSuffix ${
2381 $0->forpart = $1.forpart; $1.forpart = NULL;
2382 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
2383 $0->condpart = $2.condpart; $2.condpart = NULL;
2384 $0->dopart = $2.dopart; $2.dopart = NULL;
2385 var_block_close(config2context(config), CloseSequential);
2387 | WhilePart CondSuffix ${
2389 $0->condpart = $1.condpart; $1.condpart = NULL;
2390 $0->dopart = $1.dopart; $1.dopart = NULL;
2392 | SwitchPart CondSuffix ${
2396 | IfPart IfSuffix ${
2398 $0->condpart = $1.condpart; $1.condpart = NULL;
2399 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
2400 // This is where we close an "if" statement
2401 var_block_close(config2context(config), CloseSequential);
2404 CondSuffix -> IfSuffix ${
2406 // This is where we close scope of the whole
2407 // "for" or "while" statement
2408 var_block_close(config2context(config), CloseSequential);
2410 | CasePart CondSuffix ${
2412 $1->next = $0->casepart;
2417 CasePart -> Newlines case Expression OpenScope Block ${
2418 $0 = calloc(1,sizeof(struct casepart));
2421 var_block_close(config2context(config), CloseParallel);
2423 | case Expression OpenScope Block ${
2424 $0 = calloc(1,sizeof(struct casepart));
2427 var_block_close(config2context(config), CloseParallel);
2431 IfSuffix -> Newlines ${ $0 = new(cond_statement); }$
2432 | Newlines else OpenScope Block ${
2433 $0 = new(cond_statement);
2435 var_block_close(config2context(config), CloseElse);
2437 | else OpenScope Block ${
2438 $0 = new(cond_statement);
2440 var_block_close(config2context(config), CloseElse);
2442 | Newlines else OpenScope CondStatement ${
2443 $0 = new(cond_statement);
2445 var_block_close(config2context(config), CloseElse);
2447 | else OpenScope CondStatement ${
2448 $0 = new(cond_statement);
2450 var_block_close(config2context(config), CloseElse);
2455 // These scopes are closed in CondSuffix
2456 ForPart -> for OpenScope SimpleStatements ${
2457 $0 = reorder_bilist($<3);
2459 | for OpenScope Block ${
2463 ThenPart -> then OpenScope SimpleStatements ${
2464 $0 = reorder_bilist($<3);
2465 var_block_close(config2context(config), CloseSequential);
2467 | then OpenScope Block ${
2469 var_block_close(config2context(config), CloseSequential);
2472 ThenPartNL -> ThenPart OptNL ${
2476 // This scope is closed in CondSuffix
2477 WhileHead -> while OpenScope Block ${
2482 ForThen -> ForPart OptNL ThenPartNL ${
2490 // This scope is closed in CondSuffix
2491 WhilePart -> while OpenScope Expression Block ${
2492 $0.type = Xcond_statement;
2496 | WhileHead OptNL do Block ${
2497 $0.type = Xcond_statement;
2502 IfPart -> if OpenScope Expression OpenScope Block ${
2503 $0.type = Xcond_statement;
2506 var_block_close(config2context(config), CloseParallel);
2508 | if OpenScope Block OptNL then OpenScope Block ${
2509 $0.type = Xcond_statement;
2512 var_block_close(config2context(config), CloseParallel);
2516 // This scope is closed in CondSuffix
2517 SwitchPart -> switch OpenScope Expression ${
2520 | switch OpenScope Block ${
2524 ###### print exec cases
2526 case Xcond_statement:
2528 struct cond_statement *cs = cast(cond_statement, e);
2529 struct casepart *cp;
2531 do_indent(indent, "for");
2532 if (bracket) printf(" {\n"); else printf(":\n");
2533 print_exec(cs->forpart, indent+1, bracket);
2536 do_indent(indent, "} then {\n");
2538 do_indent(indent, "then:\n");
2539 print_exec(cs->thenpart, indent+1, bracket);
2541 if (bracket) do_indent(indent, "}\n");
2545 if (cs->condpart && cs->condpart->type == Xbinode &&
2546 cast(binode, cs->condpart)->op == Block) {
2548 do_indent(indent, "while {\n");
2550 do_indent(indent, "while:\n");
2551 print_exec(cs->condpart, indent+1, bracket);
2553 do_indent(indent, "} do {\n");
2555 do_indent(indent, "do:\n");
2556 print_exec(cs->dopart, indent+1, bracket);
2558 do_indent(indent, "}\n");
2560 do_indent(indent, "while ");
2561 print_exec(cs->condpart, 0, bracket);
2566 print_exec(cs->dopart, indent+1, bracket);
2568 do_indent(indent, "}\n");
2573 do_indent(indent, "switch");
2575 do_indent(indent, "if");
2576 if (cs->condpart && cs->condpart->type == Xbinode &&
2577 cast(binode, cs->condpart)->op == Block) {
2582 print_exec(cs->condpart, indent+1, bracket);
2584 do_indent(indent, "}\n");
2586 do_indent(indent, "then:\n");
2587 print_exec(cs->thenpart, indent+1, bracket);
2591 print_exec(cs->condpart, 0, bracket);
2597 print_exec(cs->thenpart, indent+1, bracket);
2599 do_indent(indent, "}\n");
2604 for (cp = cs->casepart; cp; cp = cp->next) {
2605 do_indent(indent, "case ");
2606 print_exec(cp->value, -1, 0);
2611 print_exec(cp->action, indent+1, bracket);
2613 do_indent(indent, "}\n");
2616 do_indent(indent, "else");
2621 print_exec(cs->elsepart, indent+1, bracket);
2623 do_indent(indent, "}\n");
2628 ###### propagate exec cases
2629 case Xcond_statement:
2631 // forpart and dopart must return Vnone
2632 // thenpart must return Vnone if there is a dopart,
2633 // otherwise it is like elsepart.
2635 // be bool if there is not casepart
2636 // match casepart->values if there is a switchpart
2637 // either be bool or match casepart->value if there
2639 // elsepart, casepart->action must match there return type
2640 // expected of this statement.
2641 struct cond_statement *cs = cast(cond_statement, prog);
2642 struct casepart *cp;
2644 t = propagate_types(cs->forpart, c, ok, Vnone, 0);
2645 if (!vtype_compat(Vnone, t, 0))
2647 t = propagate_types(cs->dopart, c, ok, Vnone, 0);
2648 if (!vtype_compat(Vnone, t, 0))
2651 t = propagate_types(cs->thenpart, c, ok, Vnone, 0);
2652 if (!vtype_compat(Vnone, t, 0))
2655 if (cs->casepart == NULL)
2656 propagate_types(cs->condpart, c, ok, Vbool, 0);
2658 /* Condpart must match case values, with bool permitted */
2660 for (cp = cs->casepart;
2661 cp && (t == Vunknown); cp = cp->next)
2662 t = propagate_types(cp->value, c, ok, Vunknown, 0);
2663 if (t == Vunknown && cs->condpart)
2664 t = propagate_types(cs->condpart, c, ok, Vunknown, 1);
2665 // Now we have a type (I hope) push it down
2666 if (t != Vunknown) {
2667 for (cp = cs->casepart; cp; cp = cp->next)
2668 propagate_types(cp->value, c, ok, t, 0);
2669 propagate_types(cs->condpart, c, ok, t, 1);
2672 // (if)then, else, and case parts must return expected type.
2673 if (!cs->dopart && type == Vunknown)
2674 type = propagate_types(cs->thenpart, c, ok, Vunknown, bool_permitted);
2675 if (type == Vunknown)
2676 type = propagate_types(cs->elsepart, c, ok, Vunknown, bool_permitted);
2677 for (cp = cs->casepart;
2678 cp && type == Vunknown;
2680 type = propagate_types(cp->action, c, ok, Vunknown, bool_permitted);
2681 if (type > Vunknown) {
2683 propagate_types(cs->thenpart, c, ok, type, bool_permitted);
2684 propagate_types(cs->elsepart, c, ok, type, bool_permitted);
2685 for (cp = cs->casepart; cp ; cp = cp->next)
2686 propagate_types(cp->action, c, ok, type, bool_permitted);
2692 ###### interp exec cases
2693 case Xcond_statement:
2695 struct value v, cnd;
2696 struct casepart *cp;
2697 struct cond_statement *c = cast(cond_statement, e);
2699 interp_exec(c->forpart);
2702 cnd = interp_exec(c->condpart);
2705 if (!(cnd.vtype == Vnone ||
2706 (cnd.vtype == Vbool && cnd.bool != 0)))
2710 interp_exec(c->dopart);
2713 v = interp_exec(c->thenpart);
2714 if (v.vtype != Vnone || !c->dopart)
2718 } while (c->dopart);
2720 for (cp = c->casepart; cp; cp = cp->next) {
2721 v = interp_exec(cp->value);
2722 if (value_cmp(v, cnd) == 0) {
2725 return interp_exec(cp->action);
2731 return interp_exec(c->elsepart);
2736 ### Finally the whole program.
2738 Somewhat reminiscent of Pascal a (current) Ocean program starts with
2739 the keyword "program" and a list of variable names which are assigned
2740 values from command line arguments. Following this is a `block` which
2741 is the code to execute.
2743 As this is the top level, several things are handled a bit
2745 The whole program is not interpreted by `interp_exec` as that isn't
2746 passed the argument list which the program requires. Similarly type
2747 analysis is a bit more interesting at this level.
2752 ###### Parser: grammar
2755 Program -> program OpenScope Varlist Block OptNL ${
2758 $0->left = reorder_bilist($<3);
2760 var_block_close(config2context(config), CloseSequential);
2761 if (config2context(config)->scope_stack) abort();
2764 fprintf(stderr, "%s:%d:%d: error: unhandled parse error.\n",
2765 config2context(config)->file_name, $1.line, $1.col);
2766 config2context(config)->parse_error = 1;
2769 Varlist -> Varlist ArgDecl ${
2778 ArgDecl -> IDENTIFIER ${ {
2779 struct variable *v = var_decl(config2context(config), $1.txt);
2786 ###### print binode cases
2788 do_indent(indent, "program");
2789 for (b2 = cast(binode, b->left); b2; b2 = cast(binode, b2->right)) {
2791 print_exec(b2->left, 0, 0);
2797 print_exec(b->right, indent+1, bracket);
2799 do_indent(indent, "}\n");
2802 ###### propagate binode cases
2803 case Program: abort();
2805 ###### core functions
2807 static int analyse_prog(struct exec *prog, struct parse_context *c)
2809 struct binode *b = cast(binode, prog);
2816 propagate_types(b->right, c, &ok, Vnone, 0);
2821 for (b = cast(binode, b->left); b; b = cast(binode, b->right)) {
2822 struct var *v = cast(var, b->left);
2823 if (v->var->val.vtype == Vunknown) {
2824 v->var->where_set = b;
2825 val_init(&v->var->val, Vstr);
2828 b = cast(binode, prog);
2831 propagate_types(b->right, c, &ok, Vnone, 0);
2836 /* Make sure everything is still consistent */
2837 propagate_types(b->right, c, &ok, Vnone, 0);
2841 static void interp_prog(struct exec *prog, char **argv)
2843 struct binode *p = cast(binode, prog);
2849 al = cast(binode, p->left);
2851 struct var *v = cast(var, al->left);
2852 struct value *vl = &v->var->val;
2854 if (argv[0] == NULL) {
2855 printf("Not enough args\n");
2858 al = cast(binode, al->right);
2860 if (!parse_value(vl, argv[0]))
2864 v = interp_exec(p->right);
2868 ###### interp binode cases
2869 case Program: abort();
2871 ## And now to test it out.
2873 Having a language requires having a "hello world" program. I'll
2874 provide a little more than that: a program that prints "Hello world"
2875 finds the GCD of two numbers, prints the first few elements of
2876 Fibonacci, and performs a binary search for a number.
2878 ###### File: oceani.mk
2881 @echo "===== TEST ====="
2882 ./oceani --section "test: hello" oceani.mdc 55 33
2887 print "Hello World, what lovely oceans you have!"
2888 /* When a variable is defined in both branches of an 'if',
2889 * and used afterwards, the variables are merged.
2895 print "Is", A, "bigger than", B,"? ", bigger
2896 /* If a variable is not used after the 'if', no
2897 * merge happens, so types can be different
2901 print A, "is more than twice", B, "?", double
2904 print "double", A, "is only", double
2913 print "GCD of", A, "and", B,"is", a
2915 print a, "is not positive, cannot calculate GCD"
2917 print b, "is not positive, cannot calculate GCD"
2922 print "Fibonacci:", f1,f2,
2923 then togo = togo - 1
2931 /* Binary search... */
2936 mid := (lo + hi) / 2
2948 print "Yay, I found", target
2950 print "Closest I found was", mid