1 # Ocean Interpreter - Stoney Creek version
3 Ocean is intended to be an compiled language, so this interpreter is
4 not targeted at being the final product. It is, rather, an intermediate
5 stage, and fills that role in two distinct ways.
7 Firstly, it exists as a platform to experiment with the early language
8 design. An interpreter is easy to write and easy to get working, so
9 the barrier for entry is lower if I aim to start with an interpreter.
11 Secondly, the plan for the Ocean compiler is to write it in the
12 [Ocean language](http://ocean-lang.org). To achieve this we naturally
13 need some sort of boot-strap process and this interpreter - written in
14 portable C - will fill that role. It will be used to bootstrap the
17 Two features that are not needed to fill either of these roles are
18 performance and completeness. The interpreter only needs to be fast
19 enough to run small test programs and occasionally to run the compiler
20 on itself. It only needs to be complete enough to test aspects of the
21 design which are developed before the compiler is working, and to run
22 the compiler on itself. Any features not used by the compiler when
23 compiling itself are superfluous. They may be included anyway, but
26 Nonetheless, the interpreter should end up being reasonably complete,
27 and any performance bottlenecks which appear and are easily fixed, will
32 This second version of the interpreter exists to test out the
33 structured statement providing conditions and iteration, and simple
34 variable scoping. Clearly we need some minimal other functionality so
35 that values can be tested and instructions iterated over. All that
36 functionality is clearly not normative at this stage (not that
37 anything is **really** normative yet) and will change, so early test
38 code will certainly break in later versions.
40 The under-test parts of the language are:
42 - conditional/looping structured statements
43 - the `use` statement which is needed for that
44 - Variable binding using ":=" and "::=", and assignment using "=".
46 Elements which are present to make a usable language are:
48 - "blocks" of multiple statements.
49 - `pass`: a statement which does nothing.
50 - expressions: `+`, `-`, `*`, `/` can apply to numbers and `++` can
51 catenate strings. `and`, `or`, `not` manipulate Booleans, and
52 normal comparison operators can work on all three types.
53 - `print`: will print the values in a list of expressions.
54 - `program`: is given a list of identifiers to initialize from
59 Versions of the interpreter which obviously do not support a complete
60 language will be named after creeks and streams. This one is Stoney
63 Once we have something reasonably resembling a complete language, the
64 names of rivers will be used.
65 Early versions of the compiler will be named after seas. Major
66 releases of the compiler will be named after oceans. Hopefully I will
67 be finished once I get to the Pacific Ocean release.
71 As well as parsing and executing a program, the interpreter can print
72 out the program from the parsed internal structure. This is useful
73 for validating the parsing.
74 So the main requirements of the interpreter are:
76 - Parse the program, possibly with tracing
77 - Analyse the parsed program to ensure consistency
81 This is all performed by a single C program extracted with
84 There will be two formats for printing the program: a default and one
85 that uses bracketing. So a `--bracket` command line option is needed
86 for that. Normally the first code section found is used, however an
87 alternate section can be requested so that a file (such as this one)
88 can contain multiple programs This is effected with the `--section`
91 ###### File: oceani.mk
93 myCFLAGS := -Wall -g -fplan9-extensions
94 CFLAGS := $(filter-out $(myCFLAGS),$(CFLAGS)) $(myCFLAGS)
95 myLDLIBS:= libparser.o libscanner.o libmdcode.o -licuuc
96 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
98 all :: $(LDLIBS) oceani
99 oceani.c oceani.h : oceani.mdc parsergen
100 ./parsergen -o oceani --LALR --tag Parser oceani.mdc
101 oceani.mk: oceani.mdc md2c
104 oceani: oceani.o $(LDLIBS)
105 $(CC) $(CFLAGS) -o oceani oceani.o $(LDLIBS)
107 ###### Parser: header
110 struct parse_context {
111 struct token_config config;
119 #define container_of(ptr, type, member) ({ \
120 const typeof( ((type *)0)->member ) *__mptr = (ptr); \
121 (type *)( (char *)__mptr - offsetof(type,member) );})
123 #define config2context(_conf) container_of(_conf, struct parse_context, \
132 #include <sys/mman.h>
151 static char Usage[] = "Usage: oceani --trace --print --noexec --brackets"
152 "--section=SectionName prog.ocn\n";
153 static const struct option long_options[] = {
154 {"trace", 0, NULL, 't'},
155 {"print", 0, NULL, 'p'},
156 {"noexec", 0, NULL, 'n'},
157 {"brackets", 0, NULL, 'b'},
158 {"section", 1, NULL, 's'},
161 const char *options = "tpnbs";
162 int main(int argc, char *argv[])
168 char *section = NULL;
169 struct parse_context context = {
171 .ignored = (1 << TK_line_comment)
172 | (1 << TK_block_comment),
173 .number_chars = ".,_+-",
178 int doprint=0, dotrace=0, doexec=1, brackets=0;
181 while ((opt = getopt_long(argc, argv, options, long_options, NULL))
184 case 't': dotrace=1; break;
185 case 'p': doprint=1; break;
186 case 'n': doexec=0; break;
187 case 'b': brackets=1; break;
188 case 's': section = optarg; break;
189 default: fprintf(stderr, Usage);
193 if (optind >= argc) {
194 fprintf(stderr, "oceani: no input file given\n");
197 fd = open(argv[optind], O_RDONLY);
199 fprintf(stderr, "oceani: cannot open %s\n", argv[optind]);
202 context.file_name = argv[optind];
203 len = lseek(fd, 0, 2);
204 file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
205 s = code_extract(file, file+len, NULL);
207 fprintf(stderr, "oceani: could not find any code in %s\n",
213 for (ss = s; ss; ss = ss->next) {
214 struct text sec = ss->section;
215 if (sec.len == strlen(section) &&
216 strncmp(sec.txt, section, sec.len) == 0)
220 prog = parse_oceani(ss->code, &context.config,
221 dotrace ? stderr : NULL);
223 fprintf(stderr, "oceani: cannot find section %s\n",
228 prog = parse_oceani(s->code, &context.config,
229 dotrace ? stderr : NULL);
231 fprintf(stderr, "oceani: fatal parser error.\n");
232 context.parse_error = 1;
235 print_exec(*prog, 0, brackets);
236 if (prog && doexec && !context.parse_error) {
237 if (!analyse_prog(*prog, &context)) {
238 fprintf(stderr, "oceani: type error in program - not running.\n");
241 interp_prog(*prog, argv+optind+1);
248 struct section *t = s->next;
254 ## free context types
255 exit(context.parse_error ? 1 : 0);
260 These four requirements of parse, analyse, print, interpret apply to
261 each language element individually so that is how most of the code
264 Three of the four are fairly self explanatory. The one that requires
265 a little explanation is the analysis step.
267 The current language design does not require (or even allow) the types
268 of variables to be declared, but they must still have a single type.
269 Different operations impose different requirements on the variables,
270 for example addition requires both arguments to be numeric, and
271 assignment requires the variable on the left to have the same type as
272 the expression on the right.
274 Analysis involves propagating these type requirements around and
275 consequently setting the type of each variable. If any requirements
276 are violated (e.g. a string is compared with a number) or if a
277 variable needs to have two different types, then an error is raised
278 and the program will not run.
280 If the same variable is declared in both branchs of an 'if/else', or
281 in all cases of a 'switch' then the multiple instances may be merged
282 into just one variable if the variable is references after the
283 conditional statement. When this happens, the types must naturally be
284 consistent across all the branches. When the variable is not used
285 outside the if, the variables in the different branches are distinct
286 and can be of different types.
288 Determining the types of all variables early is important for
289 processing command line arguments. These can be assigned to any type
290 of variable, but we must first know the correct type so any required
291 conversion can happen. If a variable is associated with a command
292 line argument but no type can be interpreted (e.g. the variable is
293 only ever used in a `print` statement), then the type is set to
296 Undeclared names may only appear in "use" statements and "case" expressions.
297 These names are given a type of "label" and a unique value.
298 This allows them to fill the role of a name in an enumerated type, which
299 is useful for testing the `switch` statement.
301 As we will see, the condition part of a `while` statement can return
302 either a Boolean or some other type. This requires that the expect
303 type that gets passed around comprises a type (`enum vtype`) and a
304 flag to indicate that `Vbool` is also permitted.
306 As there are, as yet, no distinct types that are compatible, there
307 isn't much subtlety in the analysis. When we have distinct number
308 types, this will become more interesting.
312 When analysis discovers an inconsistency it needs to report an error;
313 just refusing to run the code ensures that the error doesn't cascade,
314 but by itself it isn't very useful. A clear understand of the sort of
315 error message that are useful will help guide the process of analysis.
317 At a simplistic level, the only sort of error that type analysis can
318 report is that the type of some construct doesn't match a contextual
319 requirement. For example, in `4 + "hello"` the addition provides a
320 contextual requirement for numbers, but `"hello"` is not a number. In
321 this particular example no further information is needed as the types
322 are obvious from local information. When a variable is involved that
323 isn't the case. It may be helpful to explain why the variable has a
324 particular type, by indicating the location where the type was set,
325 whether by declaration or usage.
327 Using a recursive-descent analysis we can easily detect a problem at
328 multiple locations. In "`hello:= "there"; 4 + hello`" the addition
329 will detect that one argument is not a number and the usage of `hello`
330 will detect that a number was wanted, but not provided. In this
331 (early) version of the language, we will generate error reports at
332 multiple locations, so the use of `hello` will report an error and
333 explain were the value was set, and the addition will report an error
334 and say why numbers are needed. To be able to report locations for
335 errors, each language element will need to record a file location
336 (line and column) and each variable will need to record the language
337 element where its type was set. For now we will assume that each line
338 of an error message indicates one location in the file, and up to 2
339 types. So we provide a `printf`-like function which takes a format, a
340 language (a `struct exec` which has not yet been introduced), and 2
341 types. "`%1`" reports the first type, "`%2`" reports the second. We
342 will need a function to print the location, once we know how that is
343 stored. As will be explained later, there are sometimes extra rules for
344 type matching and they might affect error messages, we need to pass those
349 static void fput_loc(struct exec *loc, FILE *f);
351 ###### core functions
353 static void type_err(struct parse_context *c,
354 char *fmt, struct exec *loc,
355 struct type *t1, int rules, struct type *t2)
357 fprintf(stderr, "%s:", c->file_name);
358 fput_loc(loc, stderr);
359 for (; *fmt ; fmt++) {
366 case '%': fputc(*fmt, stderr); break;
367 default: fputc('?', stderr); break;
370 fprintf(stderr, "%.*s", t1->name.len, t1->name.txt);
372 fputs("*unknown*", stderr);
376 fprintf(stderr, "%.*s", t2->name.len, t2->name.txt);
378 fputs("*unknown*", stderr);
388 static void tok_err(struct parse_context *c, char *fmt, struct token *t)
390 fprintf(stderr, "%s:%d:%d: %s\n", c->file_name, t->line, t->col, fmt);
396 One last introductory step before detailing the language elements and
397 providing their four requirements is to establish the data structures
398 to store these elements.
400 There are two key objects that we need to work with: executable
401 elements which comprise the program, and values which the program
402 works with. Between these are the variables in their various scopes
403 which hold the values, and types which classify the values stored and
404 manipulatd by executables.
408 Values come in a wide range of types, with more likely to be added.
409 Each type needs to be able to parse and print its own values (for
410 convenience at least) as well as to compare two values, at least for
411 equality and possibly for order. For now, values might need to be
412 duplicated and freed, though eventually such manipulations will be
413 better integrated into the language.
415 Rather than requiring every numeric type to support all numeric
416 operations (add, multiple, etc), we allow types to be able to present
417 as one of a few standard types: integer, float, and fraction. The
418 existance of these conversion functions enable types to determine if
419 they are compatible with other types.
421 Named type are stored in a simple linked list.
428 struct value (*init)(struct type *type);
429 struct value (*parse)(struct type *type, char *str);
430 void (*print)(struct value val);
431 int (*cmp_order)(struct value v1, struct value v2);
432 int (*cmp_eq)(struct value v1, struct value v2);
433 struct value (*dup)(struct value val);
434 void (*free)(struct value val);
435 struct type *(*compat)(struct type *this, struct type *other);
436 long long (*to_int)(struct value *v);
437 double (*to_float)(struct value *v);
438 int (*to_mpq)(mpq_t *q, struct value *v);
450 struct type *typelist;
454 static struct type *find_type(struct parse_context *c, struct text s)
456 struct type *l = c->typelist;
459 text_cmp(l->name, s) != 0)
464 static struct type *add_type(struct parse_context *c, struct text s,
469 n = calloc(1, sizeof(*n));
472 n->next = c->typelist;
477 static void free_type(struct type *t)
479 /* The type is always a reference to something in the
480 * context, so we don't need to free anything.
484 static void context_init(struct parse_context *c)
486 ## context initialization
489 ###### free context types
491 while (context.typelist) {
492 struct type *t = context.typelist;
494 context.typelist = t->next;
500 Values can be numbers, which we represent as multi-precision
501 fractions, strings, Booleans and labels. When analysing the program
502 we also need to allow for places where no value is meaningful (type
503 `Tnone`) and where we don't know what type to expect yet (type is
506 Values are never shared, they are always copied when used, and freed
507 when no longer needed.
509 When propagating type information around the program, we need to
510 determine if two types are compatible, where type `NULL` is compatible
511 with anything. There are two special cases with type compatibility,
512 both related to the Conditional Statement which will be described
513 later. In some cases a Boolean can be accepted as well as some other
514 primary type, and in others any type is acceptable except a label (`Vlabel`).
515 A separate function encode these cases will simplify some code later.
517 When assigning command line arguments to variables, we need to be able
518 to parse each type from a string.
526 myLDLIBS := libnumber.o libstring.o -lgmp
527 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
529 ###### type union fields
530 enum vtype {Vnone, Vstr, Vnum, Vbool, Vlabel} vtype;
543 enum val_rules {Rnolabel = 1<<0, Rboolok = 1<<1};
547 if (rules & Rnolabel)
548 fputs(" (labels not permitted)", stderr);
552 static void _free_value(struct value v)
554 switch (v.type->vtype) {
556 case Vstr: free(v.str.txt); break;
557 case Vnum: mpq_clear(v.num); break;
563 static void free_value(struct value v)
569 static int vtype_compat(struct type *require, struct type *have, int rules)
571 if ((rules & Rboolok) && have == Tbool)
573 if ((rules & Rnolabel) && have == Tlabel)
575 if (!require || !have)
578 return require == have;
581 ###### value functions
583 static struct value _val_init(struct type *type)
588 switch(type->vtype) {
592 mpq_init(rv.num); break;
594 rv.str.txt = malloc(1);
607 static struct value val_init(struct type *type)
612 return type->init(type);
617 static struct value _dup_value(struct value v)
621 switch (rv.type->vtype) {
632 mpq_set(rv.num, v.num);
635 rv.str.len = v.str.len;
636 rv.str.txt = malloc(rv.str.len);
637 memcpy(rv.str.txt, v.str.txt, v.str.len);
643 static struct value dup_value(struct value v)
646 return v.type->dup(v);
650 static int _value_cmp(struct value left, struct value right)
653 if (left.type != right.type)
654 return left.type - right.type;
655 switch (left.type->vtype) {
656 case Vlabel: cmp = left.label == right.label ? 0 : 1; break;
657 case Vnum: cmp = mpq_cmp(left.num, right.num); break;
658 case Vstr: cmp = text_cmp(left.str, right.str); break;
659 case Vbool: cmp = left.bool - right.bool; break;
665 static int value_cmp(struct value left, struct value right)
667 if (left.type && left.type->cmp_order)
668 return left.type->cmp_order(left, right);
669 if (left.type && left.type->cmp_eq)
670 return left.type->cmp_eq(left, right);
674 static struct text text_join(struct text a, struct text b)
677 rv.len = a.len + b.len;
678 rv.txt = malloc(rv.len);
679 memcpy(rv.txt, a.txt, a.len);
680 memcpy(rv.txt+a.len, b.txt, b.len);
684 static void _print_value(struct value v)
686 switch (v.type->vtype) {
688 printf("*no-value*"); break;
690 printf("*label-%p*", v.label); break;
692 printf("%.*s", v.str.len, v.str.txt); break;
694 printf("%s", v.bool ? "True":"False"); break;
699 mpf_set_q(fl, v.num);
700 gmp_printf("%Fg", fl);
707 static void print_value(struct value v)
709 if (v.type && v.type->print)
715 static struct value _parse_value(struct type *type, char *arg)
723 switch(type->vtype) {
729 val.str.len = strlen(arg);
730 val.str.txt = malloc(val.str.len);
731 memcpy(val.str.txt, arg, val.str.len);
738 tx.txt = arg; tx.len = strlen(tx.txt);
739 if (number_parse(val.num, tail, tx) == 0)
742 mpq_neg(val.num, val.num);
744 printf("Unsupported suffix: %s\n", arg);
749 if (strcasecmp(arg, "true") == 0 ||
750 strcmp(arg, "1") == 0)
752 else if (strcasecmp(arg, "false") == 0 ||
753 strcmp(arg, "0") == 0)
756 printf("Bad bool: %s\n", arg);
764 static struct value parse_value(struct type *type, char *arg)
768 if (type && type->parse)
769 return type->parse(type, arg);
774 static void _free_value(struct value v);
776 static struct type base_prototype = {
778 .parse = _parse_value,
779 .print = _print_value,
780 .cmp_order = _value_cmp,
781 .cmp_eq = _value_cmp,
786 static struct type *Tbool, *Tstr, *Tnum, *Tnone, *Tlabel;
789 static struct type *add_base_type(struct parse_context *c, char *n, enum vtype vt)
791 struct text txt = { n, strlen(n) };
794 t = add_type(c, txt, &base_prototype);
800 static struct type *add_base_type(struct parse_context *c, char *n, enum vtype vt);
802 ###### context initialization
804 Tbool = add_base_type(c, "Boolean", Vbool);
805 Tstr = add_base_type(c, "string", Vstr);
806 Tnum = add_base_type(c, "number", Vnum);
807 Tnone = add_base_type(c, "none", Vnone);
808 Tlabel = add_base_type(c, "label", Vlabel);
812 Variables are scoped named values. We store the names in a linked
813 list of "bindings" sorted lexically, and use sequential search and
820 struct binding *next; // in lexical order
824 This linked list is stored in the parse context so that "reduce"
825 functions can find or add variables, and so the analysis phase can
826 ensure that every variable gets a type.
830 struct binding *varlist; // In lexical order
834 static struct binding *find_binding(struct parse_context *c, struct text s)
836 struct binding **l = &c->varlist;
841 (cmp = text_cmp((*l)->name, s)) < 0)
845 n = calloc(1, sizeof(*n));
852 Each name can be linked to multiple variables defined in different
853 scopes. Each scope starts where the name is declared and continues
854 until the end of the containing code block. Scopes of a given name
855 cannot nest, so a declaration while a name is in-scope is an error.
857 ###### binding fields
858 struct variable *var;
862 struct variable *previous;
864 struct binding *name;
865 struct exec *where_decl;// where name was declared
866 struct exec *where_set; // where type was set
870 While the naming seems strange, we include local constants in the
871 definition of variables. A name declared `var := value` can
872 subsequently be changed, but a name declared `var ::= value` cannot -
875 ###### variable fields
878 Scopes in parallel branches can be partially merged. More
879 specifically, if a given name is declared in both branches of an
880 if/else then it's scope is a candidate for merging. Similarly if
881 every branch of an exhaustive switch (e.g. has an "else" clause)
882 declares a given name, then the scopes from the branches are
883 candidates for merging.
885 Note that names declared inside a loop (which is only parallel to
886 itself) are never visible after the loop. Similarly names defined in
887 scopes which are not parallel, such as those started by `for` and
888 `switch`, are never visible after the scope. Only variables defined in
889 both `then` and `else` (including the implicit then after an `if`, and
890 excluding `then` used with `for`) and in all `case`s and `else` of a
891 `switch` or `while` can be visible beyond the `if`/`switch`/`while`.
893 Labels, which are a bit like variables, follow different rules.
894 Labels are not explicitly declared, but if an undeclared name appears
895 in a context where a label is legal, that effectively declares the
896 name as a label. The declaration remains in force (or in scope) at
897 least to the end of the immediately containing block and conditionally
898 in any larger containing block which does not declare the name in some
899 other way. Importantly, the conditional scope extension happens even
900 if the label is only used in one parallel branch of a conditional --
901 when used in one branch it is treated as having been declared in all
904 Merge candidates are tentatively visible beyond the end of the
905 branching statement which creates them. If the name is used, the
906 merge is affirmed and they become a single variable visible at the
907 outer layer. If not - if it is redeclared first - the merge lapses.
909 To track scopes we have an extra stack, implemented as a linked list,
910 which roughly parallels the parse stack and which is used exclusively
911 for scoping. When a new scope is opened, a new frame is pushed and
912 the child-count of the parent frame is incremented. This child-count
913 is used to distinguish between the first of a set of parallel scopes,
914 in which declared variables must not be in scope, and subsequent
915 branches, whether they must already be conditionally scoped.
917 To push a new frame *before* any code in the frame is parsed, we need a
918 grammar reduction. This is most easily achieved with a grammar
919 element which derives the empty string, and creates the new scope when
920 it is recognized. This can be placed, for example, between a keyword
921 like "if" and the code following it.
925 struct scope *parent;
931 struct scope *scope_stack;
934 static void scope_pop(struct parse_context *c)
936 struct scope *s = c->scope_stack;
938 c->scope_stack = s->parent;
943 static void scope_push(struct parse_context *c)
945 struct scope *s = calloc(1, sizeof(*s));
947 c->scope_stack->child_count += 1;
948 s->parent = c->scope_stack;
956 OpenScope -> ${ scope_push(config2context(config)); }$
959 Each variable records a scope depth and is in one of four states:
961 - "in scope". This is the case between the declaration of the
962 variable and the end of the containing block, and also between
963 the usage with affirms a merge and the end of that block.
965 The scope depth is not greater than the current parse context scope
966 nest depth. When the block of that depth closes, the state will
967 change. To achieve this, all "in scope" variables are linked
968 together as a stack in nesting order.
970 - "pending". The "in scope" block has closed, but other parallel
971 scopes are still being processed. So far, every parallel block at
972 the same level that has closed has declared the name.
974 The scope depth is the depth of the last parallel block that
975 enclosed the declaration, and that has closed.
977 - "conditionally in scope". The "in scope" block and all parallel
978 scopes have closed, and no further mention of the name has been
979 seen. This state includes a secondary nest depth which records the
980 outermost scope seen since the variable became conditionally in
981 scope. If a use of the name is found, the variable becomes "in
982 scope" and that secondary depth becomes the recorded scope depth.
983 If the name is declared as a new variable, the old variable becomes
984 "out of scope" and the recorded scope depth stays unchanged.
986 - "out of scope". The variable is neither in scope nor conditionally
987 in scope. It is permanently out of scope now and can be removed from
988 the "in scope" stack.
991 ###### variable fields
992 int depth, min_depth;
993 enum { OutScope, PendingScope, CondScope, InScope } scope;
994 struct variable *in_scope;
998 struct variable *in_scope;
1000 All variables with the same name are linked together using the
1001 'previous' link. Those variable that have
1002 been affirmatively merged all have a 'merged' pointer that points to
1003 one primary variable - the most recently declared instance. When
1004 merging variables, we need to also adjust the 'merged' pointer on any
1005 other variables that had previously been merged with the one that will
1006 no longer be primary.
1008 ###### variable fields
1009 struct variable *merged;
1011 ###### ast functions
1013 static void variable_merge(struct variable *primary, struct variable *secondary)
1017 if (primary->merged)
1019 primary = primary->merged;
1021 for (v = primary->previous; v; v=v->previous)
1022 if (v == secondary || v == secondary->merged ||
1023 v->merged == secondary ||
1024 (v->merged && v->merged == secondary->merged)) {
1025 v->scope = OutScope;
1026 v->merged = primary;
1030 ###### free context vars
1032 while (context.varlist) {
1033 struct binding *b = context.varlist;
1034 struct variable *v = b->var;
1035 context.varlist = b->next;
1038 struct variable *t = v;
1046 #### Manipulating Bindings
1048 When a name is conditionally visible, a new declaration discards the
1049 old binding - the condition lapses. Conversely a usage of the name
1050 affirms the visibility and extends it to the end of the containing
1051 block - i.e. the block that contains both the original declaration and
1052 the latest usage. This is determined from `min_depth`. When a
1053 conditionally visible variable gets affirmed like this, it is also
1054 merged with other conditionally visible variables with the same name.
1056 When we parse a variable declaration we either signal an error if the
1057 name is currently bound, or create a new variable at the current nest
1058 depth if the name is unbound or bound to a conditionally scoped or
1059 pending-scope variable. If the previous variable was conditionally
1060 scoped, it and its homonyms becomes out-of-scope.
1062 When we parse a variable reference (including non-declarative
1063 assignment) we signal an error if the name is not bound or is bound to
1064 a pending-scope variable; update the scope if the name is bound to a
1065 conditionally scoped variable; or just proceed normally if the named
1066 variable is in scope.
1068 When we exit a scope, any variables bound at this level are either
1069 marked out of scope or pending-scoped, depending on whether the
1070 scope was sequential or parallel.
1072 When exiting a parallel scope we check if there are any variables that
1073 were previously pending and are still visible. If there are, then
1074 there weren't redeclared in the most recent scope, so they cannot be
1075 merged and must become out-of-scope. If it is not the first of
1076 parallel scopes (based on `child_count`), we check that there was a
1077 previous binding that is still pending-scope. If there isn't, the new
1078 variable must now be out-of-scope.
1080 When exiting a sequential scope that immediately enclosed parallel
1081 scopes, we need to resolve any pending-scope variables. If there was
1082 no `else` clause, and we cannot determine that the `switch` was exhaustive,
1083 we need to mark all pending-scope variable as out-of-scope. Otherwise
1084 all pending-scope variables become conditionally scoped.
1087 enum closetype { CloseSequential, CloseParallel, CloseElse };
1089 ###### ast functions
1091 static struct variable *var_decl(struct parse_context *c, struct text s)
1093 struct binding *b = find_binding(c, s);
1094 struct variable *v = b->var;
1096 switch (v ? v->scope : OutScope) {
1098 /* Caller will report the error */
1102 v && v->scope == CondScope;
1104 v->scope = OutScope;
1108 v = calloc(1, sizeof(*v));
1109 v->previous = b->var;
1112 v->min_depth = v->depth = c->scope_depth;
1114 v->in_scope = c->in_scope;
1116 v->val = val_init(NULL);
1120 static struct variable *var_ref(struct parse_context *c, struct text s)
1122 struct binding *b = find_binding(c, s);
1123 struct variable *v = b->var;
1124 struct variable *v2;
1126 switch (v ? v->scope : OutScope) {
1129 /* Signal an error - once that is possible */
1132 /* All CondScope variables of this name need to be merged
1133 * and become InScope
1135 v->depth = v->min_depth;
1137 for (v2 = v->previous;
1138 v2 && v2->scope == CondScope;
1140 variable_merge(v, v2);
1148 static void var_block_close(struct parse_context *c, enum closetype ct)
1150 /* close of all variables that are in_scope */
1151 struct variable *v, **vp, *v2;
1154 for (vp = &c->in_scope;
1155 v = *vp, v && v->depth > c->scope_depth && v->min_depth > c->scope_depth;
1159 case CloseParallel: /* handle PendingScope */
1163 if (c->scope_stack->child_count == 1)
1164 v->scope = PendingScope;
1165 else if (v->previous &&
1166 v->previous->scope == PendingScope)
1167 v->scope = PendingScope;
1168 else if (v->val.type == Tlabel)
1169 v->scope = PendingScope;
1170 else if (v->name->var == v)
1171 v->scope = OutScope;
1172 if (ct == CloseElse) {
1173 /* All Pending variables with this name
1174 * are now Conditional */
1176 v2 && v2->scope == PendingScope;
1178 v2->scope = CondScope;
1183 v2 && v2->scope == PendingScope;
1185 if (v2->val.type != Tlabel)
1186 v2->scope = OutScope;
1188 case OutScope: break;
1191 case CloseSequential:
1192 if (v->val.type == Tlabel)
1193 v->scope = PendingScope;
1196 v->scope = OutScope;
1199 /* There was no 'else', so we can only become
1200 * conditional if we know the cases were exhaustive,
1201 * and that doesn't mean anything yet.
1202 * So only labels become conditional..
1205 v2 && v2->scope == PendingScope;
1207 if (v2->val.type == Tlabel) {
1208 v2->scope = CondScope;
1209 v2->min_depth = c->scope_depth;
1211 v2->scope = OutScope;
1214 case OutScope: break;
1218 if (v->scope == OutScope)
1227 Executables can be lots of different things. In many cases an
1228 executable is just an operation combined with one or two other
1229 executables. This allows for expressions and lists etc. Other times
1230 an executable is something quite specific like a constant or variable
1231 name. So we define a `struct exec` to be a general executable with a
1232 type, and a `struct binode` which is a subclass of `exec`, forms a
1233 node in a binary tree, and holds an operation. There will be other
1234 subclasses, and to access these we need to be able to `cast` the
1235 `exec` into the various other types.
1238 #define cast(structname, pointer) ({ \
1239 const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \
1240 if (__mptr && *__mptr != X##structname) abort(); \
1241 (struct structname *)( (char *)__mptr);})
1243 #define new(structname) ({ \
1244 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1245 __ptr->type = X##structname; \
1246 __ptr->line = -1; __ptr->column = -1; \
1249 #define new_pos(structname, token) ({ \
1250 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1251 __ptr->type = X##structname; \
1252 __ptr->line = token.line; __ptr->column = token.col; \
1261 enum exec_types type;
1269 struct exec *left, *right;
1272 ###### ast functions
1274 static int __fput_loc(struct exec *loc, FILE *f)
1276 if (loc->line >= 0) {
1277 fprintf(f, "%d:%d: ", loc->line, loc->column);
1280 if (loc->type == Xbinode)
1281 return __fput_loc(cast(binode,loc)->left, f) ||
1282 __fput_loc(cast(binode,loc)->right, f);
1285 static void fput_loc(struct exec *loc, FILE *f)
1287 if (!__fput_loc(loc, f))
1288 fprintf(f, "??:??: ");
1291 Each different type of `exec` node needs a number of functions
1292 defined, a bit like methods. We must be able to be able to free it,
1293 print it, analyse it and execute it. Once we have specific `exec`
1294 types we will need to parse them too. Let's take this a bit more
1299 The parser generator requires a `free_foo` function for each struct
1300 that stores attributes and they will be `exec`s and subtypes there-of.
1301 So we need `free_exec` which can handle all the subtypes, and we need
1304 ###### ast functions
1306 static void free_binode(struct binode *b)
1311 free_exec(b->right);
1315 ###### core functions
1316 static void free_exec(struct exec *e)
1325 ###### forward decls
1327 static void free_exec(struct exec *e);
1329 ###### free exec cases
1330 case Xbinode: free_binode(cast(binode, e)); break;
1334 Printing an `exec` requires that we know the current indent level for
1335 printing line-oriented components. As will become clear later, we
1336 also want to know what sort of bracketing to use.
1338 ###### ast functions
1340 static void do_indent(int i, char *str)
1347 ###### core functions
1348 static void print_binode(struct binode *b, int indent, int bracket)
1352 ## print binode cases
1356 static void print_exec(struct exec *e, int indent, int bracket)
1362 print_binode(cast(binode, e), indent, bracket); break;
1367 ###### forward decls
1369 static void print_exec(struct exec *e, int indent, int bracket);
1373 As discussed, analysis involves propagating type requirements around
1374 the program and looking for errors.
1376 So `propagate_types` is passed an expected type (being a `struct type`
1377 pointer together with some `val_rules` flags) that the `exec` is
1378 expected to return, and returns the type that it does return, either
1379 of which can be `NULL` signifying "unknown". An `ok` flag is passed
1380 by reference. It is set to `0` when an error is found, and `2` when
1381 any change is made. If it remains unchanged at `1`, then no more
1382 propagation is needed.
1384 ###### core functions
1386 static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1387 struct type *type, int rules)
1394 switch (prog->type) {
1397 struct binode *b = cast(binode, prog);
1399 ## propagate binode cases
1403 ## propagate exec cases
1410 Interpreting an `exec` doesn't require anything but the `exec`. State
1411 is stored in variables and each variable will be directly linked from
1412 within the `exec` tree. The exception to this is the whole `program`
1413 which needs to look at command line arguments. The `program` will be
1414 interpreted separately.
1416 Each `exec` can return a value, which may be `Tnone` but must be non-NULL;
1418 ###### core functions
1420 static struct value interp_exec(struct exec *e)
1430 struct binode *b = cast(binode, e);
1431 struct value left, right;
1432 left.type = right.type = Tnone;
1434 ## interp binode cases
1436 free_value(left); free_value(right);
1439 ## interp exec cases
1444 ## Language elements
1446 Each language element needs to be parsed, printed, analysed,
1447 interpreted, and freed. There are several, so let's just start with
1448 the easy ones and work our way up.
1452 We have already met values as separate objects. When manifest
1453 constants appear in the program text, that must result in an executable
1454 which has a constant value. So the `val` structure embeds a value in
1470 $0 = new_pos(val, $1);
1471 $0->val.type = Tbool;
1475 $0 = new_pos(val, $1);
1476 $0->val.type = Tbool;
1480 $0 = new_pos(val, $1);
1481 $0->val.type = Tnum;
1484 if (number_parse($0->val.num, tail, $1.txt) == 0)
1485 mpq_init($0->val.num);
1487 tok_err(config2context(config), "error: unsupported number suffix.",
1492 $0 = new_pos(val, $1);
1493 $0->val.type = Tstr;
1496 string_parse(&$1, '\\', &$0->val.str, tail);
1498 tok_err(config2context(config), "error: unsupported string suffix.",
1503 $0 = new_pos(val, $1);
1504 $0->val.type = Tstr;
1507 string_parse(&$1, '\\', &$0->val.str, tail);
1509 tok_err(config2context(config), "error: unsupported string suffix.",
1514 ###### print exec cases
1517 struct val *v = cast(val, e);
1518 if (v->val.type == Tstr)
1520 print_value(v->val);
1521 if (v->val.type == Tstr)
1526 ###### propagate exec cases
1529 struct val *val = cast(val, prog);
1530 if (!vtype_compat(type, val->val.type, rules)) {
1531 type_err(c, "error: expected %1%r found %2",
1532 prog, type, rules, val->val.type);
1535 return val->val.type;
1538 ###### interp exec cases
1540 return dup_value(cast(val, e)->val);
1542 ###### ast functions
1543 static void free_val(struct val *v)
1551 ###### free exec cases
1552 case Xval: free_val(cast(val, e)); break;
1554 ###### ast functions
1555 // Move all nodes from 'b' to 'rv', reversing the order.
1556 // In 'b' 'left' is a list, and 'right' is the last node.
1557 // In 'rv', left' is the first node and 'right' is a list.
1558 static struct binode *reorder_bilist(struct binode *b)
1560 struct binode *rv = NULL;
1563 struct exec *t = b->right;
1567 b = cast(binode, b->left);
1577 Just as we used a `val` to wrap a value into an `exec`, we similarly
1578 need a `var` to wrap a `variable` into an exec. While each `val`
1579 contained a copy of the value, each `var` hold a link to the variable
1580 because it really is the same variable no matter where it appears.
1581 When a variable is used, we need to remember to follow the `->merged`
1582 link to find the primary instance.
1590 struct variable *var;
1596 VariableDecl -> IDENTIFIER := ${ {
1597 struct variable *v = var_decl(config2context(config), $1.txt);
1598 $0 = new_pos(var, $1);
1603 v = var_ref(config2context(config), $1.txt);
1605 type_err(config2context(config), "error: variable '%v' redeclared",
1606 $0, Tnone, 0, Tnone);
1607 type_err(config2context(config), "info: this is where '%v' was first declared",
1608 v->where_decl, Tnone, 0, Tnone);
1611 | IDENTIFIER ::= ${ {
1612 struct variable *v = var_decl(config2context(config), $1.txt);
1613 $0 = new_pos(var, $1);
1619 v = var_ref(config2context(config), $1.txt);
1621 type_err(config2context(config), "error: variable '%v' redeclared",
1622 $0, Tnone, 0, Tnone);
1623 type_err(config2context(config), "info: this is where '%v' was first declared",
1624 v->where_decl, Tnone, 0, Tnone);
1627 | IDENTIFIER : Type = ${ {
1628 struct variable *v = var_decl(config2context(config), $1.txt);
1629 $0 = new_pos(var, $1);
1634 v->val = val_init($<3);
1636 v = var_ref(config2context(config), $1.txt);
1638 type_err(config2context(config), "error: variable '%v' redeclared",
1639 $0, Tnone, 0, Tnone);
1640 type_err(config2context(config), "info: this is where '%v' was first declared",
1641 v->where_decl, Tnone, 0, Tnone);
1644 | IDENTIFIER :: Type = ${ {
1645 struct variable *v = var_decl(config2context(config), $1.txt);
1646 $0 = new_pos(var, $1);
1651 v->val = val_init($<3);
1654 v = var_ref(config2context(config), $1.txt);
1656 type_err(config2context(config), "error: variable '%v' redeclared",
1657 $0, Tnone, 0, Tnone);
1658 type_err(config2context(config), "info: this is where '%v' was first declared",
1659 v->where_decl, Tnone, 0, Tnone);
1663 Variable -> IDENTIFIER ${ {
1664 struct variable *v = var_ref(config2context(config), $1.txt);
1665 $0 = new_pos(var, $1);
1667 /* This might be a label - allocate a var just in case */
1668 v = var_decl(config2context(config), $1.txt);
1670 v->val = val_init(Tlabel);
1671 v->val.label = &v->val;
1679 Type -> IDENTIFIER ${
1680 $0 = find_type(config2context(config), $1.txt);
1682 tok_err(config2context(config),
1683 "error: undefined type", &$1);
1689 ###### print exec cases
1692 struct var *v = cast(var, e);
1694 struct binding *b = v->var->name;
1695 printf("%.*s", b->name.len, b->name.txt);
1702 if (loc->type == Xvar) {
1703 struct var *v = cast(var, loc);
1705 struct binding *b = v->var->name;
1706 fprintf(stderr, "%.*s", b->name.len, b->name.txt);
1708 fputs("???", stderr);
1710 fputs("NOTVAR", stderr);
1713 ###### propagate exec cases
1717 struct var *var = cast(var, prog);
1718 struct variable *v = var->var;
1720 type_err(c, "%d:BUG: no variable!!", prog, Tnone, 0, Tnone);
1726 if (v->val.type == NULL) {
1727 if (type && *ok != 0) {
1728 v->val = val_init(type);
1729 v->where_set = prog;
1734 if (!vtype_compat(type, v->val.type, rules)) {
1735 type_err(c, "error: expected %1%r but variable '%v' is %2", prog,
1736 type, rules, v->val.type);
1737 type_err(c, "info: this is where '%v' was set to %1", v->where_set,
1738 v->val.type, rules, Tnone);
1746 ###### interp exec cases
1749 struct var *var = cast(var, e);
1750 struct variable *v = var->var;
1754 return dup_value(v->val);
1757 ###### ast functions
1759 static void free_var(struct var *v)
1764 ###### free exec cases
1765 case Xvar: free_var(cast(var, e)); break;
1767 ### Expressions: Boolean
1769 Our first user of the `binode` will be expressions, and particularly
1770 Boolean expressions. As I haven't implemented precedence in the
1771 parser generator yet, we need different names from each precedence
1772 level used by expressions. The outer most or lowest level precedence
1773 are Boolean `or` `and`, and `not` which form an `Expression` out of `BTerm`s
1784 Expression -> Expression or BTerm ${ {
1785 struct binode *b = new(binode);
1791 | BTerm ${ $0 = $<1; }$
1793 BTerm -> BTerm and BFact ${ {
1794 struct binode *b = new(binode);
1800 | BFact ${ $0 = $<1; }$
1802 BFact -> not BFact ${ {
1803 struct binode *b = new(binode);
1810 ###### print binode cases
1812 print_exec(b->left, -1, 0);
1814 print_exec(b->right, -1, 0);
1817 print_exec(b->left, -1, 0);
1819 print_exec(b->right, -1, 0);
1823 print_exec(b->right, -1, 0);
1826 ###### propagate binode cases
1830 /* both must be Tbool, result is Tbool */
1831 propagate_types(b->left, c, ok, Tbool, 0);
1832 propagate_types(b->right, c, ok, Tbool, 0);
1833 if (type && type != Tbool) {
1834 type_err(c, "error: %1 operation found where %2 expected", prog,
1840 ###### interp binode cases
1842 rv = interp_exec(b->left);
1843 right = interp_exec(b->right);
1844 rv.bool = rv.bool && right.bool;
1847 rv = interp_exec(b->left);
1848 right = interp_exec(b->right);
1849 rv.bool = rv.bool || right.bool;
1852 rv = interp_exec(b->right);
1856 ### Expressions: Comparison
1858 Of slightly higher precedence that Boolean expressions are
1860 A comparison takes arguments of any type, but the two types must be
1863 To simplify the parsing we introduce an `eop` which can record an
1864 expression operator.
1871 ###### ast functions
1872 static void free_eop(struct eop *e)
1887 | Expr CMPop Expr ${ {
1888 struct binode *b = new(binode);
1894 | Expr ${ $0 = $<1; }$
1899 CMPop -> < ${ $0.op = Less; }$
1900 | > ${ $0.op = Gtr; }$
1901 | <= ${ $0.op = LessEq; }$
1902 | >= ${ $0.op = GtrEq; }$
1903 | == ${ $0.op = Eql; }$
1904 | != ${ $0.op = NEql; }$
1906 ###### print binode cases
1914 print_exec(b->left, -1, 0);
1916 case Less: printf(" < "); break;
1917 case LessEq: printf(" <= "); break;
1918 case Gtr: printf(" > "); break;
1919 case GtrEq: printf(" >= "); break;
1920 case Eql: printf(" == "); break;
1921 case NEql: printf(" != "); break;
1924 print_exec(b->right, -1, 0);
1927 ###### propagate binode cases
1934 /* Both must match but not labels, result is Tbool */
1935 t = propagate_types(b->left, c, ok, NULL, Rnolabel);
1937 propagate_types(b->right, c, ok, t, 0);
1939 t = propagate_types(b->right, c, ok, NULL, Rnolabel);
1941 t = propagate_types(b->left, c, ok, t, 0);
1943 if (!vtype_compat(type, Tbool, 0)) {
1944 type_err(c, "error: Comparison returns %1 but %2 expected", prog,
1945 Tbool, rules, type);
1950 ###### interp binode cases
1959 left = interp_exec(b->left);
1960 right = interp_exec(b->right);
1961 cmp = value_cmp(left, right);
1964 case Less: rv.bool = cmp < 0; break;
1965 case LessEq: rv.bool = cmp <= 0; break;
1966 case Gtr: rv.bool = cmp > 0; break;
1967 case GtrEq: rv.bool = cmp >= 0; break;
1968 case Eql: rv.bool = cmp == 0; break;
1969 case NEql: rv.bool = cmp != 0; break;
1970 default: rv.bool = 0; break;
1975 ### Expressions: The rest
1977 The remaining expressions with the highest precedence are arithmetic
1978 and string concatenation. They are `Expr`, `Term`, and `Factor`.
1979 The `Factor` is where the `Value` and `Variable` that we already have
1982 `+` and `-` are both infix and prefix operations (where they are
1983 absolute value and negation). These have different operator names.
1985 We also have a 'Bracket' operator which records where parentheses were
1986 found. This make it easy to reproduce these when printing. Once
1987 precedence is handled better I might be able to discard this.
1999 Expr -> Expr Eop Term ${ {
2000 struct binode *b = new(binode);
2006 | Term ${ $0 = $<1; }$
2008 Term -> Term Top Factor ${ {
2009 struct binode *b = new(binode);
2015 | Factor ${ $0 = $<1; }$
2017 Factor -> ( Expression ) ${ {
2018 struct binode *b = new_pos(binode, $1);
2024 struct binode *b = new(binode);
2029 | Value ${ $0 = $<1; }$
2030 | Variable ${ $0 = $<1; }$
2033 Eop -> + ${ $0.op = Plus; }$
2034 | - ${ $0.op = Minus; }$
2036 Uop -> + ${ $0.op = Absolute; }$
2037 | - ${ $0.op = Negate; }$
2039 Top -> * ${ $0.op = Times; }$
2040 | / ${ $0.op = Divide; }$
2041 | ++ ${ $0.op = Concat; }$
2043 ###### print binode cases
2049 print_exec(b->left, indent, 0);
2051 case Plus: printf(" + "); break;
2052 case Minus: printf(" - "); break;
2053 case Times: printf(" * "); break;
2054 case Divide: printf(" / "); break;
2055 case Concat: printf(" ++ "); break;
2058 print_exec(b->right, indent, 0);
2062 print_exec(b->right, indent, 0);
2066 print_exec(b->right, indent, 0);
2070 print_exec(b->right, indent, 0);
2074 ###### propagate binode cases
2079 /* both must be numbers, result is Tnum */
2082 /* as propagate_types ignores a NULL,
2083 * unary ops fit here too */
2084 propagate_types(b->left, c, ok, Tnum, 0);
2085 propagate_types(b->right, c, ok, Tnum, 0);
2086 if (!vtype_compat(type, Tnum, 0)) {
2087 type_err(c, "error: Arithmetic returns %1 but %2 expected", prog,
2094 /* both must be Tstr, result is Tstr */
2095 propagate_types(b->left, c, ok, Tstr, 0);
2096 propagate_types(b->right, c, ok, Tstr, 0);
2097 if (!vtype_compat(type, Tstr, 0)) {
2098 type_err(c, "error: Concat returns %1 but %2 expected", prog,
2105 return propagate_types(b->right, c, ok, type, 0);
2107 ###### interp binode cases
2110 rv = interp_exec(b->left);
2111 right = interp_exec(b->right);
2112 mpq_add(rv.num, rv.num, right.num);
2115 rv = interp_exec(b->left);
2116 right = interp_exec(b->right);
2117 mpq_sub(rv.num, rv.num, right.num);
2120 rv = interp_exec(b->left);
2121 right = interp_exec(b->right);
2122 mpq_mul(rv.num, rv.num, right.num);
2125 rv = interp_exec(b->left);
2126 right = interp_exec(b->right);
2127 mpq_div(rv.num, rv.num, right.num);
2130 rv = interp_exec(b->right);
2131 mpq_neg(rv.num, rv.num);
2134 rv = interp_exec(b->right);
2135 mpq_abs(rv.num, rv.num);
2138 rv = interp_exec(b->right);
2141 left = interp_exec(b->left);
2142 right = interp_exec(b->right);
2144 rv.str = text_join(left.str, right.str);
2147 ### Blocks, Statements, and Statement lists.
2149 Now that we have expressions out of the way we need to turn to
2150 statements. There are simple statements and more complex statements.
2151 Simple statements do not contain newlines, complex statements do.
2153 Statements often come in sequences and we have corresponding simple
2154 statement lists and complex statement lists.
2155 The former comprise only simple statements separated by semicolons.
2156 The later comprise complex statements and simple statement lists. They are
2157 separated by newlines. Thus the semicolon is only used to separate
2158 simple statements on the one line. This may be overly restrictive,
2159 but I'm not sure I ever want a complex statement to share a line with
2162 Note that a simple statement list can still use multiple lines if
2163 subsequent lines are indented, so
2165 ###### Example: wrapped simple statement list
2170 is a single simple statement list. This might allow room for
2171 confusion, so I'm not set on it yet.
2173 A simple statement list needs no extra syntax. A complex statement
2174 list has two syntactic forms. It can be enclosed in braces (much like
2175 C blocks), or it can be introduced by a colon and continue until an
2176 unindented newline (much like Python blocks). With this extra syntax
2177 it is referred to as a block.
2179 Note that a block does not have to include any newlines if it only
2180 contains simple statements. So both of:
2182 if condition: a=b; d=f
2184 if condition { a=b; print f }
2188 In either case the list is constructed from a `binode` list with
2189 `Block` as the operator. When parsing the list it is most convenient
2190 to append to the end, so a list is a list and a statement. When using
2191 the list it is more convenient to consider a list to be a statement
2192 and a list. So we need a function to re-order a list.
2193 `reorder_bilist` serves this purpose.
2195 The only stand-alone statement we introduce at this stage is `pass`
2196 which does nothing and is represented as a `NULL` pointer in a `Block`
2216 Block -> Open Statementlist Close ${ $0 = $<2; }$
2217 | Open Newlines Statementlist Close ${ $0 = $<3; }$
2218 | Open SimpleStatements } ${ $0 = reorder_bilist($<2); }$
2219 | Open Newlines SimpleStatements } ${ $0 = reorder_bilist($<3); }$
2220 | : Statementlist ${ $0 = $<2; }$
2221 | : SimpleStatements ${ $0 = reorder_bilist($<2); }$
2223 Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<1); }$
2225 ComplexStatements -> ComplexStatements ComplexStatement ${
2231 | ComplexStatements NEWLINE ${ $0 = $<1; }$
2232 | ComplexStatement ${
2240 ComplexStatement -> SimpleStatements NEWLINE ${
2241 $0 = reorder_bilist($<1);
2243 ## ComplexStatement Grammar
2246 SimpleStatements -> SimpleStatements ; SimpleStatement ${
2252 | SimpleStatement ${
2258 | SimpleStatements ; ${ $0 = $<1; }$
2260 SimpleStatement -> pass ${ $0 = NULL; }$
2261 ## SimpleStatement Grammar
2263 ###### print binode cases
2267 if (b->left == NULL)
2270 print_exec(b->left, indent, 0);
2273 print_exec(b->right, indent, 0);
2276 // block, one per line
2277 if (b->left == NULL)
2278 do_indent(indent, "pass\n");
2280 print_exec(b->left, indent, bracket);
2282 print_exec(b->right, indent, bracket);
2286 ###### propagate binode cases
2289 /* If any statement returns something other then Tnone
2290 * or Tbool then all such must return same type.
2291 * As each statement may be Tnone or something else,
2292 * we must always pass NULL (unknown) down, otherwise an incorrect
2293 * error might occur. We never return Tnone unless it is
2298 for (e = b; e; e = cast(binode, e->right)) {
2299 t = propagate_types(e->left, c, ok, NULL, rules);
2300 if ((rules & Rboolok) && t == Tbool)
2302 if (t && t != Tnone && t != Tbool) {
2305 else if (t != type) {
2306 type_err(c, "error: expected %1%r, found %2",
2307 e->left, type, rules, t);
2315 ###### interp binode cases
2317 while (rv.type == Tnone &&
2320 rv = interp_exec(b->left);
2321 b = cast(binode, b->right);
2325 ### The Print statement
2327 `print` is a simple statement that takes a comma-separated list of
2328 expressions and prints the values separated by spaces and terminated
2329 by a newline. No control of formatting is possible.
2331 `print` faces the same list-ordering issue as blocks, and uses the
2337 ###### SimpleStatement Grammar
2339 | print ExpressionList ${
2340 $0 = reorder_bilist($<2);
2342 | print ExpressionList , ${
2347 $0 = reorder_bilist($0);
2358 ExpressionList -> ExpressionList , Expression ${
2371 ###### print binode cases
2374 do_indent(indent, "print");
2378 print_exec(b->left, -1, 0);
2382 b = cast(binode, b->right);
2388 ###### propagate binode cases
2391 /* don't care but all must be consistent */
2392 propagate_types(b->left, c, ok, NULL, Rnolabel);
2393 propagate_types(b->right, c, ok, NULL, Rnolabel);
2396 ###### interp binode cases
2402 for ( ; b; b = cast(binode, b->right))
2406 left = interp_exec(b->left);
2419 ###### Assignment statement
2421 An assignment will assign a value to a variable, providing it hasn't
2422 be declared as a constant. The analysis phase ensures that the type
2423 will be correct so the interpreter just needs to perform the
2424 calculation. There is a form of assignment which declares a new
2425 variable as well as assigning a value. If a name is assigned before
2426 it is declared, and error will be raised as the name is created as
2427 `Tlabel` and it is illegal to assign to such names.
2433 ###### SimpleStatement Grammar
2434 | Variable = Expression ${ {
2435 struct var *v = cast(var, $1);
2441 if (v->var && !v->var->constant) {
2445 | VariableDecl Expression ${
2452 ###### print binode cases
2455 do_indent(indent, "");
2456 print_exec(b->left, indent, 0);
2458 print_exec(b->right, indent, 0);
2465 struct variable *v = cast(var, b->left)->var;
2466 do_indent(indent, "");
2467 print_exec(b->left, indent, 0);
2468 if (cast(var, b->left)->var->constant) {
2469 if (v->where_decl == v->where_set)
2470 printf("::%.*s = ", v->val.type->name.len,
2471 v->val.type->name.txt);
2475 if (v->where_decl == v->where_set)
2476 printf(":%.*s = ", v->val.type->name.len,
2477 v->val.type->name.txt);
2481 print_exec(b->right, indent, 0);
2487 ###### propagate binode cases
2491 /* Both must match and not be labels, result is Tnone */
2492 t = propagate_types(b->left, c, ok, NULL, Rnolabel);
2494 if (propagate_types(b->right, c, ok, t, 0) != t)
2495 if (b->left->type == Xvar)
2496 type_err(c, "info: variable '%v' was set as %1 here.",
2497 cast(var, b->left)->var->where_set, t, rules, Tnone);
2499 t = propagate_types(b->right, c, ok, NULL, Rnolabel);
2501 propagate_types(b->left, c, ok, t, 0);
2507 ###### interp binode cases
2512 struct variable *v = cast(var, b->left)->var;
2515 right = interp_exec(b->right);
2522 ### The `use` statement
2524 The `use` statement is the last "simple" statement. It is needed when
2525 the condition in a conditional statement is a block. `use` works much
2526 like `return` in C, but only completes the `condition`, not the whole
2532 ###### SimpleStatement Grammar
2534 $0 = new_pos(binode, $1);
2539 ###### print binode cases
2542 do_indent(indent, "use ");
2543 print_exec(b->right, -1, 0);
2548 ###### propagate binode cases
2551 /* result matches value */
2552 return propagate_types(b->right, c, ok, type, 0);
2554 ###### interp binode cases
2557 rv = interp_exec(b->right);
2560 ### The Conditional Statement
2562 This is the biggy and currently the only complex statement. This
2563 subsumes `if`, `while`, `do/while`, `switch`, and some parts of `for`.
2564 It is comprised of a number of parts, all of which are optional though
2565 set combinations apply. Each part is (usually) a key word (`then` is
2566 sometimes optional) followed by either an expression or a code block,
2567 except the `casepart` which is a "key word and an expression" followed
2568 by a code block. The code-block option is valid for all parts and,
2569 where an expression is also allowed, the code block can use the `use`
2570 statement to report a value. If the code block does not report a value
2571 the effect is similar to reporting `True`.
2573 The `else` and `case` parts, as well as `then` when combined with
2574 `if`, can contain a `use` statement which will apply to some
2575 containing conditional statement. `for` parts, `do` parts and `then`
2576 parts used with `for` can never contain a `use`, except in some
2577 subordinate conditional statement.
2579 If there is a `forpart`, it is executed first, only once.
2580 If there is a `dopart`, then it is executed repeatedly providing
2581 always that the `condpart` or `cond`, if present, does not return a non-True
2582 value. `condpart` can fail to return any value if it simply executes
2583 to completion. This is treated the same as returning `True`.
2585 If there is a `thenpart` it will be executed whenever the `condpart`
2586 or `cond` returns True (or does not return any value), but this will happen
2587 *after* `dopart` (when present).
2589 If `elsepart` is present it will be executed at most once when the
2590 condition returns `False` or some value that isn't `True` and isn't
2591 matched by any `casepart`. If there are any `casepart`s, they will be
2592 executed when the condition returns a matching value.
2594 The particular sorts of values allowed in case parts has not yet been
2595 determined in the language design, so nothing is prohibited.
2597 The various blocks in this complex statement potentially provide scope
2598 for variables as described earlier. Each such block must include the
2599 "OpenScope" nonterminal before parsing the block, and must call
2600 `var_block_close()` when closing the block.
2602 The code following "`if`", "`switch`" and "`for`" does not get its own
2603 scope, but is in a scope covering the whole statement, so names
2604 declared there cannot be redeclared elsewhere. Similarly the
2605 condition following "`while`" is in a scope the covers the body
2606 ("`do`" part) of the loop, and which does not allow conditional scope
2607 extension. Code following "`then`" (both looping and non-looping),
2608 "`else`" and "`case`" each get their own local scope.
2610 The type requirements on the code block in a `whilepart` are quite
2611 unusal. It is allowed to return a value of some identifiable type, in
2612 which case the loop aborts and an appropriate `casepart` is run, or it
2613 can return a Boolean, in which case the loop either continues to the
2614 `dopart` (on `True`) or aborts and runs the `elsepart` (on `False`).
2615 This is different both from the `ifpart` code block which is expected to
2616 return a Boolean, or the `switchpart` code block which is expected to
2617 return the same type as the casepart values. The correct analysis of
2618 the type of the `whilepart` code block is the reason for the
2619 `Rboolok` flag which is passed to `propagate_types()`.
2621 The `cond_statement` cannot fit into a `binode` so a new `exec` is
2630 struct exec *action;
2631 struct casepart *next;
2633 struct cond_statement {
2635 struct exec *forpart, *condpart, *dopart, *thenpart, *elsepart;
2636 struct casepart *casepart;
2639 ###### ast functions
2641 static void free_casepart(struct casepart *cp)
2645 free_exec(cp->value);
2646 free_exec(cp->action);
2653 static void free_cond_statement(struct cond_statement *s)
2657 free_exec(s->forpart);
2658 free_exec(s->condpart);
2659 free_exec(s->dopart);
2660 free_exec(s->thenpart);
2661 free_exec(s->elsepart);
2662 free_casepart(s->casepart);
2666 ###### free exec cases
2667 case Xcond_statement: free_cond_statement(cast(cond_statement, e)); break;
2669 ###### ComplexStatement Grammar
2670 | CondStatement ${ $0 = $<1; }$
2675 // both ForThen and Whilepart open scopes, and CondSuffix only
2676 // closes one - so in the first branch here we have another to close.
2677 CondStatement -> ForThen WhilePart CondSuffix ${
2679 $0->forpart = $1.forpart; $1.forpart = NULL;
2680 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
2681 $0->condpart = $2.condpart; $2.condpart = NULL;
2682 $0->dopart = $2.dopart; $2.dopart = NULL;
2683 var_block_close(config2context(config), CloseSequential);
2685 | WhilePart CondSuffix ${
2687 $0->condpart = $1.condpart; $1.condpart = NULL;
2688 $0->dopart = $1.dopart; $1.dopart = NULL;
2690 | SwitchPart CondSuffix ${
2694 | IfPart IfSuffix ${
2696 $0->condpart = $1.condpart; $1.condpart = NULL;
2697 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
2698 // This is where we close an "if" statement
2699 var_block_close(config2context(config), CloseSequential);
2702 CondSuffix -> IfSuffix ${
2704 // This is where we close scope of the whole
2705 // "for" or "while" statement
2706 var_block_close(config2context(config), CloseSequential);
2708 | CasePart CondSuffix ${
2710 $1->next = $0->casepart;
2715 CasePart -> Newlines case Expression OpenScope Block ${
2716 $0 = calloc(1,sizeof(struct casepart));
2719 var_block_close(config2context(config), CloseParallel);
2721 | case Expression OpenScope Block ${
2722 $0 = calloc(1,sizeof(struct casepart));
2725 var_block_close(config2context(config), CloseParallel);
2729 IfSuffix -> Newlines ${ $0 = new(cond_statement); }$
2730 | Newlines else OpenScope Block ${
2731 $0 = new(cond_statement);
2733 var_block_close(config2context(config), CloseElse);
2735 | else OpenScope Block ${
2736 $0 = new(cond_statement);
2738 var_block_close(config2context(config), CloseElse);
2740 | Newlines else OpenScope CondStatement ${
2741 $0 = new(cond_statement);
2743 var_block_close(config2context(config), CloseElse);
2745 | else OpenScope CondStatement ${
2746 $0 = new(cond_statement);
2748 var_block_close(config2context(config), CloseElse);
2753 // These scopes are closed in CondSuffix
2754 ForPart -> for OpenScope SimpleStatements ${
2755 $0 = reorder_bilist($<3);
2757 | for OpenScope Block ${
2761 ThenPart -> then OpenScope SimpleStatements ${
2762 $0 = reorder_bilist($<3);
2763 var_block_close(config2context(config), CloseSequential);
2765 | then OpenScope Block ${
2767 var_block_close(config2context(config), CloseSequential);
2770 ThenPartNL -> ThenPart OptNL ${
2774 // This scope is closed in CondSuffix
2775 WhileHead -> while OpenScope Block ${
2780 ForThen -> ForPart OptNL ThenPartNL ${
2788 // This scope is closed in CondSuffix
2789 WhilePart -> while OpenScope Expression Block ${
2790 $0.type = Xcond_statement;
2794 | WhileHead OptNL do Block ${
2795 $0.type = Xcond_statement;
2800 IfPart -> if OpenScope Expression OpenScope Block ${
2801 $0.type = Xcond_statement;
2804 var_block_close(config2context(config), CloseParallel);
2806 | if OpenScope Block OptNL then OpenScope Block ${
2807 $0.type = Xcond_statement;
2810 var_block_close(config2context(config), CloseParallel);
2814 // This scope is closed in CondSuffix
2815 SwitchPart -> switch OpenScope Expression ${
2818 | switch OpenScope Block ${
2822 ###### print exec cases
2824 case Xcond_statement:
2826 struct cond_statement *cs = cast(cond_statement, e);
2827 struct casepart *cp;
2829 do_indent(indent, "for");
2830 if (bracket) printf(" {\n"); else printf(":\n");
2831 print_exec(cs->forpart, indent+1, bracket);
2834 do_indent(indent, "} then {\n");
2836 do_indent(indent, "then:\n");
2837 print_exec(cs->thenpart, indent+1, bracket);
2839 if (bracket) do_indent(indent, "}\n");
2843 if (cs->condpart && cs->condpart->type == Xbinode &&
2844 cast(binode, cs->condpart)->op == Block) {
2846 do_indent(indent, "while {\n");
2848 do_indent(indent, "while:\n");
2849 print_exec(cs->condpart, indent+1, bracket);
2851 do_indent(indent, "} do {\n");
2853 do_indent(indent, "do:\n");
2854 print_exec(cs->dopart, indent+1, bracket);
2856 do_indent(indent, "}\n");
2858 do_indent(indent, "while ");
2859 print_exec(cs->condpart, 0, bracket);
2864 print_exec(cs->dopart, indent+1, bracket);
2866 do_indent(indent, "}\n");
2871 do_indent(indent, "switch");
2873 do_indent(indent, "if");
2874 if (cs->condpart && cs->condpart->type == Xbinode &&
2875 cast(binode, cs->condpart)->op == Block) {
2880 print_exec(cs->condpart, indent+1, bracket);
2882 do_indent(indent, "}\n");
2884 do_indent(indent, "then:\n");
2885 print_exec(cs->thenpart, indent+1, bracket);
2889 print_exec(cs->condpart, 0, bracket);
2895 print_exec(cs->thenpart, indent+1, bracket);
2897 do_indent(indent, "}\n");
2902 for (cp = cs->casepart; cp; cp = cp->next) {
2903 do_indent(indent, "case ");
2904 print_exec(cp->value, -1, 0);
2909 print_exec(cp->action, indent+1, bracket);
2911 do_indent(indent, "}\n");
2914 do_indent(indent, "else");
2919 print_exec(cs->elsepart, indent+1, bracket);
2921 do_indent(indent, "}\n");
2926 ###### propagate exec cases
2927 case Xcond_statement:
2929 // forpart and dopart must return Tnone
2930 // thenpart must return Tnone if there is a dopart,
2931 // otherwise it is like elsepart.
2933 // be bool if there is not casepart
2934 // match casepart->values if there is a switchpart
2935 // either be bool or match casepart->value if there
2937 // elsepart, casepart->action must match there return type
2938 // expected of this statement.
2939 struct cond_statement *cs = cast(cond_statement, prog);
2940 struct casepart *cp;
2942 t = propagate_types(cs->forpart, c, ok, Tnone, 0);
2943 if (!vtype_compat(Tnone, t, 0))
2945 t = propagate_types(cs->dopart, c, ok, Tnone, 0);
2946 if (!vtype_compat(Tnone, t, 0))
2949 t = propagate_types(cs->thenpart, c, ok, Tnone, 0);
2950 if (!vtype_compat(Tnone, t, 0))
2953 if (cs->casepart == NULL)
2954 propagate_types(cs->condpart, c, ok, Tbool, 0);
2956 /* Condpart must match case values, with bool permitted */
2958 for (cp = cs->casepart;
2959 cp && !t; cp = cp->next)
2960 t = propagate_types(cp->value, c, ok, NULL, 0);
2961 if (!t && cs->condpart)
2962 t = propagate_types(cs->condpart, c, ok, NULL, Rboolok);
2963 // Now we have a type (I hope) push it down
2965 for (cp = cs->casepart; cp; cp = cp->next)
2966 propagate_types(cp->value, c, ok, t, 0);
2967 propagate_types(cs->condpart, c, ok, t, Rboolok);
2970 // (if)then, else, and case parts must return expected type.
2971 if (!cs->dopart && !type)
2972 type = propagate_types(cs->thenpart, c, ok, NULL, rules);
2974 type = propagate_types(cs->elsepart, c, ok, NULL, rules);
2975 for (cp = cs->casepart;
2978 type = propagate_types(cp->action, c, ok, NULL, rules);
2981 propagate_types(cs->thenpart, c, ok, type, rules);
2982 propagate_types(cs->elsepart, c, ok, type, rules);
2983 for (cp = cs->casepart; cp ; cp = cp->next)
2984 propagate_types(cp->action, c, ok, type, rules);
2990 ###### interp exec cases
2991 case Xcond_statement:
2993 struct value v, cnd;
2994 struct casepart *cp;
2995 struct cond_statement *c = cast(cond_statement, e);
2998 interp_exec(c->forpart);
3001 cnd = interp_exec(c->condpart);
3004 if (!(cnd.type == Tnone ||
3005 (cnd.type == Tbool && cnd.bool != 0)))
3007 // cnd is Tnone or Tbool, doesn't need to be freed
3009 interp_exec(c->dopart);
3012 v = interp_exec(c->thenpart);
3013 if (v.type != Tnone || !c->dopart)
3017 } while (c->dopart);
3019 for (cp = c->casepart; cp; cp = cp->next) {
3020 v = interp_exec(cp->value);
3021 if (value_cmp(v, cnd) == 0) {
3024 return interp_exec(cp->action);
3030 return interp_exec(c->elsepart);
3035 ### Finally the whole program.
3037 Somewhat reminiscent of Pascal a (current) Ocean program starts with
3038 the keyword "program" and a list of variable names which are assigned
3039 values from command line arguments. Following this is a `block` which
3040 is the code to execute.
3042 As this is the top level, several things are handled a bit
3044 The whole program is not interpreted by `interp_exec` as that isn't
3045 passed the argument list which the program requires. Similarly type
3046 analysis is a bit more interesting at this level.
3051 ###### Parser: grammar
3054 Program -> InitProgram OpenScope Varlist Block OptNL ${
3057 $0->left = reorder_bilist($<3);
3059 var_block_close(config2context(config), CloseSequential);
3060 if (config2context(config)->scope_stack) abort();
3063 tok_err(config2context(config),
3064 "error: unhandled parse error.", &$1);
3067 InitProgram -> program ${
3068 context_init(config2context(config));
3071 Varlist -> Varlist ArgDecl ${
3080 ArgDecl -> IDENTIFIER ${ {
3081 struct variable *v = var_decl(config2context(config), $1.txt);
3088 ###### print binode cases
3090 do_indent(indent, "program");
3091 for (b2 = cast(binode, b->left); b2; b2 = cast(binode, b2->right)) {
3093 print_exec(b2->left, 0, 0);
3099 print_exec(b->right, indent+1, bracket);
3101 do_indent(indent, "}\n");
3104 ###### propagate binode cases
3105 case Program: abort();
3107 ###### core functions
3109 static int analyse_prog(struct exec *prog, struct parse_context *c)
3111 struct binode *b = cast(binode, prog);
3118 propagate_types(b->right, c, &ok, Tnone, 0);
3123 for (b = cast(binode, b->left); b; b = cast(binode, b->right)) {
3124 struct var *v = cast(var, b->left);
3125 if (!v->var->val.type) {
3126 v->var->where_set = b;
3127 v->var->val = val_init(Tstr);
3130 b = cast(binode, prog);
3133 propagate_types(b->right, c, &ok, Tnone, 0);
3138 /* Make sure everything is still consistent */
3139 propagate_types(b->right, c, &ok, Tnone, 0);
3143 static void interp_prog(struct exec *prog, char **argv)
3145 struct binode *p = cast(binode, prog);
3151 al = cast(binode, p->left);
3153 struct var *v = cast(var, al->left);
3154 struct value *vl = &v->var->val;
3156 if (argv[0] == NULL) {
3157 printf("Not enough args\n");
3160 al = cast(binode, al->right);
3162 *vl = parse_value(vl->type, argv[0]);
3163 if (vl->type == NULL)
3167 v = interp_exec(p->right);
3171 ###### interp binode cases
3172 case Program: abort();
3174 ## And now to test it out.
3176 Having a language requires having a "hello world" program. I'll
3177 provide a little more than that: a program that prints "Hello world"
3178 finds the GCD of two numbers, prints the first few elements of
3179 Fibonacci, and performs a binary search for a number.
3181 ###### File: oceani.mk
3184 @echo "===== TEST ====="
3185 ./oceani --section "test: hello" oceani.mdc 55 33
3190 print "Hello World, what lovely oceans you have!"
3191 /* When a variable is defined in both branches of an 'if',
3192 * and used afterwards, the variables are merged.
3198 print "Is", A, "bigger than", B,"? ", bigger
3199 /* If a variable is not used after the 'if', no
3200 * merge happens, so types can be different
3203 double:string = "yes"
3204 print A, "is more than twice", B, "?", double
3207 print "double", A, "is only", double
3217 print "GCD of", A, "and", B,"is", a
3219 print a, "is not positive, cannot calculate GCD"
3221 print b, "is not positive, cannot calculate GCD"
3226 print "Fibonacci:", f1,f2,
3227 then togo = togo - 1
3235 /* Binary search... */
3240 mid := (lo + hi) / 2
3252 print "Yay, I found", target
3254 print "Closest I found was", mid