1 # Ocean Interpreter - Stoney Creek version
3 Ocean is intended to be a compiled language, so this interpreter is
4 not targeted at being the final product. It is, rather, an intermediate
5 stage and fills that role in two distinct ways.
7 Firstly, it exists as a platform to experiment with the early language
8 design. An interpreter is easy to write and easy to get working, so
9 the barrier for entry is lower if I aim to start with an interpreter.
11 Secondly, the plan for the Ocean compiler is to write it in the
12 [Ocean language](http://ocean-lang.org). To achieve this we naturally
13 need some sort of boot-strap process and this interpreter - written in
14 portable C - will fill that role. It will be used to bootstrap the
17 Two features that are not needed to fill either of these roles are
18 performance and completeness. The interpreter only needs to be fast
19 enough to run small test programs and occasionally to run the compiler
20 on itself. It only needs to be complete enough to test aspects of the
21 design which are developed before the compiler is working, and to run
22 the compiler on itself. Any features not used by the compiler when
23 compiling itself are superfluous. They may be included anyway, but
26 Nonetheless, the interpreter should end up being reasonably complete,
27 and any performance bottlenecks which appear and are easily fixed, will
32 This second version of the interpreter exists to test out the
33 structured statement providing conditions and iteration, and simple
34 variable scoping. Clearly we need some minimal other functionality so
35 that values can be tested and instructions iterated over. All that
36 functionality is clearly not normative at this stage (not that
37 anything is **really** normative yet) and will change, so early test
38 code will certainly break in later versions.
40 The under-test parts of the language are:
42 - conditional/looping structured statements
43 - the `use` statement which is needed for that
44 - Variable binding using ":=" and "::=", and assignment using "=".
46 Elements which are present to make a usable language are:
48 - "blocks" of multiple statements.
49 - `pass`: a statement which does nothing.
50 - expressions: `+`, `-`, `*`, `/`, `%` can apply to numbers and `++` can
51 catenate strings. `and`, `or`, `not` manipulate Booleans, and
52 normal comparison operators can work on all three types.
53 - `print`: will print the values in a list of expressions.
54 - `program`: is given a list of identifiers to initialize from
59 Versions of the interpreter which obviously do not support a complete
60 language will be named after creeks and streams. This one is Stoney
63 Once we have something reasonably resembling a complete language, the
64 names of rivers will be used.
65 Early versions of the compiler will be named after seas. Major
66 releases of the compiler will be named after oceans. Hopefully I will
67 be finished once I get to the Pacific Ocean release.
71 As well as parsing and executing a program, the interpreter can print
72 out the program from the parsed internal structure. This is useful
73 for validating the parsing.
74 So the main requirements of the interpreter are:
76 - Parse the program, possibly with tracing,
77 - Analyse the parsed program to ensure consistency,
79 - Execute the program.
81 This is all performed by a single C program extracted with
84 There will be two formats for printing the program: a default and one
85 that uses bracketing. So a `--bracket` command line option is needed
86 for that. Normally the first code section found is used, however an
87 alternate section can be requested so that a file (such as this one)
88 can contain multiple programs This is effected with the `--section`
91 This code must be compiled with `-fplan9-extensions` so that anonymous
92 structures can be used.
94 ###### File: oceani.mk
96 myCFLAGS := -Wall -g -fplan9-extensions
97 CFLAGS := $(filter-out $(myCFLAGS),$(CFLAGS)) $(myCFLAGS)
98 myLDLIBS:= libparser.o libscanner.o libmdcode.o -licuuc
99 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
101 all :: $(LDLIBS) oceani
102 oceani.c oceani.h : oceani.mdc parsergen
103 ./parsergen -o oceani --LALR --tag Parser oceani.mdc
104 oceani.mk: oceani.mdc md2c
107 oceani: oceani.o $(LDLIBS)
108 $(CC) $(CFLAGS) -o oceani oceani.o $(LDLIBS)
110 ###### Parser: header
113 struct parse_context {
114 struct token_config config;
123 #define container_of(ptr, type, member) ({ \
124 const typeof( ((type *)0)->member ) *__mptr = (ptr); \
125 (type *)( (char *)__mptr - offsetof(type,member) );})
127 #define config2context(_conf) container_of(_conf, struct parse_context, \
136 #include <sys/mman.h>
155 static char Usage[] = "Usage: oceani --trace --print --noexec --brackets"
156 "--section=SectionName prog.ocn\n";
157 static const struct option long_options[] = {
158 {"trace", 0, NULL, 't'},
159 {"print", 0, NULL, 'p'},
160 {"noexec", 0, NULL, 'n'},
161 {"brackets", 0, NULL, 'b'},
162 {"section", 1, NULL, 's'},
165 const char *options = "tpnbs";
166 int main(int argc, char *argv[])
172 char *section = NULL;
173 struct parse_context context = {
175 .ignored = (1 << TK_line_comment)
176 | (1 << TK_block_comment),
177 .number_chars = ".,_+-",
182 int doprint=0, dotrace=0, doexec=1, brackets=0;
184 while ((opt = getopt_long(argc, argv, options, long_options, NULL))
187 case 't': dotrace=1; break;
188 case 'p': doprint=1; break;
189 case 'n': doexec=0; break;
190 case 'b': brackets=1; break;
191 case 's': section = optarg; break;
192 default: fprintf(stderr, Usage);
196 if (optind >= argc) {
197 fprintf(stderr, "oceani: no input file given\n");
200 fd = open(argv[optind], O_RDONLY);
202 fprintf(stderr, "oceani: cannot open %s\n", argv[optind]);
205 context.file_name = argv[optind];
206 len = lseek(fd, 0, 2);
207 file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
208 s = code_extract(file, file+len, NULL);
210 fprintf(stderr, "oceani: could not find any code in %s\n",
215 ## context initialization
219 for (ss = s; ss; ss = ss->next) {
220 struct text sec = ss->section;
221 if (sec.len == strlen(section) &&
222 strncmp(sec.txt, section, sec.len) == 0)
226 parse_oceani(ss->code, &context.config,
227 dotrace ? stderr : NULL);
229 fprintf(stderr, "oceani: cannot find section %s\n",
234 parse_oceani(s->code, &context.config,
235 dotrace ? stderr : NULL);
237 fprintf(stderr, "oceani: no program found.\n");
238 context.parse_error = 1;
240 if (context.prog && doprint) {
243 print_exec(context.prog, 0, brackets);
245 if (context.prog && doexec && !context.parse_error) {
246 if (!analyse_prog(context.prog, &context)) {
247 fprintf(stderr, "oceani: type error in program - not running.\n");
250 interp_prog(context.prog, argv+optind+1);
253 free_exec(context.prog);
256 struct section *t = s->next;
262 ## free context types
263 exit(context.parse_error ? 1 : 0);
268 The four requirements of parse, analyse, print, interpret apply to
269 each language element individually so that is how most of the code
272 Three of the four are fairly self explanatory. The one that requires
273 a little explanation is the analysis step.
275 The current language design does not require the types of variables to
276 be declared, but they must still have a single type. Different
277 operations impose different requirements on the variables, for example
278 addition requires both arguments to be numeric, and assignment
279 requires the variable on the left to have the same type as the
280 expression on the right.
282 Analysis involves propagating these type requirements around and
283 consequently setting the type of each variable. If any requirements
284 are violated (e.g. a string is compared with a number) or if a
285 variable needs to have two different types, then an error is raised
286 and the program will not run.
288 If the same variable is declared in both branchs of an 'if/else', or
289 in all cases of a 'switch' then the multiple instances may be merged
290 into just one variable if the variable is references after the
291 conditional statement. When this happens, the types must naturally be
292 consistent across all the branches. When the variable is not used
293 outside the if, the variables in the different branches are distinct
294 and can be of different types.
296 Determining the types of all variables early is important for
297 processing command line arguments. These can be assigned to any type
298 of variable, but we must first know the correct type so any required
299 conversion can happen. If a variable is associated with a command
300 line argument but no type can be interpreted (e.g. the variable is
301 only ever used in a `print` statement), then the type is set to
304 Undeclared names may only appear in "use" statements and "case" expressions.
305 These names are given a type of "label" and a unique value.
306 This allows them to fill the role of a name in an enumerated type, which
307 is useful for testing the `switch` statement.
309 As we will see, the condition part of a `while` statement can return
310 either a Boolean or some other type. This requires that the expect
311 type that gets passed around comprises a type (`enum vtype`) and a
312 flag to indicate that `Vbool` is also permitted.
314 As there are, as yet, no distinct types that are compatible, there
315 isn't much subtlety in the analysis. When we have distinct number
316 types, this will become more interesting.
320 When analysis discovers an inconsistency it needs to report an error;
321 just refusing to run the code ensures that the error doesn't cascade,
322 but by itself it isn't very useful. A clear understand of the sort of
323 error message that are useful will help guide the process of analysis.
325 At a simplistic level, the only sort of error that type analysis can
326 report is that the type of some construct doesn't match a contextual
327 requirement. For example, in `4 + "hello"` the addition provides a
328 contextual requirement for numbers, but `"hello"` is not a number. In
329 this particular example no further information is needed as the types
330 are obvious from local information. When a variable is involved that
331 isn't the case. It may be helpful to explain why the variable has a
332 particular type, by indicating the location where the type was set,
333 whether by declaration or usage.
335 Using a recursive-descent analysis we can easily detect a problem at
336 multiple locations. In "`hello:= "there"; 4 + hello`" the addition
337 will detect that one argument is not a number and the usage of `hello`
338 will detect that a number was wanted, but not provided. In this
339 (early) version of the language, we will generate error reports at
340 multiple locations, so the use of `hello` will report an error and
341 explain were the value was set, and the addition will report an error
342 and say why numbers are needed. To be able to report locations for
343 errors, each language element will need to record a file location
344 (line and column) and each variable will need to record the language
345 element where its type was set. For now we will assume that each line
346 of an error message indicates one location in the file, and up to 2
347 types. So we provide a `printf`-like function which takes a format, a
348 language (a `struct exec` which has not yet been introduced), and 2
349 types. "`%1`" reports the first type, "`%2`" reports the second. We
350 will need a function to print the location, once we know how that is
351 stored. As will be explained later, there are sometimes extra rules for
352 type matching and they might affect error messages, we need to pass those
355 As well as type errors, we sometimes need to report problems with
356 tokens, which might be unexpected or might name a type that has not
357 been defined. For these we have `tok_err()` which reports an error
358 with a given token. Each of the error functions sets the flag in the
359 context so indicate that parsing failed.
363 static void fput_loc(struct exec *loc, FILE *f);
365 ###### core functions
367 static void type_err(struct parse_context *c,
368 char *fmt, struct exec *loc,
369 struct type *t1, int rules, struct type *t2)
371 fprintf(stderr, "%s:", c->file_name);
372 fput_loc(loc, stderr);
373 for (; *fmt ; fmt++) {
380 case '%': fputc(*fmt, stderr); break; // NOTEST
381 default: fputc('?', stderr); break; // NOTEST
383 type_print(t1, stderr);
386 type_print(t2, stderr);
395 static void tok_err(struct parse_context *c, char *fmt, struct token *t)
397 fprintf(stderr, "%s:%d:%d: %s: %.*s\n", c->file_name, t->line, t->col, fmt,
398 t->txt.len, t->txt.txt);
404 One last introductory step before detailing the language elements and
405 providing their four requirements is to establish the data structures
406 to store these elements.
408 There are two key objects that we need to work with: executable
409 elements which comprise the program, and values which the program
410 works with. Between these are the variables in their various scopes
411 which hold the values, and types which classify the values stored and
412 manipulatd by executables.
416 Values come in a wide range of types, with more likely to be added.
417 Each type needs to be able to parse and print its own values (for
418 convenience at least) as well as to compare two values, at least for
419 equality and possibly for order. For now, values might need to be
420 duplicated and freed, though eventually such manipulations will be
421 better integrated into the language.
423 Rather than requiring every numeric type to support all numeric
424 operations (add, multiple, etc), we allow types to be able to present
425 as one of a few standard types: integer, float, and fraction. The
426 existance of these conversion functions enable types to determine if
427 they are compatible with other types.
429 Named type are stored in a simple linked list. Objects of each type are "values"
430 which are often passed around by value.
437 ## value union fields
444 struct value (*init)(struct type *type);
445 struct value (*prepare)(struct type *type);
446 struct value (*parse)(struct type *type, char *str);
447 void (*print)(struct value val);
448 void (*print_type)(struct type *type, FILE *f);
449 void (*print_type_decl)(struct type *type, FILE *f);
450 int (*cmp_order)(struct value v1, struct value v2);
451 int (*cmp_eq)(struct value v1, struct value v2);
452 struct value (*dup)(struct value val);
453 void (*free)(struct value val);
454 void (*free_type)(struct type *t);
455 int (*compat)(struct type *this, struct type *other);
456 long long (*to_int)(struct value *v);
457 double (*to_float)(struct value *v);
458 int (*to_mpq)(mpq_t *q, struct value *v);
466 struct type *typelist;
470 static struct type *find_type(struct parse_context *c, struct text s)
472 struct type *l = c->typelist;
475 text_cmp(l->name, s) != 0)
480 static struct type *add_type(struct parse_context *c, struct text s,
485 n = calloc(1, sizeof(*n));
488 n->next = c->typelist;
493 static void free_type(struct type *t)
495 /* The type is always a reference to something in the
496 * context, so we don't need to free anything.
500 static void free_value(struct value v)
506 static int type_compat(struct type *require, struct type *have, int rules)
508 if ((rules & Rboolok) && have == Tbool)
510 if ((rules & Rnolabel) && have == Tlabel)
512 if (!require || !have)
516 return require->compat(require, have);
518 return require == have;
521 static void type_print(struct type *type, FILE *f)
524 fputs("*unknown*type*", f);
525 else if (type->name.len)
526 fprintf(f, "%.*s", type->name.len, type->name.txt);
527 else if (type->print_type)
528 type->print_type(type, f);
530 fputs("*invalid*type*", f); // NOTEST
533 static struct value val_prepare(struct type *type)
538 return type->prepare(type);
543 static struct value val_init(struct type *type)
548 return type->init(type);
553 static struct value dup_value(struct value v)
556 return v.type->dup(v);
560 static int value_cmp(struct value left, struct value right)
562 if (left.type && left.type->cmp_order)
563 return left.type->cmp_order(left, right);
564 if (left.type && left.type->cmp_eq)
565 return left.type->cmp_eq(left, right);
569 static void print_value(struct value v)
571 if (v.type && v.type->print)
574 printf("*Unknown*"); // NOTEST
577 static struct value parse_value(struct type *type, char *arg)
581 if (type && type->parse)
582 return type->parse(type, arg);
583 rv.type = NULL; // NOTEST
589 static void free_value(struct value v);
590 static int type_compat(struct type *require, struct type *have, int rules);
591 static void type_print(struct type *type, FILE *f);
592 static struct value val_init(struct type *type);
593 static struct value dup_value(struct value v);
594 static int value_cmp(struct value left, struct value right);
595 static void print_value(struct value v);
596 static struct value parse_value(struct type *type, char *arg);
598 ###### free context types
600 while (context.typelist) {
601 struct type *t = context.typelist;
603 context.typelist = t->next;
611 Values of the base types can be numbers, which we represent as
612 multi-precision fractions, strings, Booleans and labels. When
613 analysing the program we also need to allow for places where no value
614 is meaningful (type `Tnone`) and where we don't know what type to
615 expect yet (type is `NULL`).
617 Values are never shared, they are always copied when used, and freed
618 when no longer needed.
620 When propagating type information around the program, we need to
621 determine if two types are compatible, where type `NULL` is compatible
622 with anything. There are two special cases with type compatibility,
623 both related to the Conditional Statement which will be described
624 later. In some cases a Boolean can be accepted as well as some other
625 primary type, and in others any type is acceptable except a label (`Vlabel`).
626 A separate function encode these cases will simplify some code later.
628 When assigning command line arguments to variables, we need to be able
629 to parse each type from a string.
637 myLDLIBS := libnumber.o libstring.o -lgmp
638 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
640 ###### type union fields
641 enum vtype {Vnone, Vstr, Vnum, Vbool, Vlabel} vtype;
643 ###### value union fields
650 static void _free_value(struct value v)
652 switch (v.type->vtype) {
654 case Vstr: free(v.str.txt); break;
655 case Vnum: mpq_clear(v.num); break;
661 ###### value functions
663 static struct value _val_prepare(struct type *type)
668 switch(type->vtype) {
672 memset(&rv.num, 0, sizeof(rv.num));
688 static struct value _val_init(struct type *type)
693 switch(type->vtype) {
694 case Vnone: // NOTEST
697 mpq_init(rv.num); break;
699 rv.str.txt = malloc(1);
705 case Vlabel: // NOTEST
706 rv.label = NULL; // NOTEST
712 static struct value _dup_value(struct value v)
716 switch (rv.type->vtype) {
717 case Vnone: // NOTEST
727 mpq_set(rv.num, v.num);
730 rv.str.len = v.str.len;
731 rv.str.txt = malloc(rv.str.len);
732 memcpy(rv.str.txt, v.str.txt, v.str.len);
738 static int _value_cmp(struct value left, struct value right)
741 if (left.type != right.type)
742 return left.type - right.type; // NOTEST
743 switch (left.type->vtype) {
744 case Vlabel: cmp = left.label == right.label ? 0 : 1; break;
745 case Vnum: cmp = mpq_cmp(left.num, right.num); break;
746 case Vstr: cmp = text_cmp(left.str, right.str); break;
747 case Vbool: cmp = left.bool - right.bool; break;
748 case Vnone: cmp = 0; // NOTEST
753 static void _print_value(struct value v)
755 switch (v.type->vtype) {
756 case Vnone: // NOTEST
757 printf("*no-value*"); break; // NOTEST
758 case Vlabel: // NOTEST
759 printf("*label-%p*", v.label); break; // NOTEST
761 printf("%.*s", v.str.len, v.str.txt); break;
763 printf("%s", v.bool ? "True":"False"); break;
768 mpf_set_q(fl, v.num);
769 gmp_printf("%Fg", fl);
776 static struct value _parse_value(struct type *type, char *arg)
784 switch(type->vtype) {
785 case Vlabel: // NOTEST
786 case Vnone: // NOTEST
787 val.type = NULL; // NOTEST
790 val.str.len = strlen(arg);
791 val.str.txt = malloc(val.str.len);
792 memcpy(val.str.txt, arg, val.str.len);
799 tx.txt = arg; tx.len = strlen(tx.txt);
800 if (number_parse(val.num, tail, tx) == 0)
803 mpq_neg(val.num, val.num);
805 printf("Unsupported suffix: %s\n", arg);
810 if (strcasecmp(arg, "true") == 0 ||
811 strcmp(arg, "1") == 0)
813 else if (strcasecmp(arg, "false") == 0 ||
814 strcmp(arg, "0") == 0)
817 printf("Bad bool: %s\n", arg);
825 static void _free_value(struct value v);
827 static struct type base_prototype = {
829 .prepare = _val_prepare,
830 .parse = _parse_value,
831 .print = _print_value,
832 .cmp_order = _value_cmp,
833 .cmp_eq = _value_cmp,
838 static struct type *Tbool, *Tstr, *Tnum, *Tnone, *Tlabel;
841 static struct type *add_base_type(struct parse_context *c, char *n, enum vtype vt)
843 struct text txt = { n, strlen(n) };
846 t = add_type(c, txt, &base_prototype);
851 ###### context initialization
853 Tbool = add_base_type(&context, "Boolean", Vbool);
854 Tstr = add_base_type(&context, "string", Vstr);
855 Tnum = add_base_type(&context, "number", Vnum);
856 Tnone = add_base_type(&context, "none", Vnone);
857 Tlabel = add_base_type(&context, "label", Vlabel);
861 Variables are scoped named values. We store the names in a linked
862 list of "bindings" sorted lexically, and use sequential search and
869 struct binding *next; // in lexical order
873 This linked list is stored in the parse context so that "reduce"
874 functions can find or add variables, and so the analysis phase can
875 ensure that every variable gets a type.
879 struct binding *varlist; // In lexical order
883 static struct binding *find_binding(struct parse_context *c, struct text s)
885 struct binding **l = &c->varlist;
890 (cmp = text_cmp((*l)->name, s)) < 0)
894 n = calloc(1, sizeof(*n));
901 Each name can be linked to multiple variables defined in different
902 scopes. Each scope starts where the name is declared and continues
903 until the end of the containing code block. Scopes of a given name
904 cannot nest, so a declaration while a name is in-scope is an error.
906 ###### binding fields
907 struct variable *var;
911 struct variable *previous;
913 struct binding *name;
914 struct exec *where_decl;// where name was declared
915 struct exec *where_set; // where type was set
919 While the naming seems strange, we include local constants in the
920 definition of variables. A name declared `var := value` can
921 subsequently be changed, but a name declared `var ::= value` cannot -
924 ###### variable fields
927 Scopes in parallel branches can be partially merged. More
928 specifically, if a given name is declared in both branches of an
929 if/else then its scope is a candidate for merging. Similarly if
930 every branch of an exhaustive switch (e.g. has an "else" clause)
931 declares a given name, then the scopes from the branches are
932 candidates for merging.
934 Note that names declared inside a loop (which is only parallel to
935 itself) are never visible after the loop. Similarly names defined in
936 scopes which are not parallel, such as those started by `for` and
937 `switch`, are never visible after the scope. Only variables defined in
938 both `then` and `else` (including the implicit then after an `if`, and
939 excluding `then` used with `for`) and in all `case`s and `else` of a
940 `switch` or `while` can be visible beyond the `if`/`switch`/`while`.
942 Labels, which are a bit like variables, follow different rules.
943 Labels are not explicitly declared, but if an undeclared name appears
944 in a context where a label is legal, that effectively declares the
945 name as a label. The declaration remains in force (or in scope) at
946 least to the end of the immediately containing block and conditionally
947 in any larger containing block which does not declare the name in some
948 other way. Importantly, the conditional scope extension happens even
949 if the label is only used in one parallel branch of a conditional --
950 when used in one branch it is treated as having been declared in all
953 Merge candidates are tentatively visible beyond the end of the
954 branching statement which creates them. If the name is used, the
955 merge is affirmed and they become a single variable visible at the
956 outer layer. If not - if it is redeclared first - the merge lapses.
958 To track scopes we have an extra stack, implemented as a linked list,
959 which roughly parallels the parse stack and which is used exclusively
960 for scoping. When a new scope is opened, a new frame is pushed and
961 the child-count of the parent frame is incremented. This child-count
962 is used to distinguish between the first of a set of parallel scopes,
963 in which declared variables must not be in scope, and subsequent
964 branches, whether they must already be conditionally scoped.
966 To push a new frame *before* any code in the frame is parsed, we need a
967 grammar reduction. This is most easily achieved with a grammar
968 element which derives the empty string, and creates the new scope when
969 it is recognized. This can be placed, for example, between a keyword
970 like "if" and the code following it.
974 struct scope *parent;
980 struct scope *scope_stack;
983 static void scope_pop(struct parse_context *c)
985 struct scope *s = c->scope_stack;
987 c->scope_stack = s->parent;
992 static void scope_push(struct parse_context *c)
994 struct scope *s = calloc(1, sizeof(*s));
996 c->scope_stack->child_count += 1;
997 s->parent = c->scope_stack;
1005 OpenScope -> ${ scope_push(config2context(config)); }$
1008 Each variable records a scope depth and is in one of four states:
1010 - "in scope". This is the case between the declaration of the
1011 variable and the end of the containing block, and also between
1012 the usage with affirms a merge and the end of that block.
1014 The scope depth is not greater than the current parse context scope
1015 nest depth. When the block of that depth closes, the state will
1016 change. To achieve this, all "in scope" variables are linked
1017 together as a stack in nesting order.
1019 - "pending". The "in scope" block has closed, but other parallel
1020 scopes are still being processed. So far, every parallel block at
1021 the same level that has closed has declared the name.
1023 The scope depth is the depth of the last parallel block that
1024 enclosed the declaration, and that has closed.
1026 - "conditionally in scope". The "in scope" block and all parallel
1027 scopes have closed, and no further mention of the name has been
1028 seen. This state includes a secondary nest depth which records the
1029 outermost scope seen since the variable became conditionally in
1030 scope. If a use of the name is found, the variable becomes "in
1031 scope" and that secondary depth becomes the recorded scope depth.
1032 If the name is declared as a new variable, the old variable becomes
1033 "out of scope" and the recorded scope depth stays unchanged.
1035 - "out of scope". The variable is neither in scope nor conditionally
1036 in scope. It is permanently out of scope now and can be removed from
1037 the "in scope" stack.
1040 ###### variable fields
1041 int depth, min_depth;
1042 enum { OutScope, PendingScope, CondScope, InScope } scope;
1043 struct variable *in_scope;
1045 ###### parse context
1047 struct variable *in_scope;
1049 All variables with the same name are linked together using the
1050 'previous' link. Those variable that have
1051 been affirmatively merged all have a 'merged' pointer that points to
1052 one primary variable - the most recently declared instance. When
1053 merging variables, we need to also adjust the 'merged' pointer on any
1054 other variables that had previously been merged with the one that will
1055 no longer be primary.
1057 ###### variable fields
1058 struct variable *merged;
1060 ###### ast functions
1062 static void variable_merge(struct variable *primary, struct variable *secondary)
1066 if (primary->merged)
1068 primary = primary->merged;
1070 for (v = primary->previous; v; v=v->previous)
1071 if (v == secondary || v == secondary->merged ||
1072 v->merged == secondary ||
1073 (v->merged && v->merged == secondary->merged)) {
1074 v->scope = OutScope;
1075 v->merged = primary;
1079 ###### free context vars
1081 while (context.varlist) {
1082 struct binding *b = context.varlist;
1083 struct variable *v = b->var;
1084 context.varlist = b->next;
1087 struct variable *t = v;
1091 if (t->min_depth == 0)
1092 free_exec(t->where_decl);
1097 #### Manipulating Bindings
1099 When a name is conditionally visible, a new declaration discards the
1100 old binding - the condition lapses. Conversely a usage of the name
1101 affirms the visibility and extends it to the end of the containing
1102 block - i.e. the block that contains both the original declaration and
1103 the latest usage. This is determined from `min_depth`. When a
1104 conditionally visible variable gets affirmed like this, it is also
1105 merged with other conditionally visible variables with the same name.
1107 When we parse a variable declaration we either signal an error if the
1108 name is currently bound, or create a new variable at the current nest
1109 depth if the name is unbound or bound to a conditionally scoped or
1110 pending-scope variable. If the previous variable was conditionally
1111 scoped, it and its homonyms becomes out-of-scope.
1113 When we parse a variable reference (including non-declarative
1114 assignment) we signal an error if the name is not bound or is bound to
1115 a pending-scope variable; update the scope if the name is bound to a
1116 conditionally scoped variable; or just proceed normally if the named
1117 variable is in scope.
1119 When we exit a scope, any variables bound at this level are either
1120 marked out of scope or pending-scoped, depending on whether the
1121 scope was sequential or parallel.
1123 When exiting a parallel scope we check if there are any variables that
1124 were previously pending and are still visible. If there are, then
1125 there weren't redeclared in the most recent scope, so they cannot be
1126 merged and must become out-of-scope. If it is not the first of
1127 parallel scopes (based on `child_count`), we check that there was a
1128 previous binding that is still pending-scope. If there isn't, the new
1129 variable must now be out-of-scope.
1131 When exiting a sequential scope that immediately enclosed parallel
1132 scopes, we need to resolve any pending-scope variables. If there was
1133 no `else` clause, and we cannot determine that the `switch` was exhaustive,
1134 we need to mark all pending-scope variable as out-of-scope. Otherwise
1135 all pending-scope variables become conditionally scoped.
1138 enum closetype { CloseSequential, CloseParallel, CloseElse };
1140 ###### ast functions
1142 static struct variable *var_decl(struct parse_context *c, struct text s)
1144 struct binding *b = find_binding(c, s);
1145 struct variable *v = b->var;
1147 switch (v ? v->scope : OutScope) {
1149 /* Caller will report the error */
1153 v && v->scope == CondScope;
1155 v->scope = OutScope;
1159 v = calloc(1, sizeof(*v));
1160 v->previous = b->var;
1163 v->min_depth = v->depth = c->scope_depth;
1165 v->in_scope = c->in_scope;
1167 v->val = val_prepare(NULL);
1171 static struct variable *var_ref(struct parse_context *c, struct text s)
1173 struct binding *b = find_binding(c, s);
1174 struct variable *v = b->var;
1175 struct variable *v2;
1177 switch (v ? v->scope : OutScope) {
1180 /* Caller will report the error */
1183 /* All CondScope variables of this name need to be merged
1184 * and become InScope
1186 v->depth = v->min_depth;
1188 for (v2 = v->previous;
1189 v2 && v2->scope == CondScope;
1191 variable_merge(v, v2);
1199 static void var_block_close(struct parse_context *c, enum closetype ct)
1201 /* Close off all variables that are in_scope */
1202 struct variable *v, **vp, *v2;
1205 for (vp = &c->in_scope;
1206 v = *vp, v && v->depth > c->scope_depth && v->min_depth > c->scope_depth;
1208 if (v->name->var == v) switch (ct) {
1210 case CloseParallel: /* handle PendingScope */
1214 if (c->scope_stack->child_count == 1)
1215 v->scope = PendingScope;
1216 else if (v->previous &&
1217 v->previous->scope == PendingScope)
1218 v->scope = PendingScope;
1219 else if (v->val.type == Tlabel)
1220 v->scope = PendingScope;
1221 else if (v->name->var == v)
1222 v->scope = OutScope;
1223 if (ct == CloseElse) {
1224 /* All Pending variables with this name
1225 * are now Conditional */
1227 v2 && v2->scope == PendingScope;
1229 v2->scope = CondScope;
1234 v2 && v2->scope == PendingScope;
1236 if (v2->val.type != Tlabel)
1237 v2->scope = OutScope;
1239 case OutScope: break;
1242 case CloseSequential:
1243 if (v->val.type == Tlabel)
1244 v->scope = PendingScope;
1247 v->scope = OutScope;
1250 /* There was no 'else', so we can only become
1251 * conditional if we know the cases were exhaustive,
1252 * and that doesn't mean anything yet.
1253 * So only labels become conditional..
1256 v2 && v2->scope == PendingScope;
1258 if (v2->val.type == Tlabel) {
1259 v2->scope = CondScope;
1260 v2->min_depth = c->scope_depth;
1262 v2->scope = OutScope;
1265 case OutScope: break;
1269 if (v->scope == OutScope || v->name->var != v)
1278 Executables can be lots of different things. In many cases an
1279 executable is just an operation combined with one or two other
1280 executables. This allows for expressions and lists etc. Other times
1281 an executable is something quite specific like a constant or variable
1282 name. So we define a `struct exec` to be a general executable with a
1283 type, and a `struct binode` which is a subclass of `exec`, forms a
1284 node in a binary tree, and holds an operation. There will be other
1285 subclasses, and to access these we need to be able to `cast` the
1286 `exec` into the various other types.
1289 #define cast(structname, pointer) ({ \
1290 const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \
1291 if (__mptr && *__mptr != X##structname) abort(); \
1292 (struct structname *)( (char *)__mptr);})
1294 #define new(structname) ({ \
1295 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1296 __ptr->type = X##structname; \
1297 __ptr->line = -1; __ptr->column = -1; \
1300 #define new_pos(structname, token) ({ \
1301 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1302 __ptr->type = X##structname; \
1303 __ptr->line = token.line; __ptr->column = token.col; \
1312 enum exec_types type;
1320 struct exec *left, *right;
1323 ###### ast functions
1325 static int __fput_loc(struct exec *loc, FILE *f)
1329 if (loc->line >= 0) {
1330 fprintf(f, "%d:%d: ", loc->line, loc->column);
1333 if (loc->type == Xbinode)
1334 return __fput_loc(cast(binode,loc)->left, f) ||
1335 __fput_loc(cast(binode,loc)->right, f);
1338 static void fput_loc(struct exec *loc, FILE *f)
1340 if (!__fput_loc(loc, f))
1341 fprintf(f, "??:??: "); // NOTEST
1344 Each different type of `exec` node needs a number of functions
1345 defined, a bit like methods. We must be able to be able to free it,
1346 print it, analyse it and execute it. Once we have specific `exec`
1347 types we will need to parse them too. Let's take this a bit more
1352 The parser generator requires a `free_foo` function for each struct
1353 that stores attributes and they will be `exec`s and subtypes there-of.
1354 So we need `free_exec` which can handle all the subtypes, and we need
1357 ###### ast functions
1359 static void free_binode(struct binode *b)
1364 free_exec(b->right);
1368 ###### core functions
1369 static void free_exec(struct exec *e)
1378 ###### forward decls
1380 static void free_exec(struct exec *e);
1382 ###### free exec cases
1383 case Xbinode: free_binode(cast(binode, e)); break;
1387 Printing an `exec` requires that we know the current indent level for
1388 printing line-oriented components. As will become clear later, we
1389 also want to know what sort of bracketing to use.
1391 ###### ast functions
1393 static void do_indent(int i, char *str)
1400 ###### core functions
1401 static void print_binode(struct binode *b, int indent, int bracket)
1405 ## print binode cases
1409 static void print_exec(struct exec *e, int indent, int bracket)
1415 print_binode(cast(binode, e), indent, bracket); break;
1420 ###### forward decls
1422 static void print_exec(struct exec *e, int indent, int bracket);
1426 As discussed, analysis involves propagating type requirements around
1427 the program and looking for errors.
1429 So `propagate_types` is passed an expected type (being a `struct type`
1430 pointer together with some `val_rules` flags) that the `exec` is
1431 expected to return, and returns the type that it does return, either
1432 of which can be `NULL` signifying "unknown". An `ok` flag is passed
1433 by reference. It is set to `0` when an error is found, and `2` when
1434 any change is made. If it remains unchanged at `1`, then no more
1435 propagation is needed.
1439 enum val_rules {Rnolabel = 1<<0, Rboolok = 1<<1, Rnoconstant = 2<<1};
1443 if (rules & Rnolabel)
1444 fputs(" (labels not permitted)", stderr);
1447 ###### core functions
1449 static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1450 struct type *type, int rules)
1457 switch (prog->type) {
1460 struct binode *b = cast(binode, prog);
1462 ## propagate binode cases
1466 ## propagate exec cases
1473 Interpreting an `exec` doesn't require anything but the `exec`. State
1474 is stored in variables and each variable will be directly linked from
1475 within the `exec` tree. The exception to this is the whole `program`
1476 which needs to look at command line arguments. The `program` will be
1477 interpreted separately.
1479 Each `exec` can return a value, which may be `Tnone` but must be non-NULL;
1481 ###### core functions
1484 struct value val, *lval;
1487 static struct lrval _interp_exec(struct exec *e);
1489 static struct value interp_exec(struct exec *e)
1491 struct lrval ret = _interp_exec(e);
1494 return dup_value(*ret.lval);
1499 static struct value *linterp_exec(struct exec *e)
1501 struct lrval ret = _interp_exec(e);
1506 static struct lrval _interp_exec(struct exec *e)
1509 struct value rv, *lrv = NULL;
1520 struct binode *b = cast(binode, e);
1521 struct value left, right, *lleft;
1522 left.type = right.type = Tnone;
1524 ## interp binode cases
1526 free_value(left); free_value(right);
1529 ## interp exec cases
1538 Now that we have the shape of the interpreter in place we can add some
1539 complex types and connected them in to the data structures and the
1540 different phases of parse, analyse, print, interpret.
1542 Thus far we have arrays and structs.
1546 Arrays can be declared by giving a size and a type, as `[size]type' so
1547 `freq:[26]number` declares `freq` to be an array of 26 numbers. The
1548 size can be an arbitrary expression which is evaluated when the name
1551 Arrays cannot be assigned. When pointers are introduced we will also
1552 introduce array slices which can refer to part or all of an array -
1553 the assignment syntax will create a slice. For now, an array can only
1554 ever be referenced by the name it is declared with. It is likely that
1555 a "`copy`" primitive will eventually be define which can be used to
1556 make a copy of an array with controllable depth.
1558 ###### type union fields
1562 struct variable *vsize;
1563 struct type *member;
1566 ###### value union fields
1568 struct value *elmnts;
1571 ###### value functions
1573 static struct value array_prepare(struct type *type)
1578 ret.array.elmnts = NULL;
1582 static struct value array_init(struct type *type)
1588 if (type->array.vsize) {
1591 mpz_tdiv_q(q, mpq_numref(type->array.vsize->val.num),
1592 mpq_denref(type->array.vsize->val.num));
1593 type->array.size = mpz_get_si(q);
1596 ret.array.elmnts = calloc(type->array.size,
1597 sizeof(ret.array.elmnts[0]));
1598 for (i = 0; ret.array.elmnts && i < type->array.size; i++)
1599 ret.array.elmnts[i] = val_init(type->array.member);
1603 static void array_free(struct value val)
1607 if (val.array.elmnts)
1608 for (i = 0; i < val.type->array.size; i++)
1609 free_value(val.array.elmnts[i]);
1610 free(val.array.elmnts);
1613 static int array_compat(struct type *require, struct type *have)
1615 if (have->compat != require->compat)
1617 /* Both are arrays, so we can look at details */
1618 if (!type_compat(require->array.member, have->array.member, 0))
1620 if (require->array.vsize == NULL && have->array.vsize == NULL)
1621 return require->array.size == have->array.size;
1623 return require->array.vsize == have->array.vsize;
1626 static void array_print_type(struct type *type, FILE *f)
1629 if (type->array.vsize) {
1630 struct binding *b = type->array.vsize->name;
1631 fprintf(f, "%.*s]", b->name.len, b->name.txt);
1633 fprintf(f, "%d]", type->array.size);
1634 type_print(type->array.member, f);
1637 static struct type array_prototype = {
1638 .prepare = array_prepare,
1640 .print_type = array_print_type,
1641 .compat = array_compat,
1647 | [ NUMBER ] Type ${
1648 $0 = calloc(1, sizeof(struct type));
1649 *($0) = array_prototype;
1650 $0->array.member = $<4;
1651 $0->array.vsize = NULL;
1653 struct parse_context *c = config2context(config);
1656 if (number_parse(num, tail, $2.txt) == 0)
1657 tok_err(c, "error: unrecognised number", &$2);
1659 tok_err(c, "error: unsupported number suffix", &$2);
1661 $0->array.size = mpz_get_ui(mpq_numref(num));
1662 if (mpz_cmp_ui(mpq_denref(num), 1) != 0) {
1663 tok_err(c, "error: array size must be an integer",
1665 } else if (mpz_cmp_ui(mpq_numref(num), 1UL << 30) >= 0)
1666 tok_err(c, "error: array size is too large",
1670 $0->next= c->anon_typelist;
1671 c->anon_typelist = $0;
1675 | [ IDENTIFIER ] Type ${ {
1676 struct parse_context *c = config2context(config);
1677 struct variable *v = var_ref(c, $2.txt);
1680 tok_err(config2context(config), "error: name undeclared", &$2);
1681 else if (!v->constant)
1682 tok_err(config2context(config), "error: array size must be a constant", &$2);
1684 $0 = calloc(1, sizeof(struct type));
1685 *($0) = array_prototype;
1686 $0->array.member = $<4;
1688 $0->array.vsize = v;
1689 $0->next= c->anon_typelist;
1690 c->anon_typelist = $0;
1693 ###### parse context
1695 struct type *anon_typelist;
1697 ###### free context types
1699 while (context.anon_typelist) {
1700 struct type *t = context.anon_typelist;
1702 context.anon_typelist = t->next;
1709 ###### variable grammar
1711 | Variable [ Expression ] ${ {
1712 struct binode *b = new(binode);
1719 ###### print binode cases
1721 print_exec(b->left, -1, 0);
1723 print_exec(b->right, -1, 0);
1727 ###### propagate binode cases
1729 /* left must be an array, right must be a number,
1730 * result is the member type of the array
1732 propagate_types(b->right, c, ok, Tnum, 0);
1733 t = propagate_types(b->left, c, ok, NULL, rules & Rnoconstant);
1734 if (!t || t->compat != array_compat) {
1735 type_err(c, "error: %1 cannot be indexed", prog, t, 0, NULL);
1739 if (!type_compat(type, t->array.member, rules)) {
1740 type_err(c, "error: have %1 but need %2", prog,
1741 t->array.member, rules, type);
1744 return t->array.member;
1748 ###### interp binode cases
1753 lleft = linterp_exec(b->left);
1754 right = interp_exec(b->right);
1756 mpz_tdiv_q(q, mpq_numref(right.num), mpq_denref(right.num));
1760 if (i >= 0 && i < lleft->type->array.size)
1761 lrv = &lleft->array.elmnts[i];
1763 rv = val_init(lleft->type->array.member);
1769 A `struct` is a data-type that contains one or more other data-types.
1770 It differs from an array in that each member can be of a different
1771 type, and they are accessed by name rather than by number. Thus you
1772 cannot choose and element by calculation, you need to know what you
1775 The language makes no promises about how a give structure will be
1776 stored in memory - it is free to rearrange fields to suit whatever
1777 criteria seems important.
1779 Structs are declared separately from program code - they cannot be
1780 declared in-line in a variable declaration like arrays can. A struct
1781 is given a name and this name is used to identify the type - the name
1782 is not prefixed by the word `struct` as it would be in C.
1784 Each component datum is identified much like a variable is declared,
1785 with a name, one or to colons, and a type. The type cannot be omitted
1786 as there is no opportunity to deduce the type from usage. An initial
1787 value can be given following an equals sign, so
1789 Structs are only treated as the same if they have the same name.
1790 Simply having the same fields in the same order is not enough. This
1791 might change once we can create structure initializes from a list of
1794 ##### Example: a struct type
1800 would declare a type called "complex" which has two number fields,
1801 each initialised to zero.
1803 ###### type union fields
1814 ###### value union fields
1816 struct value *fields;
1819 ###### value functions
1821 static struct value structure_prepare(struct type *type)
1826 ret.structure.fields = NULL;
1830 static struct value structure_init(struct type *type)
1836 ret.structure.fields = calloc(type->structure.nfields,
1837 sizeof(ret.structure.fields[0]));
1838 for (i = 0; ret.structure.fields && i < type->structure.nfields; i++)
1839 ret.structure.fields[i] = val_init(type->structure.fields[i].type);
1843 static void structure_free(struct value val)
1847 if (val.structure.fields)
1848 for (i = 0; i < val.type->structure.nfields; i++)
1849 free_value(val.structure.fields[i]);
1850 free(val.structure.fields);
1853 static void structure_free_type(struct type *t)
1856 for (i = 0; i < t->structure.nfields; i++)
1857 free_value(t->structure.fields[i].init);
1858 free(t->structure.fields);
1861 static struct type structure_prototype = {
1862 .prepare = structure_prepare,
1863 .init = structure_init,
1864 .free = structure_free,
1865 .free_type = structure_free_type,
1866 .print_type_decl = structure_print_type,
1880 ###### free exec cases
1882 free_exec(cast(fieldref, e)->left);
1886 ###### variable grammar
1888 | Variable . IDENTIFIER ${ {
1889 struct fieldref *fr = new_pos(fieldref, $2);
1896 ###### print exec cases
1900 struct fieldref *f = cast(fieldref, e);
1901 print_exec(f->left, -1, 0);
1902 printf(".%.*s", f->name.len, f->name.txt);
1906 ###### ast functions
1907 static int find_struct_index(struct type *type, struct text field)
1910 for (i = 0; i < type->structure.nfields; i++)
1911 if (text_cmp(type->structure.fields[i].name, field) == 0)
1916 ###### propagate exec cases
1920 struct fieldref *f = cast(fieldref, prog);
1921 struct type *st = propagate_types(f->left, c, ok, NULL, 0);
1924 type_err(c, "error: unknown type for field access", f->left,
1926 else if (st->prepare != structure_prepare)
1927 type_err(c, "error: field reference attempted on %1, not a struct",
1928 f->left, st, 0, NULL);
1929 else if (f->index == -2) {
1930 f->index = find_struct_index(st, f->name);
1932 type_err(c, "error: cannot find requested field in %1",
1933 f->left, st, 0, NULL);
1937 if (f->index >= 0) {
1938 struct type *ft = st->structure.fields[f->index].type;
1939 if (!type_compat(type, ft, rules)) {
1940 type_err(c, "error: have %1 but need %2", prog,
1949 ###### interp exec cases
1952 struct fieldref *f = cast(fieldref, e);
1953 struct value *lleft = linterp_exec(f->left);
1954 lrv = &lleft->structure.fields[f->index];
1960 struct fieldlist *prev;
1964 ###### ast functions
1965 static void free_fieldlist(struct fieldlist *f)
1969 free_fieldlist(f->prev);
1970 free_value(f->f.init);
1974 ###### top level grammar
1975 DeclareStruct -> struct IDENTIFIER FieldBlock ${ {
1977 add_type(config2context(config), $2.txt, &structure_prototype);
1979 struct fieldlist *f;
1981 for (f = $3; f; f=f->prev)
1984 t->structure.nfields = cnt;
1985 t->structure.fields = calloc(cnt, sizeof(struct field));
1989 t->structure.fields[cnt] = f->f;
1990 f->f.init = val_prepare(Tnone);
1996 FieldBlock -> Open SimpleFieldList Close ${ $0 = $<2; }$
1997 | Open Newlines SimpleFieldList Close ${ $0 = $<3; }$
1998 | : FieldList ${ $0 = $<2; }$
2000 FieldList -> Field NEWLINE ${ $0 = $<1; }$
2001 | FieldList NEWLINE ${ $0 = $<1; }$
2002 | FieldList Field NEWLINE ${
2007 SimpleFieldList -> Field ; ${ $0 = $<1; }$
2008 | SimpleFieldList Field ; ${
2013 Field -> IDENTIFIER : Type = Expression ${ {
2016 $0 = calloc(1, sizeof(struct fieldlist));
2017 $0->f.name = $1.txt;
2019 $0->f.init = val_prepare($0->f.type);
2022 propagate_types($<5, config2context(config), &ok, $3, 0);
2025 config2context(config)->parse_error = 1;
2027 $0->f.init = interp_exec($5);
2029 | IDENTIFIER : Type ${
2030 $0 = calloc(1, sizeof(struct fieldlist));
2031 $0->f.name = $1.txt;
2033 $0->f.init = val_init($3);
2036 ###### forward decls
2037 static void structure_print_type(struct type *t, FILE *f);
2039 ###### value functions
2040 static void structure_print_type(struct type *t, FILE *f)
2044 fprintf(f, "struct %.*s:\n", t->name.len, t->name.txt);
2046 for (i = 0; i < t->structure.nfields; i++) {
2047 struct field *fl = t->structure.fields + i;
2048 fprintf(f, " %.*s : ", fl->name.len, fl->name.txt);
2049 type_print(fl->type, f);
2050 if (fl->init.type->print) {
2052 if (fl->init.type == Tstr)
2054 print_value(fl->init);
2055 if (fl->init.type == Tstr)
2067 while (target != 0) {
2069 for (t = context.typelist; t ; t=t->next)
2070 if (t->print_type_decl) {
2079 t->print_type_decl(t, stdout);
2085 ## Language elements
2087 Each language element needs to be parsed, printed, analysed,
2088 interpreted, and freed. There are several, so let's just start with
2089 the easy ones and work our way up.
2093 We have already met values as separate objects. When manifest
2094 constants appear in the program text, that must result in an executable
2095 which has a constant value. So the `val` structure embeds a value in
2111 $0 = new_pos(val, $1);
2112 $0->val.type = Tbool;
2116 $0 = new_pos(val, $1);
2117 $0->val.type = Tbool;
2121 $0 = new_pos(val, $1);
2122 $0->val.type = Tnum;
2125 if (number_parse($0->val.num, tail, $1.txt) == 0)
2126 mpq_init($0->val.num);
2128 tok_err(config2context(config), "error: unsupported number suffix",
2133 $0 = new_pos(val, $1);
2134 $0->val.type = Tstr;
2137 string_parse(&$1, '\\', &$0->val.str, tail);
2139 tok_err(config2context(config), "error: unsupported string suffix",
2144 $0 = new_pos(val, $1);
2145 $0->val.type = Tstr;
2148 string_parse(&$1, '\\', &$0->val.str, tail);
2150 tok_err(config2context(config), "error: unsupported string suffix",
2155 ###### print exec cases
2158 struct val *v = cast(val, e);
2159 if (v->val.type == Tstr)
2161 print_value(v->val);
2162 if (v->val.type == Tstr)
2167 ###### propagate exec cases
2170 struct val *val = cast(val, prog);
2171 if (!type_compat(type, val->val.type, rules)) {
2172 type_err(c, "error: expected %1%r found %2",
2173 prog, type, rules, val->val.type);
2176 return val->val.type;
2179 ###### interp exec cases
2181 rv = dup_value(cast(val, e)->val);
2184 ###### ast functions
2185 static void free_val(struct val *v)
2193 ###### free exec cases
2194 case Xval: free_val(cast(val, e)); break;
2196 ###### ast functions
2197 // Move all nodes from 'b' to 'rv', reversing the order.
2198 // In 'b' 'left' is a list, and 'right' is the last node.
2199 // In 'rv', left' is the first node and 'right' is a list.
2200 static struct binode *reorder_bilist(struct binode *b)
2202 struct binode *rv = NULL;
2205 struct exec *t = b->right;
2209 b = cast(binode, b->left);
2219 Just as we used a `val` to wrap a value into an `exec`, we similarly
2220 need a `var` to wrap a `variable` into an exec. While each `val`
2221 contained a copy of the value, each `var` hold a link to the variable
2222 because it really is the same variable no matter where it appears.
2223 When a variable is used, we need to remember to follow the `->merged`
2224 link to find the primary instance.
2232 struct variable *var;
2238 VariableDecl -> IDENTIFIER : ${ {
2239 struct variable *v = var_decl(config2context(config), $1.txt);
2240 $0 = new_pos(var, $1);
2245 v = var_ref(config2context(config), $1.txt);
2247 type_err(config2context(config), "error: variable '%v' redeclared",
2248 $0, Tnone, 0, Tnone);
2249 type_err(config2context(config), "info: this is where '%v' was first declared",
2250 v->where_decl, Tnone, 0, Tnone);
2253 | IDENTIFIER :: ${ {
2254 struct variable *v = var_decl(config2context(config), $1.txt);
2255 $0 = new_pos(var, $1);
2261 v = var_ref(config2context(config), $1.txt);
2263 type_err(config2context(config), "error: variable '%v' redeclared",
2264 $0, Tnone, 0, Tnone);
2265 type_err(config2context(config), "info: this is where '%v' was first declared",
2266 v->where_decl, Tnone, 0, Tnone);
2269 | IDENTIFIER : Type ${ {
2270 struct variable *v = var_decl(config2context(config), $1.txt);
2271 $0 = new_pos(var, $1);
2276 v->val = val_prepare($<3);
2278 v = var_ref(config2context(config), $1.txt);
2280 type_err(config2context(config), "error: variable '%v' redeclared",
2281 $0, Tnone, 0, Tnone);
2282 type_err(config2context(config), "info: this is where '%v' was first declared",
2283 v->where_decl, Tnone, 0, Tnone);
2286 | IDENTIFIER :: Type ${ {
2287 struct variable *v = var_decl(config2context(config), $1.txt);
2288 $0 = new_pos(var, $1);
2293 v->val = val_prepare($<3);
2296 v = var_ref(config2context(config), $1.txt);
2298 type_err(config2context(config), "error: variable '%v' redeclared",
2299 $0, Tnone, 0, Tnone);
2300 type_err(config2context(config), "info: this is where '%v' was first declared",
2301 v->where_decl, Tnone, 0, Tnone);
2306 Variable -> IDENTIFIER ${ {
2307 struct variable *v = var_ref(config2context(config), $1.txt);
2308 $0 = new_pos(var, $1);
2310 /* This might be a label - allocate a var just in case */
2311 v = var_decl(config2context(config), $1.txt);
2313 v->val = val_prepare(Tlabel);
2314 v->val.label = &v->val;
2318 cast(var, $0)->var = v;
2323 Type -> IDENTIFIER ${
2324 $0 = find_type(config2context(config), $1.txt);
2326 tok_err(config2context(config),
2327 "error: undefined type", &$1);
2334 ###### print exec cases
2337 struct var *v = cast(var, e);
2339 struct binding *b = v->var->name;
2340 printf("%.*s", b->name.len, b->name.txt);
2347 if (loc->type == Xvar) {
2348 struct var *v = cast(var, loc);
2350 struct binding *b = v->var->name;
2351 fprintf(stderr, "%.*s", b->name.len, b->name.txt);
2353 fputs("???", stderr); // NOTEST
2355 fputs("NOTVAR", stderr); // NOTEST
2358 ###### propagate exec cases
2362 struct var *var = cast(var, prog);
2363 struct variable *v = var->var;
2365 type_err(c, "%d:BUG: no variable!!", prog, Tnone, 0, Tnone); // NOTEST
2367 return Tnone; // NOTEST
2371 if (v->constant && (rules & Rnoconstant)) {
2372 type_err(c, "error: Cannot assign to a constant: %v",
2373 prog, NULL, 0, NULL);
2374 type_err(c, "info: name was defined as a constant here",
2375 v->where_decl, NULL, 0, NULL);
2379 if (v->val.type == NULL) {
2380 if (type && *ok != 0) {
2381 v->val = val_prepare(type);
2382 v->where_set = prog;
2387 if (!type_compat(type, v->val.type, rules)) {
2388 type_err(c, "error: expected %1%r but variable '%v' is %2", prog,
2389 type, rules, v->val.type);
2390 type_err(c, "info: this is where '%v' was set to %1", v->where_set,
2391 v->val.type, rules, Tnone);
2399 ###### interp exec cases
2402 struct var *var = cast(var, e);
2403 struct variable *v = var->var;
2411 ###### ast functions
2413 static void free_var(struct var *v)
2418 ###### free exec cases
2419 case Xvar: free_var(cast(var, e)); break;
2421 ### Expressions: Conditional
2423 Our first user of the `binode` will be conditional expressions, which
2424 is a bit odd as they actually have three components. That will be
2425 handled by having 2 binodes for each expression. The conditional
2426 expression is the lowest precedence operatior, so it gets to define
2427 what an "Expression" is. The next level up is "BoolExpr", which
2430 Conditional expressions are of the form "value `if` condition `else`
2431 other_value". There is no associativite with this operator: the
2432 values and conditions can only be other conditional expressions if
2433 they are enclosed in parentheses. Allowing nesting without
2434 parentheses would be too confusing.
2442 Expression -> BoolExpr if BoolExpr else BoolExpr ${ {
2443 struct binode *b1 = new(binode);
2444 struct binode *b2 = new(binode);
2453 | BoolExpr ${ $0 = $<1; }$
2455 ###### print binode cases
2458 b2 = cast(binode, b->right);
2459 print_exec(b2->left, -1, 0);
2461 print_exec(b->left, -1, 0);
2463 print_exec(b2->right, -1, 0);
2466 ###### propagate binode cases
2469 /* cond must be Tbool, others must match */
2470 struct binode *b2 = cast(binode, b->right);
2473 propagate_types(b->left, c, ok, Tbool, 0);
2474 t = propagate_types(b2->left, c, ok, type, Rnolabel);
2475 t2 = propagate_types(b2->right, c, ok, type ?: t, Rnolabel);
2479 ###### interp binode cases
2482 struct binode *b2 = cast(binode, b->right);
2483 left = interp_exec(b->left);
2485 rv = interp_exec(b2->left);
2487 rv = interp_exec(b2->right);
2491 ### Expressions: Boolean
2493 The next class of expressions to use the `binode` will be Boolean
2494 expressions. As I haven't implemented precedence in the parser
2495 generator yet, we need different names for each precedence level used
2496 by expressions. The outer most or lowest level precedence are
2497 conditional expressions are Boolean operators which form an `BoolExpr`
2498 out of `BTerm`s and `BFact`s. As well as `or` `and`, and `not` we
2499 have `and then` and `or else` which only evaluate the second operand
2500 if the result would make a difference.
2512 BoolExpr -> BoolExpr or BTerm ${ {
2513 struct binode *b = new(binode);
2519 | BoolExpr or else BTerm ${ {
2520 struct binode *b = new(binode);
2526 | BTerm ${ $0 = $<1; }$
2528 BTerm -> BTerm and BFact ${ {
2529 struct binode *b = new(binode);
2535 | BTerm and then BFact ${ {
2536 struct binode *b = new(binode);
2542 | BFact ${ $0 = $<1; }$
2544 BFact -> not BFact ${ {
2545 struct binode *b = new(binode);
2552 ###### print binode cases
2554 print_exec(b->left, -1, 0);
2556 print_exec(b->right, -1, 0);
2559 print_exec(b->left, -1, 0);
2560 printf(" and then ");
2561 print_exec(b->right, -1, 0);
2564 print_exec(b->left, -1, 0);
2566 print_exec(b->right, -1, 0);
2569 print_exec(b->left, -1, 0);
2570 printf(" or else ");
2571 print_exec(b->right, -1, 0);
2575 print_exec(b->right, -1, 0);
2578 ###### propagate binode cases
2584 /* both must be Tbool, result is Tbool */
2585 propagate_types(b->left, c, ok, Tbool, 0);
2586 propagate_types(b->right, c, ok, Tbool, 0);
2587 if (type && type != Tbool) {
2588 type_err(c, "error: %1 operation found where %2 expected", prog,
2594 ###### interp binode cases
2596 rv = interp_exec(b->left);
2597 right = interp_exec(b->right);
2598 rv.bool = rv.bool && right.bool;
2601 rv = interp_exec(b->left);
2603 rv = interp_exec(b->right);
2606 rv = interp_exec(b->left);
2607 right = interp_exec(b->right);
2608 rv.bool = rv.bool || right.bool;
2611 rv = interp_exec(b->left);
2613 rv = interp_exec(b->right);
2616 rv = interp_exec(b->right);
2620 ### Expressions: Comparison
2622 Of slightly higher precedence that Boolean expressions are
2624 A comparison takes arguments of any type, but the two types must be
2627 To simplify the parsing we introduce an `eop` which can record an
2628 expression operator.
2635 ###### ast functions
2636 static void free_eop(struct eop *e)
2651 | Expr CMPop Expr ${ {
2652 struct binode *b = new(binode);
2658 | Expr ${ $0 = $<1; }$
2663 CMPop -> < ${ $0.op = Less; }$
2664 | > ${ $0.op = Gtr; }$
2665 | <= ${ $0.op = LessEq; }$
2666 | >= ${ $0.op = GtrEq; }$
2667 | == ${ $0.op = Eql; }$
2668 | != ${ $0.op = NEql; }$
2670 ###### print binode cases
2678 print_exec(b->left, -1, 0);
2680 case Less: printf(" < "); break;
2681 case LessEq: printf(" <= "); break;
2682 case Gtr: printf(" > "); break;
2683 case GtrEq: printf(" >= "); break;
2684 case Eql: printf(" == "); break;
2685 case NEql: printf(" != "); break;
2686 default: abort(); // NOTEST
2688 print_exec(b->right, -1, 0);
2691 ###### propagate binode cases
2698 /* Both must match but not be labels, result is Tbool */
2699 t = propagate_types(b->left, c, ok, NULL, Rnolabel);
2701 propagate_types(b->right, c, ok, t, 0);
2703 t = propagate_types(b->right, c, ok, NULL, Rnolabel);
2705 t = propagate_types(b->left, c, ok, t, 0);
2707 if (!type_compat(type, Tbool, 0)) {
2708 type_err(c, "error: Comparison returns %1 but %2 expected", prog,
2709 Tbool, rules, type);
2714 ###### interp binode cases
2723 left = interp_exec(b->left);
2724 right = interp_exec(b->right);
2725 cmp = value_cmp(left, right);
2728 case Less: rv.bool = cmp < 0; break;
2729 case LessEq: rv.bool = cmp <= 0; break;
2730 case Gtr: rv.bool = cmp > 0; break;
2731 case GtrEq: rv.bool = cmp >= 0; break;
2732 case Eql: rv.bool = cmp == 0; break;
2733 case NEql: rv.bool = cmp != 0; break;
2734 default: rv.bool = 0; break; // NOTEST
2739 ### Expressions: The rest
2741 The remaining expressions with the highest precedence are arithmetic
2742 and string concatenation. They are `Expr`, `Term`, and `Factor`.
2743 The `Factor` is where the `Value` and `Variable` that we already have
2746 `+` and `-` are both infix and prefix operations (where they are
2747 absolute value and negation). These have different operator names.
2749 We also have a 'Bracket' operator which records where parentheses were
2750 found. This makes it easy to reproduce these when printing. Once
2751 precedence is handled better I might be able to discard this.
2763 Expr -> Expr Eop Term ${ {
2764 struct binode *b = new(binode);
2770 | Term ${ $0 = $<1; }$
2772 Term -> Term Top Factor ${ {
2773 struct binode *b = new(binode);
2779 | Factor ${ $0 = $<1; }$
2781 Factor -> ( Expression ) ${ {
2782 struct binode *b = new_pos(binode, $1);
2788 struct binode *b = new(binode);
2793 | Value ${ $0 = $<1; }$
2794 | Variable ${ $0 = $<1; }$
2797 Eop -> + ${ $0.op = Plus; }$
2798 | - ${ $0.op = Minus; }$
2800 Uop -> + ${ $0.op = Absolute; }$
2801 | - ${ $0.op = Negate; }$
2803 Top -> * ${ $0.op = Times; }$
2804 | / ${ $0.op = Divide; }$
2805 | % ${ $0.op = Rem; }$
2806 | ++ ${ $0.op = Concat; }$
2808 ###### print binode cases
2815 print_exec(b->left, indent, 0);
2817 case Plus: fputs(" + ", stdout); break;
2818 case Minus: fputs(" - ", stdout); break;
2819 case Times: fputs(" * ", stdout); break;
2820 case Divide: fputs(" / ", stdout); break;
2821 case Rem: fputs(" % ", stdout); break;
2822 case Concat: fputs(" ++ ", stdout); break;
2823 default: abort(); // NOTEST
2825 print_exec(b->right, indent, 0);
2829 print_exec(b->right, indent, 0);
2833 print_exec(b->right, indent, 0);
2837 print_exec(b->right, indent, 0);
2841 ###### propagate binode cases
2847 /* both must be numbers, result is Tnum */
2850 /* as propagate_types ignores a NULL,
2851 * unary ops fit here too */
2852 propagate_types(b->left, c, ok, Tnum, 0);
2853 propagate_types(b->right, c, ok, Tnum, 0);
2854 if (!type_compat(type, Tnum, 0)) {
2855 type_err(c, "error: Arithmetic returns %1 but %2 expected", prog,
2862 /* both must be Tstr, result is Tstr */
2863 propagate_types(b->left, c, ok, Tstr, 0);
2864 propagate_types(b->right, c, ok, Tstr, 0);
2865 if (!type_compat(type, Tstr, 0)) {
2866 type_err(c, "error: Concat returns %1 but %2 expected", prog,
2873 return propagate_types(b->right, c, ok, type, 0);
2875 ###### interp binode cases
2878 rv = interp_exec(b->left);
2879 right = interp_exec(b->right);
2880 mpq_add(rv.num, rv.num, right.num);
2883 rv = interp_exec(b->left);
2884 right = interp_exec(b->right);
2885 mpq_sub(rv.num, rv.num, right.num);
2888 rv = interp_exec(b->left);
2889 right = interp_exec(b->right);
2890 mpq_mul(rv.num, rv.num, right.num);
2893 rv = interp_exec(b->left);
2894 right = interp_exec(b->right);
2895 mpq_div(rv.num, rv.num, right.num);
2900 left = interp_exec(b->left);
2901 right = interp_exec(b->right);
2902 mpz_init(l); mpz_init(r); mpz_init(rem);
2903 mpz_tdiv_q(l, mpq_numref(left.num), mpq_denref(left.num));
2904 mpz_tdiv_q(r, mpq_numref(right.num), mpq_denref(right.num));
2905 mpz_tdiv_r(rem, l, r);
2906 rv = val_init(Tnum);
2907 mpq_set_z(rv.num, rem);
2908 mpz_clear(r); mpz_clear(l); mpz_clear(rem);
2912 rv = interp_exec(b->right);
2913 mpq_neg(rv.num, rv.num);
2916 rv = interp_exec(b->right);
2917 mpq_abs(rv.num, rv.num);
2920 rv = interp_exec(b->right);
2923 left = interp_exec(b->left);
2924 right = interp_exec(b->right);
2926 rv.str = text_join(left.str, right.str);
2930 ###### value functions
2932 static struct text text_join(struct text a, struct text b)
2935 rv.len = a.len + b.len;
2936 rv.txt = malloc(rv.len);
2937 memcpy(rv.txt, a.txt, a.len);
2938 memcpy(rv.txt+a.len, b.txt, b.len);
2943 ### Blocks, Statements, and Statement lists.
2945 Now that we have expressions out of the way we need to turn to
2946 statements. There are simple statements and more complex statements.
2947 Simple statements do not contain newlines, complex statements do.
2949 Statements often come in sequences and we have corresponding simple
2950 statement lists and complex statement lists.
2951 The former comprise only simple statements separated by semicolons.
2952 The later comprise complex statements and simple statement lists. They are
2953 separated by newlines. Thus the semicolon is only used to separate
2954 simple statements on the one line. This may be overly restrictive,
2955 but I'm not sure I ever want a complex statement to share a line with
2958 Note that a simple statement list can still use multiple lines if
2959 subsequent lines are indented, so
2961 ###### Example: wrapped simple statement list
2966 is a single simple statement list. This might allow room for
2967 confusion, so I'm not set on it yet.
2969 A simple statement list needs no extra syntax. A complex statement
2970 list has two syntactic forms. It can be enclosed in braces (much like
2971 C blocks), or it can be introduced by a colon and continue until an
2972 unindented newline (much like Python blocks). With this extra syntax
2973 it is referred to as a block.
2975 Note that a block does not have to include any newlines if it only
2976 contains simple statements. So both of:
2978 if condition: a=b; d=f
2980 if condition { a=b; print f }
2984 In either case the list is constructed from a `binode` list with
2985 `Block` as the operator. When parsing the list it is most convenient
2986 to append to the end, so a list is a list and a statement. When using
2987 the list it is more convenient to consider a list to be a statement
2988 and a list. So we need a function to re-order a list.
2989 `reorder_bilist` serves this purpose.
2991 The only stand-alone statement we introduce at this stage is `pass`
2992 which does nothing and is represented as a `NULL` pointer in a `Block`
2993 list. Other stand-alone statements will follow once the infrastructure
3013 Block -> Open Statementlist Close ${ $0 = $<2; }$
3014 | Open Newlines Statementlist Close ${ $0 = $<3; }$
3015 | Open SimpleStatements } ${ $0 = reorder_bilist($<2); }$
3016 | Open Newlines SimpleStatements } ${ $0 = reorder_bilist($<3); }$
3017 | : Statementlist ${ $0 = $<2; }$
3018 | : SimpleStatements ${ $0 = reorder_bilist($<2); }$
3020 Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<1); }$
3022 ComplexStatements -> ComplexStatements ComplexStatement ${
3028 | ComplexStatements NEWLINE ${ $0 = $<1; }$
3029 | ComplexStatement ${
3037 ComplexStatement -> SimpleStatements NEWLINE ${
3038 $0 = reorder_bilist($<1);
3040 ## ComplexStatement Grammar
3043 SimpleStatements -> SimpleStatements ; SimpleStatement ${
3049 | SimpleStatement ${
3055 | SimpleStatements ; ${ $0 = $<1; }$
3057 SimpleStatement -> pass ${ $0 = NULL; }$
3058 ## SimpleStatement Grammar
3060 ###### print binode cases
3064 if (b->left == NULL)
3067 print_exec(b->left, indent, 0);
3070 print_exec(b->right, indent, 0);
3073 // block, one per line
3074 if (b->left == NULL)
3075 do_indent(indent, "pass\n");
3077 print_exec(b->left, indent, bracket);
3079 print_exec(b->right, indent, bracket);
3083 ###### propagate binode cases
3086 /* If any statement returns something other than Tnone
3087 * or Tbool then all such must return same type.
3088 * As each statement may be Tnone or something else,
3089 * we must always pass NULL (unknown) down, otherwise an incorrect
3090 * error might occur. We never return Tnone unless it is
3095 for (e = b; e; e = cast(binode, e->right)) {
3096 t = propagate_types(e->left, c, ok, NULL, rules);
3097 if ((rules & Rboolok) && t == Tbool)
3099 if (t && t != Tnone && t != Tbool) {
3102 else if (t != type) {
3103 type_err(c, "error: expected %1%r, found %2",
3104 e->left, type, rules, t);
3112 ###### interp binode cases
3114 while (rv.type == Tnone &&
3117 rv = interp_exec(b->left);
3118 b = cast(binode, b->right);
3122 ### The Print statement
3124 `print` is a simple statement that takes a comma-separated list of
3125 expressions and prints the values separated by spaces and terminated
3126 by a newline. No control of formatting is possible.
3128 `print` faces the same list-ordering issue as blocks, and uses the
3134 ###### SimpleStatement Grammar
3136 | print ExpressionList ${
3137 $0 = reorder_bilist($<2);
3139 | print ExpressionList , ${
3144 $0 = reorder_bilist($0);
3155 ExpressionList -> ExpressionList , Expression ${
3168 ###### print binode cases
3171 do_indent(indent, "print");
3175 print_exec(b->left, -1, 0);
3179 b = cast(binode, b->right);
3185 ###### propagate binode cases
3188 /* don't care but all must be consistent */
3189 propagate_types(b->left, c, ok, NULL, Rnolabel);
3190 propagate_types(b->right, c, ok, NULL, Rnolabel);
3193 ###### interp binode cases
3199 for ( ; b; b = cast(binode, b->right))
3203 left = interp_exec(b->left);
3216 ###### Assignment statement
3218 An assignment will assign a value to a variable, providing it hasn't
3219 be declared as a constant. The analysis phase ensures that the type
3220 will be correct so the interpreter just needs to perform the
3221 calculation. There is a form of assignment which declares a new
3222 variable as well as assigning a value. If a name is assigned before
3223 it is declared, and error will be raised as the name is created as
3224 `Tlabel` and it is illegal to assign to such names.
3230 ###### SimpleStatement Grammar
3231 | Variable = Expression ${
3237 | VariableDecl = Expression ${
3245 if ($1->var->where_set == NULL) {
3246 type_err(config2context(config), "Variable declared with no type or value: %v",
3256 ###### print binode cases
3259 do_indent(indent, "");
3260 print_exec(b->left, indent, 0);
3262 print_exec(b->right, indent, 0);
3269 struct variable *v = cast(var, b->left)->var;
3270 do_indent(indent, "");
3271 print_exec(b->left, indent, 0);
3272 if (cast(var, b->left)->var->constant) {
3273 if (v->where_decl == v->where_set) {
3275 type_print(v->val.type, stdout);
3280 if (v->where_decl == v->where_set) {
3282 type_print(v->val.type, stdout);
3289 print_exec(b->right, indent, 0);
3296 ###### propagate binode cases
3300 /* Both must match and not be labels,
3301 * Type must support 'dup',
3302 * For Assign, left must not be constant.
3305 t = propagate_types(b->left, c, ok, NULL,
3306 Rnolabel | (b->op == Assign ? Rnoconstant : 0));
3311 if (propagate_types(b->right, c, ok, t, 0) != t)
3312 if (b->left->type == Xvar)
3313 type_err(c, "info: variable '%v' was set as %1 here.",
3314 cast(var, b->left)->var->where_set, t, rules, Tnone);
3316 t = propagate_types(b->right, c, ok, NULL, Rnolabel);
3318 propagate_types(b->left, c, ok, t,
3319 (b->op == Assign ? Rnoconstant : 0));
3321 if (t && t->dup == NULL) {
3322 type_err(c, "error: cannot assign value of type %1", b, t, 0, NULL);
3329 ###### interp binode cases
3332 lleft = linterp_exec(b->left);
3333 right = interp_exec(b->right);
3338 free_value(right); // NOTEST
3344 struct variable *v = cast(var, b->left)->var;
3348 right = interp_exec(b->right);
3350 right = val_init(v->val.type);
3357 ### The `use` statement
3359 The `use` statement is the last "simple" statement. It is needed when
3360 the condition in a conditional statement is a block. `use` works much
3361 like `return` in C, but only completes the `condition`, not the whole
3367 ###### SimpleStatement Grammar
3369 $0 = new_pos(binode, $1);
3374 ###### print binode cases
3377 do_indent(indent, "use ");
3378 print_exec(b->right, -1, 0);
3383 ###### propagate binode cases
3386 /* result matches value */
3387 return propagate_types(b->right, c, ok, type, 0);
3389 ###### interp binode cases
3392 rv = interp_exec(b->right);
3395 ### The Conditional Statement
3397 This is the biggy and currently the only complex statement. This
3398 subsumes `if`, `while`, `do/while`, `switch`, and some parts of `for`.
3399 It is comprised of a number of parts, all of which are optional though
3400 set combinations apply. Each part is (usually) a key word (`then` is
3401 sometimes optional) followed by either an expression or a code block,
3402 except the `casepart` which is a "key word and an expression" followed
3403 by a code block. The code-block option is valid for all parts and,
3404 where an expression is also allowed, the code block can use the `use`
3405 statement to report a value. If the code block does not report a value
3406 the effect is similar to reporting `True`.
3408 The `else` and `case` parts, as well as `then` when combined with
3409 `if`, can contain a `use` statement which will apply to some
3410 containing conditional statement. `for` parts, `do` parts and `then`
3411 parts used with `for` can never contain a `use`, except in some
3412 subordinate conditional statement.
3414 If there is a `forpart`, it is executed first, only once.
3415 If there is a `dopart`, then it is executed repeatedly providing
3416 always that the `condpart` or `cond`, if present, does not return a non-True
3417 value. `condpart` can fail to return any value if it simply executes
3418 to completion. This is treated the same as returning `True`.
3420 If there is a `thenpart` it will be executed whenever the `condpart`
3421 or `cond` returns True (or does not return any value), but this will happen
3422 *after* `dopart` (when present).
3424 If `elsepart` is present it will be executed at most once when the
3425 condition returns `False` or some value that isn't `True` and isn't
3426 matched by any `casepart`. If there are any `casepart`s, they will be
3427 executed when the condition returns a matching value.
3429 The particular sorts of values allowed in case parts has not yet been
3430 determined in the language design, so nothing is prohibited.
3432 The various blocks in this complex statement potentially provide scope
3433 for variables as described earlier. Each such block must include the
3434 "OpenScope" nonterminal before parsing the block, and must call
3435 `var_block_close()` when closing the block.
3437 The code following "`if`", "`switch`" and "`for`" does not get its own
3438 scope, but is in a scope covering the whole statement, so names
3439 declared there cannot be redeclared elsewhere. Similarly the
3440 condition following "`while`" is in a scope the covers the body
3441 ("`do`" part) of the loop, and which does not allow conditional scope
3442 extension. Code following "`then`" (both looping and non-looping),
3443 "`else`" and "`case`" each get their own local scope.
3445 The type requirements on the code block in a `whilepart` are quite
3446 unusal. It is allowed to return a value of some identifiable type, in
3447 which case the loop aborts and an appropriate `casepart` is run, or it
3448 can return a Boolean, in which case the loop either continues to the
3449 `dopart` (on `True`) or aborts and runs the `elsepart` (on `False`).
3450 This is different both from the `ifpart` code block which is expected to
3451 return a Boolean, or the `switchpart` code block which is expected to
3452 return the same type as the casepart values. The correct analysis of
3453 the type of the `whilepart` code block is the reason for the
3454 `Rboolok` flag which is passed to `propagate_types()`.
3456 The `cond_statement` cannot fit into a `binode` so a new `exec` is
3465 struct exec *action;
3466 struct casepart *next;
3468 struct cond_statement {
3470 struct exec *forpart, *condpart, *dopart, *thenpart, *elsepart;
3471 struct casepart *casepart;
3474 ###### ast functions
3476 static void free_casepart(struct casepart *cp)
3480 free_exec(cp->value);
3481 free_exec(cp->action);
3488 static void free_cond_statement(struct cond_statement *s)
3492 free_exec(s->forpart);
3493 free_exec(s->condpart);
3494 free_exec(s->dopart);
3495 free_exec(s->thenpart);
3496 free_exec(s->elsepart);
3497 free_casepart(s->casepart);
3501 ###### free exec cases
3502 case Xcond_statement: free_cond_statement(cast(cond_statement, e)); break;
3504 ###### ComplexStatement Grammar
3505 | CondStatement ${ $0 = $<1; }$
3510 // both ForThen and Whilepart open scopes, and CondSuffix only
3511 // closes one - so in the first branch here we have another to close.
3512 CondStatement -> ForThen WhilePart CondSuffix ${
3514 $0->forpart = $1.forpart; $1.forpart = NULL;
3515 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
3516 $0->condpart = $2.condpart; $2.condpart = NULL;
3517 $0->dopart = $2.dopart; $2.dopart = NULL;
3518 var_block_close(config2context(config), CloseSequential);
3520 | WhilePart CondSuffix ${
3522 $0->condpart = $1.condpart; $1.condpart = NULL;
3523 $0->dopart = $1.dopart; $1.dopart = NULL;
3525 | SwitchPart CondSuffix ${
3529 | IfPart IfSuffix ${
3531 $0->condpart = $1.condpart; $1.condpart = NULL;
3532 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
3533 // This is where we close an "if" statement
3534 var_block_close(config2context(config), CloseSequential);
3537 CondSuffix -> IfSuffix ${
3539 // This is where we close scope of the whole
3540 // "for" or "while" statement
3541 var_block_close(config2context(config), CloseSequential);
3543 | CasePart CondSuffix ${
3545 $1->next = $0->casepart;
3550 CasePart -> Newlines case Expression OpenScope Block ${
3551 $0 = calloc(1,sizeof(struct casepart));
3554 var_block_close(config2context(config), CloseParallel);
3556 | case Expression OpenScope Block ${
3557 $0 = calloc(1,sizeof(struct casepart));
3560 var_block_close(config2context(config), CloseParallel);
3564 IfSuffix -> Newlines ${ $0 = new(cond_statement); }$
3565 | Newlines else OpenScope Block ${
3566 $0 = new(cond_statement);
3568 var_block_close(config2context(config), CloseElse);
3570 | else OpenScope Block ${
3571 $0 = new(cond_statement);
3573 var_block_close(config2context(config), CloseElse);
3575 | Newlines else OpenScope CondStatement ${
3576 $0 = new(cond_statement);
3578 var_block_close(config2context(config), CloseElse);
3580 | else OpenScope CondStatement ${
3581 $0 = new(cond_statement);
3583 var_block_close(config2context(config), CloseElse);
3588 // These scopes are closed in CondSuffix
3589 ForPart -> for OpenScope SimpleStatements ${
3590 $0 = reorder_bilist($<3);
3592 | for OpenScope Block ${
3596 ThenPart -> then OpenScope SimpleStatements ${
3597 $0 = reorder_bilist($<3);
3598 var_block_close(config2context(config), CloseSequential);
3600 | then OpenScope Block ${
3602 var_block_close(config2context(config), CloseSequential);
3605 ThenPartNL -> ThenPart OptNL ${
3609 // This scope is closed in CondSuffix
3610 WhileHead -> while OpenScope Block ${
3615 ForThen -> ForPart OptNL ThenPartNL ${
3623 // This scope is closed in CondSuffix
3624 WhilePart -> while OpenScope Expression Block ${
3625 $0.type = Xcond_statement;
3629 | WhileHead OptNL do Block ${
3630 $0.type = Xcond_statement;
3635 IfPart -> if OpenScope Expression OpenScope Block ${
3636 $0.type = Xcond_statement;
3639 var_block_close(config2context(config), CloseParallel);
3641 | if OpenScope Block OptNL then OpenScope Block ${
3642 $0.type = Xcond_statement;
3645 var_block_close(config2context(config), CloseParallel);
3649 // This scope is closed in CondSuffix
3650 SwitchPart -> switch OpenScope Expression ${
3653 | switch OpenScope Block ${
3657 ###### print exec cases
3659 case Xcond_statement:
3661 struct cond_statement *cs = cast(cond_statement, e);
3662 struct casepart *cp;
3664 do_indent(indent, "for");
3665 if (bracket) printf(" {\n"); else printf(":\n");
3666 print_exec(cs->forpart, indent+1, bracket);
3669 do_indent(indent, "} then {\n");
3671 do_indent(indent, "then:\n");
3672 print_exec(cs->thenpart, indent+1, bracket);
3674 if (bracket) do_indent(indent, "}\n");
3678 if (cs->condpart && cs->condpart->type == Xbinode &&
3679 cast(binode, cs->condpart)->op == Block) {
3681 do_indent(indent, "while {\n");
3683 do_indent(indent, "while:\n");
3684 print_exec(cs->condpart, indent+1, bracket);
3686 do_indent(indent, "} do {\n");
3688 do_indent(indent, "do:\n");
3689 print_exec(cs->dopart, indent+1, bracket);
3691 do_indent(indent, "}\n");
3693 do_indent(indent, "while ");
3694 print_exec(cs->condpart, 0, bracket);
3699 print_exec(cs->dopart, indent+1, bracket);
3701 do_indent(indent, "}\n");
3706 do_indent(indent, "switch");
3708 do_indent(indent, "if");
3709 if (cs->condpart && cs->condpart->type == Xbinode &&
3710 cast(binode, cs->condpart)->op == Block) {
3715 print_exec(cs->condpart, indent+1, bracket);
3717 do_indent(indent, "}\n");
3719 do_indent(indent, "then:\n");
3720 print_exec(cs->thenpart, indent+1, bracket);
3724 print_exec(cs->condpart, 0, bracket);
3730 print_exec(cs->thenpart, indent+1, bracket);
3732 do_indent(indent, "}\n");
3737 for (cp = cs->casepart; cp; cp = cp->next) {
3738 do_indent(indent, "case ");
3739 print_exec(cp->value, -1, 0);
3744 print_exec(cp->action, indent+1, bracket);
3746 do_indent(indent, "}\n");
3749 do_indent(indent, "else");
3754 print_exec(cs->elsepart, indent+1, bracket);
3756 do_indent(indent, "}\n");
3761 ###### propagate exec cases
3762 case Xcond_statement:
3764 // forpart and dopart must return Tnone
3765 // thenpart must return Tnone if there is a dopart,
3766 // otherwise it is like elsepart.
3768 // be bool if there is no casepart
3769 // match casepart->values if there is a switchpart
3770 // either be bool or match casepart->value if there
3772 // elsepart and casepart->action must match the return type
3773 // expected of this statement.
3774 struct cond_statement *cs = cast(cond_statement, prog);
3775 struct casepart *cp;
3777 t = propagate_types(cs->forpart, c, ok, Tnone, 0);
3778 if (!type_compat(Tnone, t, 0))
3780 t = propagate_types(cs->dopart, c, ok, Tnone, 0);
3781 if (!type_compat(Tnone, t, 0))
3784 t = propagate_types(cs->thenpart, c, ok, Tnone, 0);
3785 if (!type_compat(Tnone, t, 0))
3788 if (cs->casepart == NULL)
3789 propagate_types(cs->condpart, c, ok, Tbool, 0);
3791 /* Condpart must match case values, with bool permitted */
3793 for (cp = cs->casepart;
3794 cp && !t; cp = cp->next)
3795 t = propagate_types(cp->value, c, ok, NULL, 0);
3796 if (!t && cs->condpart)
3797 t = propagate_types(cs->condpart, c, ok, NULL, Rboolok);
3798 // Now we have a type (I hope) push it down
3800 for (cp = cs->casepart; cp; cp = cp->next)
3801 propagate_types(cp->value, c, ok, t, 0);
3802 propagate_types(cs->condpart, c, ok, t, Rboolok);
3805 // (if)then, else, and case parts must return expected type.
3806 if (!cs->dopart && !type)
3807 type = propagate_types(cs->thenpart, c, ok, NULL, rules);
3809 type = propagate_types(cs->elsepart, c, ok, NULL, rules);
3810 for (cp = cs->casepart;
3813 type = propagate_types(cp->action, c, ok, NULL, rules);
3816 propagate_types(cs->thenpart, c, ok, type, rules);
3817 propagate_types(cs->elsepart, c, ok, type, rules);
3818 for (cp = cs->casepart; cp ; cp = cp->next)
3819 propagate_types(cp->action, c, ok, type, rules);
3825 ###### interp exec cases
3826 case Xcond_statement:
3828 struct value v, cnd;
3829 struct casepart *cp;
3830 struct cond_statement *c = cast(cond_statement, e);
3833 interp_exec(c->forpart);
3836 cnd = interp_exec(c->condpart);
3839 if (!(cnd.type == Tnone ||
3840 (cnd.type == Tbool && cnd.bool != 0)))
3842 // cnd is Tnone or Tbool, doesn't need to be freed
3844 interp_exec(c->dopart);
3847 rv = interp_exec(c->thenpart);
3848 if (rv.type != Tnone || !c->dopart)
3852 } while (c->dopart);
3854 for (cp = c->casepart; cp; cp = cp->next) {
3855 v = interp_exec(cp->value);
3856 if (value_cmp(v, cnd) == 0) {
3859 rv = interp_exec(cp->action);
3866 rv = interp_exec(c->elsepart);
3873 ### Top level structure
3875 All the language elements so far can be used in various places. Now
3876 it is time to clarify what those places are.
3878 At the top level of a file there will be a number of declarations.
3879 Many of the things that can be declared haven't been described yet,
3880 such as functions, procedures, imports, named types, and probably
3882 For now there are two sorts of things that can appear at the top
3883 level. They are predefined constants and the main program. While the
3884 syntax will allow the main program to appear multiple times, that will
3885 trigger an error if it is actually attempted.
3887 The various declarations do not return anything. They store the
3888 various declarations in the parse context.
3890 ###### Parser: grammar
3893 Ocean -> DeclarationList
3895 DeclarationList -> Declaration
3896 | DeclarationList Declaration
3898 Declaration -> DeclareConstant
3903 ## top level grammar
3905 ### The `const` section
3907 As well as being defined in with the code that uses them, constants
3908 can be declared at the top level. These have full-file scope, so they
3909 are always `InScope`. The value of a top level constant can be given
3910 as an expression, and this is evaluated immediately rather than in the
3911 later interpretation stage. Once we add functions to the language, we
3912 will need rules concern which, if any, can be used to define a top
3915 Constants are defined in a sectiont that starts with the reserved word
3916 `const` and then has a block with a list of assignment statements.
3917 For syntactic consistency, these must use the double-colon syntax to
3918 make it clear that they are constants. Type can also be given: if
3919 not, the type will be determined during analysis, as with other
3922 As the types constants are inserted at the head of a list, printing
3923 them in the same order that they were read is not straight forward.
3924 We take a quadratic approach here and count the number of constants
3925 (variables of depth 0), then count down from there, each time
3926 searching through for the Nth constant for decreasing N.
3928 ###### top level grammar
3930 DeclareConstant -> const Open ConstList Close
3931 | const Open Newlines ConstList Close
3932 | const Open SimpleConstList }
3933 | const Open Newlines SimpleConstList }
3935 | const SimpleConstList
3937 ConstList -> ComplexConsts
3938 ComplexConsts -> ComplexConst ComplexConsts
3940 ComplexConst -> SimpleConstList NEWLINE
3941 SimpleConstList -> Const ; SimpleConstList
3943 | Const ; SimpleConstList ;
3946 CType -> Type ${ $0 = $<1; }$
3949 Const -> IDENTIFIER :: CType = Expression ${ {
3953 v = var_decl(config2context(config), $1.txt);
3955 struct var *var = new_pos(var, $1);
3956 v->where_decl = var;
3961 v = var_ref(config2context(config), $1.txt);
3962 tok_err(config2context(config), "error: name already declared", &$1);
3963 type_err(config2context(config), "info: this is where '%v' was first declared",
3964 v->where_decl, NULL, 0, NULL);
3968 propagate_types($5, config2context(config), &ok, $3, 0);
3971 config2context(config)->parse_error = 1;
3973 v->val = interp_exec($5);
3977 ###### print const decls
3982 while (target != 0) {
3984 for (v = context.in_scope; v; v=v->in_scope)
3985 if (v->depth == 0) {
3996 printf(" %.*s :: ", v->name->name.len, v->name->name.txt);
3997 type_print(v->val.type, stdout);
3999 if (v->val.type == Tstr)
4001 print_value(v->val);
4002 if (v->val.type == Tstr)
4010 ### Finally the whole program.
4012 Somewhat reminiscent of Pascal a (current) Ocean program starts with
4013 the keyword "program" and a list of variable names which are assigned
4014 values from command line arguments. Following this is a `block` which
4015 is the code to execute. Unlike Pascal, constants and other
4016 declarations come *before* the program.
4018 As this is the top level, several things are handled a bit
4020 The whole program is not interpreted by `interp_exec` as that isn't
4021 passed the argument list which the program requires. Similarly type
4022 analysis is a bit more interesting at this level.
4027 ###### top level grammar
4029 DeclareProgram -> Program ${ {
4030 struct parse_context *c = config2context(config);
4032 type_err(c, "Program defined a second time",
4040 Program -> program OpenScope Varlist Block OptNL ${
4043 $0->left = reorder_bilist($<3);
4045 var_block_close(config2context(config), CloseSequential);
4046 if (config2context(config)->scope_stack) abort();
4049 tok_err(config2context(config),
4050 "error: unhandled parse error", &$1);
4053 Varlist -> Varlist ArgDecl ${
4062 ArgDecl -> IDENTIFIER ${ {
4063 struct variable *v = var_decl(config2context(config), $1.txt);
4070 ###### print binode cases
4072 do_indent(indent, "program");
4073 for (b2 = cast(binode, b->left); b2; b2 = cast(binode, b2->right)) {
4075 print_exec(b2->left, 0, 0);
4081 print_exec(b->right, indent+1, bracket);
4083 do_indent(indent, "}\n");
4086 ###### propagate binode cases
4087 case Program: abort(); // NOTEST
4089 ###### core functions
4091 static int analyse_prog(struct exec *prog, struct parse_context *c)
4093 struct binode *b = cast(binode, prog);
4100 propagate_types(b->right, c, &ok, Tnone, 0);
4105 for (b = cast(binode, b->left); b; b = cast(binode, b->right)) {
4106 struct var *v = cast(var, b->left);
4107 if (!v->var->val.type) {
4108 v->var->where_set = b;
4109 v->var->val = val_prepare(Tstr);
4112 b = cast(binode, prog);
4115 propagate_types(b->right, c, &ok, Tnone, 0);
4120 /* Make sure everything is still consistent */
4121 propagate_types(b->right, c, &ok, Tnone, 0);
4125 static void interp_prog(struct exec *prog, char **argv)
4127 struct binode *p = cast(binode, prog);
4133 al = cast(binode, p->left);
4135 struct var *v = cast(var, al->left);
4136 struct value *vl = &v->var->val;
4138 if (argv[0] == NULL) {
4139 printf("Not enough args\n");
4142 al = cast(binode, al->right);
4144 *vl = parse_value(vl->type, argv[0]);
4145 if (vl->type == NULL)
4149 v = interp_exec(p->right);
4153 ###### interp binode cases
4154 case Program: abort(); // NOTEST
4156 ## And now to test it out.
4158 Having a language requires having a "hello world" program. I'll
4159 provide a little more than that: a program that prints "Hello world"
4160 finds the GCD of two numbers, prints the first few elements of
4161 Fibonacci, and performs a binary search for a number.
4163 ###### File: oceani.mk
4166 @echo "===== TEST ====="
4167 ./oceani --section "test: hello" oceani.mdc 55 33
4173 four ::= 2 + 2 ; five ::= 10/2
4174 const pie ::= "I like Pie";
4175 cake ::= "The cake is"
4184 print "Hello World, what lovely oceans you have!"
4185 print "are there", five, "?"
4186 print pi, pie, "but", cake
4188 /* When a variable is defined in both branches of an 'if',
4189 * and used afterwards, the variables are merged.
4195 print "Is", A, "bigger than", B,"? ", bigger
4196 /* If a variable is not used after the 'if', no
4197 * merge happens, so types can be different
4200 double:string = "yes"
4201 print A, "is more than twice", B, "?", double
4204 print "double", B, "is", double
4209 if a > 0 and then b > 0:
4215 print "GCD of", A, "and", B,"is", a
4217 print a, "is not positive, cannot calculate GCD"
4219 print b, "is not positive, cannot calculate GCD"
4224 print "Fibonacci:", f1,f2,
4225 then togo = togo - 1
4233 /* Binary search... */
4238 mid := (lo + hi) / 2
4250 print "Yay, I found", target
4252 print "Closest I found was", mid
4257 for i:=1; then i = i + 1; while i < size:
4258 n := list[i-1] * list[i-1]
4259 list[i] = (n / 100) % 10000
4261 print "Before sort:"
4262 for i:=0; then i = i + 1; while i < size:
4263 print "list[",i,"]=",list[i]
4265 for i := 1; then i=i+1; while i < size:
4266 for j:=i-1; then j=j-1; while j >= 0:
4267 if list[j] > list[j+1]:
4272 for i:=0; then i = i + 1; while i < size:
4273 print "list[",i,"]=",list[i]
4277 bob.alive = (bob.name == "Hello")