1 # Ocean Interpreter - Jamison Creek version
3 Ocean is intended to be a compiled language, so this interpreter is
4 not targeted at being the final product. It is, rather, an intermediate
5 stage and fills that role in two distinct ways.
7 Firstly, it exists as a platform to experiment with the early language
8 design. An interpreter is easy to write and easy to get working, so
9 the barrier for entry is lower if I aim to start with an interpreter.
11 Secondly, the plan for the Ocean compiler is to write it in the
12 [Ocean language](http://ocean-lang.org). To achieve this we naturally
13 need some sort of boot-strap process and this interpreter - written in
14 portable C - will fill that role. It will be used to bootstrap the
17 Two features that are not needed to fill either of these roles are
18 performance and completeness. The interpreter only needs to be fast
19 enough to run small test programs and occasionally to run the compiler
20 on itself. It only needs to be complete enough to test aspects of the
21 design which are developed before the compiler is working, and to run
22 the compiler on itself. Any features not used by the compiler when
23 compiling itself are superfluous. They may be included anyway, but
26 Nonetheless, the interpreter should end up being reasonably complete,
27 and any performance bottlenecks which appear and are easily fixed, will
32 This third version of the interpreter exists to test out some initial
33 ideas relating to types. Particularly it adds arrays (indexed from
34 zero) and simple structures. Basic control flow and variable scoping
35 are already fairly well established, as are basic numerical and
38 Some operators that have only recently been added, and so have not
39 generated all that much experience yet are "and then" and "or else" as
40 short-circuit Boolean operators, and the "if ... else" trinary
41 operator which can select between two expressions based on a third
42 (which appears syntactically in the middle).
44 Elements that are present purely to make a usable language, and
45 without any expectation that they will remain, are the "program'
46 clause, which provides a list of variables to received command-line
47 arguments, and the "print" statement which performs simple output.
49 The current scalar types are "number", "Boolean", and "string".
50 Boolean will likely stay in its current form, the other two might, but
51 could just as easily be changed.
55 Versions of the interpreter which obviously do not support a complete
56 language will be named after creeks and streams. This one is Jamison
59 Once we have something reasonably resembling a complete language, the
60 names of rivers will be used.
61 Early versions of the compiler will be named after seas. Major
62 releases of the compiler will be named after oceans. Hopefully I will
63 be finished once I get to the Pacific Ocean release.
67 As well as parsing and executing a program, the interpreter can print
68 out the program from the parsed internal structure. This is useful
69 for validating the parsing.
70 So the main requirements of the interpreter are:
72 - Parse the program, possibly with tracing,
73 - Analyse the parsed program to ensure consistency,
75 - Execute the program, if no parsing or consistency errors were found.
77 This is all performed by a single C program extracted with
80 There will be two formats for printing the program: a default and one
81 that uses bracketing. So a `--bracket` command line option is needed
82 for that. Normally the first code section found is used, however an
83 alternate section can be requested so that a file (such as this one)
84 can contain multiple programs This is effected with the `--section`
87 This code must be compiled with `-fplan9-extensions` so that anonymous
88 structures can be used.
90 ###### File: oceani.mk
92 myCFLAGS := -Wall -g -fplan9-extensions
93 CFLAGS := $(filter-out $(myCFLAGS),$(CFLAGS)) $(myCFLAGS)
94 myLDLIBS:= libparser.o libscanner.o libmdcode.o -licuuc
95 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
97 all :: $(LDLIBS) oceani
98 oceani.c oceani.h : oceani.mdc parsergen
99 ./parsergen -o oceani --LALR --tag Parser oceani.mdc
100 oceani.mk: oceani.mdc md2c
103 oceani: oceani.o $(LDLIBS)
104 $(CC) $(CFLAGS) -o oceani oceani.o $(LDLIBS)
106 ###### Parser: header
109 struct parse_context {
110 struct token_config config;
119 #define container_of(ptr, type, member) ({ \
120 const typeof( ((type *)0)->member ) *__mptr = (ptr); \
121 (type *)( (char *)__mptr - offsetof(type,member) );})
123 #define config2context(_conf) container_of(_conf, struct parse_context, \
126 ###### Parser: reduce
127 struct parse_context *c = config2context(config);
135 #include <sys/mman.h>
154 static char Usage[] = "Usage: oceani --trace --print --noexec --brackets"
155 "--section=SectionName prog.ocn\n";
156 static const struct option long_options[] = {
157 {"trace", 0, NULL, 't'},
158 {"print", 0, NULL, 'p'},
159 {"noexec", 0, NULL, 'n'},
160 {"brackets", 0, NULL, 'b'},
161 {"section", 1, NULL, 's'},
164 const char *options = "tpnbs";
165 int main(int argc, char *argv[])
170 struct section *s, *ss;
171 char *section = NULL;
172 struct parse_context context = {
174 .ignored = (1 << TK_line_comment)
175 | (1 << TK_block_comment)
177 .number_chars = ".,_+- ",
182 int doprint=0, dotrace=0, doexec=1, brackets=0;
184 while ((opt = getopt_long(argc, argv, options, long_options, NULL))
187 case 't': dotrace=1; break;
188 case 'p': doprint=1; break;
189 case 'n': doexec=0; break;
190 case 'b': brackets=1; break;
191 case 's': section = optarg; break;
192 default: fprintf(stderr, Usage);
196 if (optind >= argc) {
197 fprintf(stderr, "oceani: no input file given\n");
200 fd = open(argv[optind], O_RDONLY);
202 fprintf(stderr, "oceani: cannot open %s\n", argv[optind]);
205 context.file_name = argv[optind];
206 len = lseek(fd, 0, 2);
207 file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
208 s = code_extract(file, file+len, NULL);
210 fprintf(stderr, "oceani: could not find any code in %s\n",
215 ## context initialization
218 for (ss = s; ss; ss = ss->next) {
219 struct text sec = ss->section;
220 if (sec.len == strlen(section) &&
221 strncmp(sec.txt, section, sec.len) == 0)
225 fprintf(stderr, "oceani: cannot find section %s\n",
231 parse_oceani(ss->code, &context.config, dotrace ? stderr : NULL);
234 fprintf(stderr, "oceani: no program found.\n");
235 context.parse_error = 1;
237 if (context.prog && doprint) {
240 print_exec(context.prog, 0, brackets);
242 if (context.prog && doexec && !context.parse_error) {
243 if (!analyse_prog(context.prog, &context)) {
244 fprintf(stderr, "oceani: type error in program - not running.\n");
247 interp_prog(context.prog, argv+optind+1);
249 free_exec(context.prog);
252 struct section *t = s->next;
258 ## free context types
259 exit(context.parse_error ? 1 : 0);
264 The four requirements of parse, analyse, print, interpret apply to
265 each language element individually so that is how most of the code
268 Three of the four are fairly self explanatory. The one that requires
269 a little explanation is the analysis step.
271 The current language design does not require the types of variables to
272 be declared, but they must still have a single type. Different
273 operations impose different requirements on the variables, for example
274 addition requires both arguments to be numeric, and assignment
275 requires the variable on the left to have the same type as the
276 expression on the right.
278 Analysis involves propagating these type requirements around and
279 consequently setting the type of each variable. If any requirements
280 are violated (e.g. a string is compared with a number) or if a
281 variable needs to have two different types, then an error is raised
282 and the program will not run.
284 If the same variable is declared in both branchs of an 'if/else', or
285 in all cases of a 'switch' then the multiple instances may be merged
286 into just one variable if the variable is references after the
287 conditional statement. When this happens, the types must naturally be
288 consistent across all the branches. When the variable is not used
289 outside the if, the variables in the different branches are distinct
290 and can be of different types.
292 Determining the types of all variables early is important for
293 processing command line arguments. These can be assigned to any of
294 several types of variable, but we must first know the correct type so
295 any required conversion can happen. If a variable is associated with
296 a command line argument but no type can be interpreted (e.g. the
297 variable is only ever used in a `print` statement), then the type is
300 Undeclared names may only appear in "use" statements and "case" expressions.
301 These names are given a type of "label" and a unique value.
302 This allows them to fill the role of a name in an enumerated type, which
303 is useful for testing the `switch` statement.
305 As we will see, the condition part of a `while` statement can return
306 either a Boolean or some other type. This requires that the expected
307 type that gets passed around comprises a type and a flag to indicate
308 that `Tbool` is also permitted.
310 As there are, as yet, no distinct types that are compatible, there
311 isn't much subtlety in the analysis. When we have distinct number
312 types, this will become more interesting.
316 When analysis discovers an inconsistency it needs to report an error;
317 just refusing to run the code ensures that the error doesn't cascade,
318 but by itself it isn't very useful. A clear understanding of the sort
319 of error message that are useful will help guide the process of
322 At a simplistic level, the only sort of error that type analysis can
323 report is that the type of some construct doesn't match a contextual
324 requirement. For example, in `4 + "hello"` the addition provides a
325 contextual requirement for numbers, but `"hello"` is not a number. In
326 this particular example no further information is needed as the types
327 are obvious from local information. When a variable is involved that
328 isn't the case. It may be helpful to explain why the variable has a
329 particular type, by indicating the location where the type was set,
330 whether by declaration or usage.
332 Using a recursive-descent analysis we can easily detect a problem at
333 multiple locations. In "`hello:= "there"; 4 + hello`" the addition
334 will detect that one argument is not a number and the usage of `hello`
335 will detect that a number was wanted, but not provided. In this
336 (early) version of the language, we will generate error reports at
337 multiple locations, so the use of `hello` will report an error and
338 explain were the value was set, and the addition will report an error
339 and say why numbers are needed. To be able to report locations for
340 errors, each language element will need to record a file location
341 (line and column) and each variable will need to record the language
342 element where its type was set. For now we will assume that each line
343 of an error message indicates one location in the file, and up to 2
344 types. So we provide a `printf`-like function which takes a format, a
345 language (a `struct exec` which has not yet been introduced), and 2
346 types. "`%1`" reports the first type, "`%2`" reports the second. We
347 will need a function to print the location, once we know how that is
348 stored. As will be explained later, there are sometimes extra rules for
349 type matching and they might affect error messages, we need to pass those
352 As well as type errors, we sometimes need to report problems with
353 tokens, which might be unexpected or might name a type that has not
354 been defined. For these we have `tok_err()` which reports an error
355 with a given token. Each of the error functions sets the flag in the
356 context so indicate that parsing failed.
360 static void fput_loc(struct exec *loc, FILE *f);
362 ###### core functions
364 static void type_err(struct parse_context *c,
365 char *fmt, struct exec *loc,
366 struct type *t1, int rules, struct type *t2)
368 fprintf(stderr, "%s:", c->file_name);
369 fput_loc(loc, stderr);
370 for (; *fmt ; fmt++) {
377 case '%': fputc(*fmt, stderr); break; // NOTEST
378 default: fputc('?', stderr); break; // NOTEST
380 type_print(t1, stderr);
383 type_print(t2, stderr);
392 static void tok_err(struct parse_context *c, char *fmt, struct token *t)
394 fprintf(stderr, "%s:%d:%d: %s: %.*s\n", c->file_name, t->line, t->col, fmt,
395 t->txt.len, t->txt.txt);
399 ## Entities: declared and predeclared.
401 There are various "things" that the language and/or the interpreter
402 needs to know about to parse and execute a program. These include
403 types, variables, values, and executable code. These are all lumped
404 together under the term "entities" (calling them "objects" would be
405 confusing) and introduced here. These will introduced and described
406 here. The following section will present the different specific code
407 elements which comprise or manipulate these various entities.
411 Values come in a wide range of types, with more likely to be added.
412 Each type needs to be able to parse and print its own values (for
413 convenience at least) as well as to compare two values, at least for
414 equality and possibly for order. For now, values might need to be
415 duplicated and freed, though eventually such manipulations will be
416 better integrated into the language.
418 Rather than requiring every numeric type to support all numeric
419 operations (add, multiple, etc), we allow types to be able to present
420 as one of a few standard types: integer, float, and fraction. The
421 existence of these conversion functions eventaully enable types to
422 determine if they are compatible with other types, though such types
423 have not yet been implemented.
425 Named type are stored in a simple linked list. Objects of each type are "values"
426 which are often passed around by value.
433 ## value union fields
440 struct value (*init)(struct type *type);
441 struct value (*prepare)(struct type *type);
442 struct value (*parse)(struct type *type, char *str);
443 void (*print)(struct value val);
444 void (*print_type)(struct type *type, FILE *f);
445 int (*cmp_order)(struct value v1, struct value v2);
446 int (*cmp_eq)(struct value v1, struct value v2);
447 struct value (*dup)(struct value val);
448 void (*free)(struct value val);
449 void (*free_type)(struct type *t);
450 int (*compat)(struct type *this, struct type *other);
451 long long (*to_int)(struct value *v);
452 double (*to_float)(struct value *v);
453 int (*to_mpq)(mpq_t *q, struct value *v);
462 struct type *typelist;
466 static struct type *find_type(struct parse_context *c, struct text s)
468 struct type *l = c->typelist;
471 text_cmp(l->name, s) != 0)
476 static struct type *add_type(struct parse_context *c, struct text s,
481 n = calloc(1, sizeof(*n));
484 n->next = c->typelist;
489 static void free_type(struct type *t)
491 /* The type is always a reference to something in the
492 * context, so we don't need to free anything.
496 static void free_value(struct value v)
502 static int type_compat(struct type *require, struct type *have, int rules)
504 if ((rules & Rboolok) && have == Tbool)
506 if ((rules & Rnolabel) && have == Tlabel)
508 if (!require || !have)
512 return require->compat(require, have);
514 return require == have;
517 static void type_print(struct type *type, FILE *f)
520 fputs("*unknown*type*", f);
521 else if (type->name.len)
522 fprintf(f, "%.*s", type->name.len, type->name.txt);
523 else if (type->print_type)
524 type->print_type(type, f);
526 fputs("*invalid*type*", f); // NOTEST
529 static struct value val_prepare(struct type *type)
534 return type->prepare(type);
539 static struct value val_init(struct type *type)
544 return type->init(type);
549 static struct value dup_value(struct value v)
552 return v.type->dup(v);
556 static int value_cmp(struct value left, struct value right)
558 if (left.type && left.type->cmp_order)
559 return left.type->cmp_order(left, right);
560 if (left.type && left.type->cmp_eq)
561 return left.type->cmp_eq(left, right);
565 static void print_value(struct value v)
567 if (v.type && v.type->print)
570 printf("*Unknown*"); // NOTEST
573 static struct value parse_value(struct type *type, char *arg)
577 if (type && type->parse)
578 return type->parse(type, arg);
579 rv.type = NULL; // NOTEST
585 static void free_value(struct value v);
586 static int type_compat(struct type *require, struct type *have, int rules);
587 static void type_print(struct type *type, FILE *f);
588 static struct value val_init(struct type *type);
589 static struct value dup_value(struct value v);
590 static int value_cmp(struct value left, struct value right);
591 static void print_value(struct value v);
592 static struct value parse_value(struct type *type, char *arg);
594 ###### free context types
596 while (context.typelist) {
597 struct type *t = context.typelist;
599 context.typelist = t->next;
607 Values of the base types can be numbers, which we represent as
608 multi-precision fractions, strings, Booleans and labels. When
609 analysing the program we also need to allow for places where no value
610 is meaningful (type `Tnone`) and where we don't know what type to
611 expect yet (type is `NULL`).
613 Values are never shared, they are always copied when used, and freed
614 when no longer needed.
616 When propagating type information around the program, we need to
617 determine if two types are compatible, where type `NULL` is compatible
618 with anything. There are two special cases with type compatibility,
619 both related to the Conditional Statement which will be described
620 later. In some cases a Boolean can be accepted as well as some other
621 primary type, and in others any type is acceptable except a label (`Vlabel`).
622 A separate function encoding these cases will simplify some code later.
624 When assigning command line arguments to variables, we need to be able
625 to parse each type from a string.
627 The distinction beteen "prepare" and "init" needs to be explained.
628 "init" sets up an initial value, such as "zero" or the empty string.
629 "prepare" simply prepares the data structure so that if "free" gets
630 called on it, it won't do something silly. Normally a value will be
631 stored after "prepare" but before "free", but this might not happen if
640 myLDLIBS := libnumber.o libstring.o -lgmp
641 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
643 ###### type union fields
644 enum vtype {Vnone, Vstr, Vnum, Vbool, Vlabel} vtype;
646 ###### value union fields
653 static void _free_value(struct value v)
655 switch (v.type->vtype) {
657 case Vstr: free(v.str.txt); break;
658 case Vnum: mpq_clear(v.num); break;
664 ###### value functions
666 static struct value _val_prepare(struct type *type)
671 switch(type->vtype) {
675 memset(&rv.num, 0, sizeof(rv.num));
691 static struct value _val_init(struct type *type)
696 switch(type->vtype) {
697 case Vnone: // NOTEST
700 mpq_init(rv.num); break;
702 rv.str.txt = malloc(1);
708 case Vlabel: // NOTEST
709 rv.label = NULL; // NOTEST
715 static struct value _dup_value(struct value v)
719 switch (rv.type->vtype) {
720 case Vnone: // NOTEST
730 mpq_set(rv.num, v.num);
733 rv.str.len = v.str.len;
734 rv.str.txt = malloc(rv.str.len);
735 memcpy(rv.str.txt, v.str.txt, v.str.len);
741 static int _value_cmp(struct value left, struct value right)
744 if (left.type != right.type)
745 return left.type - right.type; // NOTEST
746 switch (left.type->vtype) {
747 case Vlabel: cmp = left.label == right.label ? 0 : 1; break;
748 case Vnum: cmp = mpq_cmp(left.num, right.num); break;
749 case Vstr: cmp = text_cmp(left.str, right.str); break;
750 case Vbool: cmp = left.bool - right.bool; break;
751 case Vnone: cmp = 0; // NOTEST
756 static void _print_value(struct value v)
758 switch (v.type->vtype) {
759 case Vnone: // NOTEST
760 printf("*no-value*"); break; // NOTEST
761 case Vlabel: // NOTEST
762 printf("*label-%p*", v.label); break; // NOTEST
764 printf("%.*s", v.str.len, v.str.txt); break;
766 printf("%s", v.bool ? "True":"False"); break;
771 mpf_set_q(fl, v.num);
772 gmp_printf("%Fg", fl);
779 static struct value _parse_value(struct type *type, char *arg)
787 switch(type->vtype) {
788 case Vlabel: // NOTEST
789 case Vnone: // NOTEST
790 val.type = NULL; // NOTEST
793 val.str.len = strlen(arg);
794 val.str.txt = malloc(val.str.len);
795 memcpy(val.str.txt, arg, val.str.len);
802 tx.txt = arg; tx.len = strlen(tx.txt);
803 if (number_parse(val.num, tail, tx) == 0)
806 mpq_neg(val.num, val.num);
808 printf("Unsupported suffix: %s\n", arg);
813 if (strcasecmp(arg, "true") == 0 ||
814 strcmp(arg, "1") == 0)
816 else if (strcasecmp(arg, "false") == 0 ||
817 strcmp(arg, "0") == 0)
820 printf("Bad bool: %s\n", arg);
828 static void _free_value(struct value v);
830 static struct type base_prototype = {
832 .prepare = _val_prepare,
833 .parse = _parse_value,
834 .print = _print_value,
835 .cmp_order = _value_cmp,
836 .cmp_eq = _value_cmp,
841 static struct type *Tbool, *Tstr, *Tnum, *Tnone, *Tlabel;
844 static struct type *add_base_type(struct parse_context *c, char *n, enum vtype vt)
846 struct text txt = { n, strlen(n) };
849 t = add_type(c, txt, &base_prototype);
854 ###### context initialization
856 Tbool = add_base_type(&context, "Boolean", Vbool);
857 Tstr = add_base_type(&context, "string", Vstr);
858 Tnum = add_base_type(&context, "number", Vnum);
859 Tnone = add_base_type(&context, "none", Vnone);
860 Tlabel = add_base_type(&context, "label", Vlabel);
864 Variables are scoped named values. We store the names in a linked
865 list of "bindings" sorted lexically, and use sequential search and
872 struct binding *next; // in lexical order
876 This linked list is stored in the parse context so that "reduce"
877 functions can find or add variables, and so the analysis phase can
878 ensure that every variable gets a type.
882 struct binding *varlist; // In lexical order
886 static struct binding *find_binding(struct parse_context *c, struct text s)
888 struct binding **l = &c->varlist;
893 (cmp = text_cmp((*l)->name, s)) < 0)
897 n = calloc(1, sizeof(*n));
904 Each name can be linked to multiple variables defined in different
905 scopes. Each scope starts where the name is declared and continues
906 until the end of the containing code block. Scopes of a given name
907 cannot nest, so a declaration while a name is in-scope is an error.
909 ###### binding fields
910 struct variable *var;
914 struct variable *previous;
916 struct binding *name;
917 struct exec *where_decl;// where name was declared
918 struct exec *where_set; // where type was set
922 While the naming seems strange, we include local constants in the
923 definition of variables. A name declared `var := value` can
924 subsequently be changed, but a name declared `var ::= value` cannot -
927 ###### variable fields
930 Scopes in parallel branches can be partially merged. More
931 specifically, if a given name is declared in both branches of an
932 if/else then its scope is a candidate for merging. Similarly if
933 every branch of an exhaustive switch (e.g. has an "else" clause)
934 declares a given name, then the scopes from the branches are
935 candidates for merging.
937 Note that names declared inside a loop (which is only parallel to
938 itself) are never visible after the loop. Similarly names defined in
939 scopes which are not parallel, such as those started by `for` and
940 `switch`, are never visible after the scope. Only variables defined in
941 both `then` and `else` (including the implicit then after an `if`, and
942 excluding `then` used with `for`) and in all `case`s and `else` of a
943 `switch` or `while` can be visible beyond the `if`/`switch`/`while`.
945 Labels, which are a bit like variables, follow different rules.
946 Labels are not explicitly declared, but if an undeclared name appears
947 in a context where a label is legal, that effectively declares the
948 name as a label. The declaration remains in force (or in scope) at
949 least to the end of the immediately containing block and conditionally
950 in any larger containing block which does not declare the name in some
951 other way. Importantly, the conditional scope extension happens even
952 if the label is only used in one parallel branch of a conditional --
953 when used in one branch it is treated as having been declared in all
956 Merge candidates are tentatively visible beyond the end of the
957 branching statement which creates them. If the name is used, the
958 merge is affirmed and they become a single variable visible at the
959 outer layer. If not - if it is redeclared first - the merge lapses.
961 To track scopes we have an extra stack, implemented as a linked list,
962 which roughly parallels the parse stack and which is used exclusively
963 for scoping. When a new scope is opened, a new frame is pushed and
964 the child-count of the parent frame is incremented. This child-count
965 is used to distinguish between the first of a set of parallel scopes,
966 in which declared variables must not be in scope, and subsequent
967 branches, whether they must already be conditionally scoped.
969 To push a new frame *before* any code in the frame is parsed, we need a
970 grammar reduction. This is most easily achieved with a grammar
971 element which derives the empty string, and creates the new scope when
972 it is recognized. This can be placed, for example, between a keyword
973 like "if" and the code following it.
977 struct scope *parent;
983 struct scope *scope_stack;
986 static void scope_pop(struct parse_context *c)
988 struct scope *s = c->scope_stack;
990 c->scope_stack = s->parent;
995 static void scope_push(struct parse_context *c)
997 struct scope *s = calloc(1, sizeof(*s));
999 c->scope_stack->child_count += 1;
1000 s->parent = c->scope_stack;
1002 c->scope_depth += 1;
1008 OpenScope -> ${ scope_push(c); }$
1010 Each variable records a scope depth and is in one of four states:
1012 - "in scope". This is the case between the declaration of the
1013 variable and the end of the containing block, and also between
1014 the usage with affirms a merge and the end of that block.
1016 The scope depth is not greater than the current parse context scope
1017 nest depth. When the block of that depth closes, the state will
1018 change. To achieve this, all "in scope" variables are linked
1019 together as a stack in nesting order.
1021 - "pending". The "in scope" block has closed, but other parallel
1022 scopes are still being processed. So far, every parallel block at
1023 the same level that has closed has declared the name.
1025 The scope depth is the depth of the last parallel block that
1026 enclosed the declaration, and that has closed.
1028 - "conditionally in scope". The "in scope" block and all parallel
1029 scopes have closed, and no further mention of the name has been
1030 seen. This state includes a secondary nest depth which records the
1031 outermost scope seen since the variable became conditionally in
1032 scope. If a use of the name is found, the variable becomes "in
1033 scope" and that secondary depth becomes the recorded scope depth.
1034 If the name is declared as a new variable, the old variable becomes
1035 "out of scope" and the recorded scope depth stays unchanged.
1037 - "out of scope". The variable is neither in scope nor conditionally
1038 in scope. It is permanently out of scope now and can be removed from
1039 the "in scope" stack.
1041 ###### variable fields
1042 int depth, min_depth;
1043 enum { OutScope, PendingScope, CondScope, InScope } scope;
1044 struct variable *in_scope;
1046 ###### parse context
1048 struct variable *in_scope;
1050 All variables with the same name are linked together using the
1051 'previous' link. Those variable that have
1052 been affirmatively merged all have a 'merged' pointer that points to
1053 one primary variable - the most recently declared instance. When
1054 merging variables, we need to also adjust the 'merged' pointer on any
1055 other variables that had previously been merged with the one that will
1056 no longer be primary.
1058 A variable that is no longer the most recent instance of a name may
1059 still have "pending" scope, if it might still be merged with most
1060 recent instance. These variables don't really belong in the
1061 "in_scope" list, but are not immediately removed when a new instance
1062 is found. Instead, they are detected and ignored when considering the
1063 list of in_scope names.
1065 ###### variable fields
1066 struct variable *merged;
1068 ###### ast functions
1070 static void variable_merge(struct variable *primary, struct variable *secondary)
1074 if (primary->merged)
1076 primary = primary->merged;
1078 for (v = primary->previous; v; v=v->previous)
1079 if (v == secondary || v == secondary->merged ||
1080 v->merged == secondary ||
1081 (v->merged && v->merged == secondary->merged)) {
1082 v->scope = OutScope;
1083 v->merged = primary;
1087 ###### free context vars
1089 while (context.varlist) {
1090 struct binding *b = context.varlist;
1091 struct variable *v = b->var;
1092 context.varlist = b->next;
1095 struct variable *t = v;
1100 // This is a global constant
1101 free_exec(t->where_decl);
1106 #### Manipulating Bindings
1108 When a name is conditionally visible, a new declaration discards the
1109 old binding - the condition lapses. Conversely a usage of the name
1110 affirms the visibility and extends it to the end of the containing
1111 block - i.e. the block that contains both the original declaration and
1112 the latest usage. This is determined from `min_depth`. When a
1113 conditionally visible variable gets affirmed like this, it is also
1114 merged with other conditionally visible variables with the same name.
1116 When we parse a variable declaration we either report an error if the
1117 name is currently bound, or create a new variable at the current nest
1118 depth if the name is unbound or bound to a conditionally scoped or
1119 pending-scope variable. If the previous variable was conditionally
1120 scoped, it and its homonyms becomes out-of-scope.
1122 When we parse a variable reference (including non-declarative
1123 assignment) we report an error if the name is not bound or is bound to
1124 a pending-scope variable; update the scope if the name is bound to a
1125 conditionally scoped variable; or just proceed normally if the named
1126 variable is in scope.
1128 When we exit a scope, any variables bound at this level are either
1129 marked out of scope or pending-scoped, depending on whether the scope
1130 was sequential or parallel. Here a "parallel" scope means the "then"
1131 or "else" part of a conditional, or any "case" or "else" branch of a
1132 switch. Other scopes are "sequential".
1134 When exiting a parallel scope we check if there are any variables that
1135 were previously pending and are still visible. If there are, then
1136 there weren't redeclared in the most recent scope, so they cannot be
1137 merged and must become out-of-scope. If it is not the first of
1138 parallel scopes (based on `child_count`), we check that there was a
1139 previous binding that is still pending-scope. If there isn't, the new
1140 variable must now be out-of-scope.
1142 When exiting a sequential scope that immediately enclosed parallel
1143 scopes, we need to resolve any pending-scope variables. If there was
1144 no `else` clause, and we cannot determine that the `switch` was exhaustive,
1145 we need to mark all pending-scope variable as out-of-scope. Otherwise
1146 all pending-scope variables become conditionally scoped.
1149 enum closetype { CloseSequential, CloseParallel, CloseElse };
1151 ###### ast functions
1153 static struct variable *var_decl(struct parse_context *c, struct text s)
1155 struct binding *b = find_binding(c, s);
1156 struct variable *v = b->var;
1158 switch (v ? v->scope : OutScope) {
1160 /* Caller will report the error */
1164 v && v->scope == CondScope;
1166 v->scope = OutScope;
1170 v = calloc(1, sizeof(*v));
1171 v->previous = b->var;
1174 v->min_depth = v->depth = c->scope_depth;
1176 v->in_scope = c->in_scope;
1178 v->val = val_prepare(NULL);
1182 static struct variable *var_ref(struct parse_context *c, struct text s)
1184 struct binding *b = find_binding(c, s);
1185 struct variable *v = b->var;
1186 struct variable *v2;
1188 switch (v ? v->scope : OutScope) {
1191 /* Caller will report the error */
1194 /* All CondScope variables of this name need to be merged
1195 * and become InScope
1197 v->depth = v->min_depth;
1199 for (v2 = v->previous;
1200 v2 && v2->scope == CondScope;
1202 variable_merge(v, v2);
1210 static void var_block_close(struct parse_context *c, enum closetype ct)
1212 /* Close off all variables that are in_scope */
1213 struct variable *v, **vp, *v2;
1216 for (vp = &c->in_scope;
1217 v = *vp, v && v->depth > c->scope_depth && v->min_depth > c->scope_depth;
1219 if (v->name->var == v) switch (ct) {
1221 case CloseParallel: /* handle PendingScope */
1225 if (c->scope_stack->child_count == 1)
1226 v->scope = PendingScope;
1227 else if (v->previous &&
1228 v->previous->scope == PendingScope)
1229 v->scope = PendingScope;
1230 else if (v->val.type == Tlabel)
1231 v->scope = PendingScope;
1232 else if (v->name->var == v)
1233 v->scope = OutScope;
1234 if (ct == CloseElse) {
1235 /* All Pending variables with this name
1236 * are now Conditional */
1238 v2 && v2->scope == PendingScope;
1240 v2->scope = CondScope;
1245 v2 && v2->scope == PendingScope;
1247 if (v2->val.type != Tlabel)
1248 v2->scope = OutScope;
1250 case OutScope: break;
1253 case CloseSequential:
1254 if (v->val.type == Tlabel)
1255 v->scope = PendingScope;
1258 v->scope = OutScope;
1261 /* There was no 'else', so we can only become
1262 * conditional if we know the cases were exhaustive,
1263 * and that doesn't mean anything yet.
1264 * So only labels become conditional..
1267 v2 && v2->scope == PendingScope;
1269 if (v2->val.type == Tlabel) {
1270 v2->scope = CondScope;
1271 v2->min_depth = c->scope_depth;
1273 v2->scope = OutScope;
1276 case OutScope: break;
1280 if (v->scope == OutScope || v->name->var != v)
1289 Executables can be lots of different things. In many cases an
1290 executable is just an operation combined with one or two other
1291 executables. This allows for expressions and lists etc. Other times
1292 an executable is something quite specific like a constant or variable
1293 name. So we define a `struct exec` to be a general executable with a
1294 type, and a `struct binode` which is a subclass of `exec`, forms a
1295 node in a binary tree, and holds an operation. There will be other
1296 subclasses, and to access these we need to be able to `cast` the
1297 `exec` into the various other types.
1300 #define cast(structname, pointer) ({ \
1301 const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \
1302 if (__mptr && *__mptr != X##structname) abort(); \
1303 (struct structname *)( (char *)__mptr);})
1305 #define new(structname) ({ \
1306 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1307 __ptr->type = X##structname; \
1308 __ptr->line = -1; __ptr->column = -1; \
1311 #define new_pos(structname, token) ({ \
1312 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1313 __ptr->type = X##structname; \
1314 __ptr->line = token.line; __ptr->column = token.col; \
1323 enum exec_types type;
1331 struct exec *left, *right;
1334 ###### ast functions
1336 static int __fput_loc(struct exec *loc, FILE *f)
1340 if (loc->line >= 0) {
1341 fprintf(f, "%d:%d: ", loc->line, loc->column);
1344 if (loc->type == Xbinode)
1345 return __fput_loc(cast(binode,loc)->left, f) ||
1346 __fput_loc(cast(binode,loc)->right, f);
1349 static void fput_loc(struct exec *loc, FILE *f)
1351 if (!__fput_loc(loc, f))
1352 fprintf(f, "??:??: "); // NOTEST
1355 Each different type of `exec` node needs a number of functions
1356 defined, a bit like methods. We must be able to be able to free it,
1357 print it, analyse it and execute it. Once we have specific `exec`
1358 types we will need to parse them too. Let's take this a bit more
1363 The parser generator requires a `free_foo` function for each struct
1364 that stores attributes and they will often be `exec`s and subtypes
1365 there-of. So we need `free_exec` which can handle all the subtypes,
1366 and we need `free_binode`.
1368 ###### ast functions
1370 static void free_binode(struct binode *b)
1375 free_exec(b->right);
1379 ###### core functions
1380 static void free_exec(struct exec *e)
1389 ###### forward decls
1391 static void free_exec(struct exec *e);
1393 ###### free exec cases
1394 case Xbinode: free_binode(cast(binode, e)); break;
1398 Printing an `exec` requires that we know the current indent level for
1399 printing line-oriented components. As will become clear later, we
1400 also want to know what sort of bracketing to use.
1402 ###### ast functions
1404 static void do_indent(int i, char *str)
1411 ###### core functions
1412 static void print_binode(struct binode *b, int indent, int bracket)
1416 ## print binode cases
1420 static void print_exec(struct exec *e, int indent, int bracket)
1426 print_binode(cast(binode, e), indent, bracket); break;
1431 ###### forward decls
1433 static void print_exec(struct exec *e, int indent, int bracket);
1437 As discussed, analysis involves propagating type requirements around
1438 the program and looking for errors.
1440 So `propagate_types` is passed an expected type (being a `struct type`
1441 pointer together with some `val_rules` flags) that the `exec` is
1442 expected to return, and returns the type that it does return, either
1443 of which can be `NULL` signifying "unknown". An `ok` flag is passed
1444 by reference. It is set to `0` when an error is found, and `2` when
1445 any change is made. If it remains unchanged at `1`, then no more
1446 propagation is needed.
1450 enum val_rules {Rnolabel = 1<<0, Rboolok = 1<<1, Rnoconstant = 2<<1};
1454 if (rules & Rnolabel)
1455 fputs(" (labels not permitted)", stderr);
1458 ###### core functions
1460 static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1461 struct type *type, int rules);
1462 static struct type *__propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1463 struct type *type, int rules)
1470 switch (prog->type) {
1473 struct binode *b = cast(binode, prog);
1475 ## propagate binode cases
1479 ## propagate exec cases
1484 static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1485 struct type *type, int rules)
1487 struct type *ret = __propagate_types(prog, c, ok, type, rules);
1496 Interpreting an `exec` doesn't require anything but the `exec`. State
1497 is stored in variables and each variable will be directly linked from
1498 within the `exec` tree. The exception to this is the whole `program`
1499 which needs to look at command line arguments. The `program` will be
1500 interpreted separately.
1502 Each `exec` can return a value, which may be `Tnone` but must be
1503 non-NULL; Some `exec`s will return the location of a value, which can
1504 be updates. To support this, each exec case must store either a value
1505 in `val` or the pointer to a value in `lval`. If `lval` is set, but a
1506 simple value is required, `inter_exec()` will dereference `lval` to
1509 ###### core functions
1512 struct value val, *lval;
1515 static struct lrval _interp_exec(struct exec *e);
1517 static struct value interp_exec(struct exec *e)
1519 struct lrval ret = _interp_exec(e);
1522 return dup_value(*ret.lval);
1527 static struct value *linterp_exec(struct exec *e)
1529 struct lrval ret = _interp_exec(e);
1534 static struct lrval _interp_exec(struct exec *e)
1537 struct value rv, *lrv = NULL;
1548 struct binode *b = cast(binode, e);
1549 struct value left, right, *lleft;
1550 left.type = right.type = Tnone;
1552 ## interp binode cases
1554 free_value(left); free_value(right);
1557 ## interp exec cases
1566 Now that we have the shape of the interpreter in place we can add some
1567 complex types and connected them in to the data structures and the
1568 different phases of parse, analyse, print, interpret.
1570 Thus far we have arrays and structs.
1574 Arrays can be declared by giving a size and a type, as `[size]type' so
1575 `freq:[26]number` declares `freq` to be an array of 26 numbers. The
1576 size can be an arbitrary expression which is evaluated when the name
1579 Arrays cannot be assigned. When pointers are introduced we will also
1580 introduce array slices which can refer to part or all of an array -
1581 the assignment syntax will create a slice. For now, an array can only
1582 ever be referenced by the name it is declared with. It is likely that
1583 a "`copy`" primitive will eventually be define which can be used to
1584 make a copy of an array with controllable depth.
1586 ###### type union fields
1590 struct variable *vsize;
1591 struct type *member;
1594 ###### value union fields
1596 struct value *elmnts;
1599 ###### value functions
1601 static struct value array_prepare(struct type *type)
1606 ret.array.elmnts = NULL;
1610 static struct value array_init(struct type *type)
1616 if (type->array.vsize) {
1619 mpz_tdiv_q(q, mpq_numref(type->array.vsize->val.num),
1620 mpq_denref(type->array.vsize->val.num));
1621 type->array.size = mpz_get_si(q);
1624 ret.array.elmnts = calloc(type->array.size,
1625 sizeof(ret.array.elmnts[0]));
1626 for (i = 0; ret.array.elmnts && i < type->array.size; i++)
1627 ret.array.elmnts[i] = val_init(type->array.member);
1631 static void array_free(struct value val)
1635 if (val.array.elmnts)
1636 for (i = 0; i < val.type->array.size; i++)
1637 free_value(val.array.elmnts[i]);
1638 free(val.array.elmnts);
1641 static int array_compat(struct type *require, struct type *have)
1643 if (have->compat != require->compat)
1645 /* Both are arrays, so we can look at details */
1646 if (!type_compat(require->array.member, have->array.member, 0))
1648 if (require->array.vsize == NULL && have->array.vsize == NULL)
1649 return require->array.size == have->array.size;
1651 return require->array.vsize == have->array.vsize;
1654 static void array_print_type(struct type *type, FILE *f)
1657 if (type->array.vsize) {
1658 struct binding *b = type->array.vsize->name;
1659 fprintf(f, "%.*s]", b->name.len, b->name.txt);
1661 fprintf(f, "%d]", type->array.size);
1662 type_print(type->array.member, f);
1665 static struct type array_prototype = {
1666 .prepare = array_prepare,
1668 .print_type = array_print_type,
1669 .compat = array_compat,
1675 | [ NUMBER ] Type ${
1676 $0 = calloc(1, sizeof(struct type));
1677 *($0) = array_prototype;
1678 $0->array.member = $<4;
1679 $0->array.vsize = NULL;
1683 if (number_parse(num, tail, $2.txt) == 0)
1684 tok_err(c, "error: unrecognised number", &$2);
1686 tok_err(c, "error: unsupported number suffix", &$2);
1688 $0->array.size = mpz_get_ui(mpq_numref(num));
1689 if (mpz_cmp_ui(mpq_denref(num), 1) != 0) {
1690 tok_err(c, "error: array size must be an integer",
1692 } else if (mpz_cmp_ui(mpq_numref(num), 1UL << 30) >= 0)
1693 tok_err(c, "error: array size is too large",
1697 $0->next= c->anon_typelist;
1698 c->anon_typelist = $0;
1702 | [ IDENTIFIER ] Type ${ {
1703 struct variable *v = var_ref(c, $2.txt);
1706 tok_err(c, "error: name undeclared", &$2);
1707 else if (!v->constant)
1708 tok_err(c, "error: array size must be a constant", &$2);
1710 $0 = calloc(1, sizeof(struct type));
1711 *($0) = array_prototype;
1712 $0->array.member = $<4;
1714 $0->array.vsize = v;
1715 $0->next= c->anon_typelist;
1716 c->anon_typelist = $0;
1719 ###### parse context
1721 struct type *anon_typelist;
1723 ###### free context types
1725 while (context.anon_typelist) {
1726 struct type *t = context.anon_typelist;
1728 context.anon_typelist = t->next;
1735 ###### variable grammar
1737 | Variable [ Expression ] ${ {
1738 struct binode *b = new(binode);
1745 ###### print binode cases
1747 print_exec(b->left, -1, bracket);
1749 print_exec(b->right, -1, bracket);
1753 ###### propagate binode cases
1755 /* left must be an array, right must be a number,
1756 * result is the member type of the array
1758 propagate_types(b->right, c, ok, Tnum, 0);
1759 t = propagate_types(b->left, c, ok, NULL, rules & Rnoconstant);
1760 if (!t || t->compat != array_compat) {
1761 type_err(c, "error: %1 cannot be indexed", prog, t, 0, NULL);
1764 if (!type_compat(type, t->array.member, rules)) {
1765 type_err(c, "error: have %1 but need %2", prog,
1766 t->array.member, rules, type);
1768 return t->array.member;
1772 ###### interp binode cases
1777 lleft = linterp_exec(b->left);
1778 right = interp_exec(b->right);
1780 mpz_tdiv_q(q, mpq_numref(right.num), mpq_denref(right.num));
1784 if (i >= 0 && i < lleft->type->array.size)
1785 lrv = &lleft->array.elmnts[i];
1787 rv = val_init(lleft->type->array.member);
1793 A `struct` is a data-type that contains one or more other data-types.
1794 It differs from an array in that each member can be of a different
1795 type, and they are accessed by name rather than by number. Thus you
1796 cannot choose an element by calculation, you need to know what you
1799 The language makes no promises about how a given structure will be
1800 stored in memory - it is free to rearrange fields to suit whatever
1801 criteria seems important.
1803 Structs are declared separately from program code - they cannot be
1804 declared in-line in a variable declaration like arrays can. A struct
1805 is given a name and this name is used to identify the type - the name
1806 is not prefixed by the word `struct` as it would be in C.
1808 Structs are only treated as the same if they have the same name.
1809 Simply having the same fields in the same order is not enough. This
1810 might change once we can create structure initializes from a list of
1813 Each component datum is identified much like a variable is declared,
1814 with a name, one or two colons, and a type. The type cannot be omitted
1815 as there is no opportunity to deduce the type from usage. An initial
1816 value can be given following an equals sign, so
1818 ##### Example: a struct type
1824 would declare a type called "complex" which has two number fields,
1825 each initialised to zero.
1827 Struct will need to be declared separately from the code that uses
1828 them, so we will need to be able to print out the declaration of a
1829 struct when reprinting the whole program. So a `print_type_decl` type
1830 function will be needed.
1832 ###### type union fields
1843 ###### value union fields
1845 struct value *fields;
1848 ###### type functions
1849 void (*print_type_decl)(struct type *type, FILE *f);
1851 ###### value functions
1853 static struct value structure_prepare(struct type *type)
1858 ret.structure.fields = NULL;
1862 static struct value structure_init(struct type *type)
1868 ret.structure.fields = calloc(type->structure.nfields,
1869 sizeof(ret.structure.fields[0]));
1870 for (i = 0; ret.structure.fields && i < type->structure.nfields; i++)
1871 ret.structure.fields[i] = val_init(type->structure.fields[i].type);
1875 static void structure_free(struct value val)
1879 if (val.structure.fields)
1880 for (i = 0; i < val.type->structure.nfields; i++)
1881 free_value(val.structure.fields[i]);
1882 free(val.structure.fields);
1885 static void structure_free_type(struct type *t)
1888 for (i = 0; i < t->structure.nfields; i++)
1889 free_value(t->structure.fields[i].init);
1890 free(t->structure.fields);
1893 static struct type structure_prototype = {
1894 .prepare = structure_prepare,
1895 .init = structure_init,
1896 .free = structure_free,
1897 .free_type = structure_free_type,
1898 .print_type_decl = structure_print_type,
1912 ###### free exec cases
1914 free_exec(cast(fieldref, e)->left);
1918 ###### variable grammar
1920 | Variable . IDENTIFIER ${ {
1921 struct fieldref *fr = new_pos(fieldref, $2);
1928 ###### print exec cases
1932 struct fieldref *f = cast(fieldref, e);
1933 print_exec(f->left, -1, bracket);
1934 printf(".%.*s", f->name.len, f->name.txt);
1938 ###### ast functions
1939 static int find_struct_index(struct type *type, struct text field)
1942 for (i = 0; i < type->structure.nfields; i++)
1943 if (text_cmp(type->structure.fields[i].name, field) == 0)
1948 ###### propagate exec cases
1952 struct fieldref *f = cast(fieldref, prog);
1953 struct type *st = propagate_types(f->left, c, ok, NULL, 0);
1956 type_err(c, "error: unknown type for field access", f->left,
1958 else if (st->prepare != structure_prepare)
1959 type_err(c, "error: field reference attempted on %1, not a struct",
1960 f->left, st, 0, NULL);
1961 else if (f->index == -2) {
1962 f->index = find_struct_index(st, f->name);
1964 type_err(c, "error: cannot find requested field in %1",
1965 f->left, st, 0, NULL);
1967 if (f->index >= 0) {
1968 struct type *ft = st->structure.fields[f->index].type;
1969 if (!type_compat(type, ft, rules))
1970 type_err(c, "error: have %1 but need %2", prog,
1977 ###### interp exec cases
1980 struct fieldref *f = cast(fieldref, e);
1981 struct value *lleft = linterp_exec(f->left);
1982 lrv = &lleft->structure.fields[f->index];
1988 struct fieldlist *prev;
1992 ###### ast functions
1993 static void free_fieldlist(struct fieldlist *f)
1997 free_fieldlist(f->prev);
1998 free_value(f->f.init);
2002 ###### top level grammar
2003 DeclareStruct -> struct IDENTIFIER FieldBlock ${ {
2005 add_type(c, $2.txt, &structure_prototype);
2007 struct fieldlist *f;
2009 for (f = $3; f; f=f->prev)
2012 t->structure.nfields = cnt;
2013 t->structure.fields = calloc(cnt, sizeof(struct field));
2017 t->structure.fields[cnt] = f->f;
2018 f->f.init = val_prepare(Tnone);
2022 | DeclareStruct NEWLINE
2032 FieldBlock -> Open FieldList } ${ $0 = $<2; }$
2033 | Open SimpleFieldList } ${ $0 = $<2; }$
2034 | : FieldList $$NEWLINE ${ $0 = $<2; }$
2036 FieldList -> FieldLines ${ $0 = $<1; }$
2037 | Newlines FieldLines ${ $0 = $<2; }$
2038 FieldLines -> SimpleFieldListLine ${ $0 = $<1; }$
2039 | FieldLines SimpleFieldListLine ${
2044 SimpleFieldListLine -> SimpleFieldList NEWLINE ${ $0 = $<1; }$
2045 | SimpleFieldListLine NEWLINE ${ $0 = $<1; }$
2046 | ERROR NEWLINE ${ tok_err(c, "Syntax error in struct field", &$1); }$
2048 SimpleFieldList -> Field ${ $0 = $<1; }$
2049 | SimpleFieldList ; Field ${
2053 | SimpleFieldList ; ${
2057 Field -> IDENTIFIER : Type = Expression ${ {
2060 $0 = calloc(1, sizeof(struct fieldlist));
2061 $0->f.name = $1.txt;
2063 $0->f.init = val_prepare($0->f.type);
2066 propagate_types($<5, c, &ok, $3, 0);
2071 $0->f.init = interp_exec($5);
2073 | IDENTIFIER : Type ${
2074 $0 = calloc(1, sizeof(struct fieldlist));
2075 $0->f.name = $1.txt;
2077 $0->f.init = val_init($3);
2080 ###### forward decls
2081 static void structure_print_type(struct type *t, FILE *f);
2083 ###### value functions
2084 static void structure_print_type(struct type *t, FILE *f)
2088 fprintf(f, "struct %.*s:\n", t->name.len, t->name.txt);
2090 for (i = 0; i < t->structure.nfields; i++) {
2091 struct field *fl = t->structure.fields + i;
2092 fprintf(f, " %.*s : ", fl->name.len, fl->name.txt);
2093 type_print(fl->type, f);
2094 if (fl->init.type->print) {
2096 if (fl->init.type == Tstr)
2098 print_value(fl->init);
2099 if (fl->init.type == Tstr)
2106 ###### print type decls
2111 while (target != 0) {
2113 for (t = context.typelist; t ; t=t->next)
2114 if (t->print_type_decl) {
2123 t->print_type_decl(t, stdout);
2129 ## Executables: the elements of code
2131 Each code element needs to be parsed, printed, analysed,
2132 interpreted, and freed. There are several, so let's just start with
2133 the easy ones and work our way up.
2137 We have already met values as separate objects. When manifest
2138 constants appear in the program text, that must result in an executable
2139 which has a constant value. So the `val` structure embeds a value in
2155 $0 = new_pos(val, $1);
2156 $0->val.type = Tbool;
2160 $0 = new_pos(val, $1);
2161 $0->val.type = Tbool;
2165 $0 = new_pos(val, $1);
2166 $0->val.type = Tnum;
2169 if (number_parse($0->val.num, tail, $1.txt) == 0)
2170 mpq_init($0->val.num);
2172 tok_err(c, "error: unsupported number suffix",
2177 $0 = new_pos(val, $1);
2178 $0->val.type = Tstr;
2181 string_parse(&$1, '\\', &$0->val.str, tail);
2183 tok_err(c, "error: unsupported string suffix",
2188 $0 = new_pos(val, $1);
2189 $0->val.type = Tstr;
2192 string_parse(&$1, '\\', &$0->val.str, tail);
2194 tok_err(c, "error: unsupported string suffix",
2199 ###### print exec cases
2202 struct val *v = cast(val, e);
2203 if (v->val.type == Tstr)
2205 print_value(v->val);
2206 if (v->val.type == Tstr)
2211 ###### propagate exec cases
2214 struct val *val = cast(val, prog);
2215 if (!type_compat(type, val->val.type, rules))
2216 type_err(c, "error: expected %1%r found %2",
2217 prog, type, rules, val->val.type);
2218 return val->val.type;
2221 ###### interp exec cases
2223 rv = dup_value(cast(val, e)->val);
2226 ###### ast functions
2227 static void free_val(struct val *v)
2235 ###### free exec cases
2236 case Xval: free_val(cast(val, e)); break;
2238 ###### ast functions
2239 // Move all nodes from 'b' to 'rv', reversing the order.
2240 // In 'b' 'left' is a list, and 'right' is the last node.
2241 // In 'rv', left' is the first node and 'right' is a list.
2242 static struct binode *reorder_bilist(struct binode *b)
2244 struct binode *rv = NULL;
2247 struct exec *t = b->right;
2251 b = cast(binode, b->left);
2261 Just as we used a `val` to wrap a value into an `exec`, we similarly
2262 need a `var` to wrap a `variable` into an exec. While each `val`
2263 contained a copy of the value, each `var` hold a link to the variable
2264 because it really is the same variable no matter where it appears.
2265 When a variable is used, we need to remember to follow the `->merged`
2266 link to find the primary instance.
2274 struct variable *var;
2280 VariableDecl -> IDENTIFIER : ${ {
2281 struct variable *v = var_decl(c, $1.txt);
2282 $0 = new_pos(var, $1);
2287 v = var_ref(c, $1.txt);
2289 type_err(c, "error: variable '%v' redeclared",
2291 type_err(c, "info: this is where '%v' was first declared",
2292 v->where_decl, NULL, 0, NULL);
2295 | IDENTIFIER :: ${ {
2296 struct variable *v = var_decl(c, $1.txt);
2297 $0 = new_pos(var, $1);
2303 v = var_ref(c, $1.txt);
2305 type_err(c, "error: variable '%v' redeclared",
2307 type_err(c, "info: this is where '%v' was first declared",
2308 v->where_decl, NULL, 0, NULL);
2311 | IDENTIFIER : Type ${ {
2312 struct variable *v = var_decl(c, $1.txt);
2313 $0 = new_pos(var, $1);
2318 v->val = val_prepare($<3);
2320 v = var_ref(c, $1.txt);
2322 type_err(c, "error: variable '%v' redeclared",
2324 type_err(c, "info: this is where '%v' was first declared",
2325 v->where_decl, NULL, 0, NULL);
2328 | IDENTIFIER :: Type ${ {
2329 struct variable *v = var_decl(c, $1.txt);
2330 $0 = new_pos(var, $1);
2335 v->val = val_prepare($<3);
2338 v = var_ref(c, $1.txt);
2340 type_err(c, "error: variable '%v' redeclared",
2342 type_err(c, "info: this is where '%v' was first declared",
2343 v->where_decl, NULL, 0, NULL);
2348 Variable -> IDENTIFIER ${ {
2349 struct variable *v = var_ref(c, $1.txt);
2350 $0 = new_pos(var, $1);
2352 /* This might be a label - allocate a var just in case */
2353 v = var_decl(c, $1.txt);
2355 v->val = val_prepare(Tnone);
2360 cast(var, $0)->var = v;
2365 Type -> IDENTIFIER ${
2366 $0 = find_type(c, $1.txt);
2369 "error: undefined type", &$1);
2376 ###### print exec cases
2379 struct var *v = cast(var, e);
2381 struct binding *b = v->var->name;
2382 printf("%.*s", b->name.len, b->name.txt);
2389 if (loc->type == Xvar) {
2390 struct var *v = cast(var, loc);
2392 struct binding *b = v->var->name;
2393 fprintf(stderr, "%.*s", b->name.len, b->name.txt);
2395 fputs("???", stderr); // NOTEST
2397 fputs("NOTVAR", stderr); // NOTEST
2400 ###### propagate exec cases
2404 struct var *var = cast(var, prog);
2405 struct variable *v = var->var;
2407 type_err(c, "%d:BUG: no variable!!", prog, NULL, 0, NULL); // NOTEST
2408 return Tnone; // NOTEST
2412 if (v->constant && (rules & Rnoconstant)) {
2413 type_err(c, "error: Cannot assign to a constant: %v",
2414 prog, NULL, 0, NULL);
2415 type_err(c, "info: name was defined as a constant here",
2416 v->where_decl, NULL, 0, NULL);
2419 if (v->val.type == Tnone && v->where_decl == prog)
2420 type_err(c, "error: variable used but not declared: %v",
2421 prog, NULL, 0, NULL);
2422 if (v->val.type == NULL) {
2423 if (type && *ok != 0) {
2424 v->val = val_prepare(type);
2425 v->where_set = prog;
2430 if (!type_compat(type, v->val.type, rules)) {
2431 type_err(c, "error: expected %1%r but variable '%v' is %2", prog,
2432 type, rules, v->val.type);
2433 type_err(c, "info: this is where '%v' was set to %1", v->where_set,
2434 v->val.type, rules, NULL);
2441 ###### interp exec cases
2444 struct var *var = cast(var, e);
2445 struct variable *v = var->var;
2453 ###### ast functions
2455 static void free_var(struct var *v)
2460 ###### free exec cases
2461 case Xvar: free_var(cast(var, e)); break;
2463 ### Expressions: Conditional
2465 Our first user of the `binode` will be conditional expressions, which
2466 is a bit odd as they actually have three components. That will be
2467 handled by having 2 binodes for each expression. The conditional
2468 expression is the lowest precedence operatior, so it gets to define
2469 what an "Expression" is. The next level up is "BoolExpr", which
2472 Conditional expressions are of the form "value `if` condition `else`
2473 other_value". They associate to the right, so everything to the right
2474 of `else` is part of an else value, while only the BoolExpr to the
2475 left of `if` is the if values. Between `if` and `else` there is no
2476 room for ambiguity, so a full conditional expression is allowed in there.
2487 Expression -> Expression if Expression else Expression $$ifelse ${ {
2488 struct binode *b1 = new(binode);
2489 struct binode *b2 = new(binode);
2498 ## expression grammar
2500 ###### print binode cases
2503 b2 = cast(binode, b->right);
2504 if (bracket) printf("(");
2505 print_exec(b2->left, -1, bracket);
2507 print_exec(b->left, -1, bracket);
2509 print_exec(b2->right, -1, bracket);
2510 if (bracket) printf(")");
2513 ###### propagate binode cases
2516 /* cond must be Tbool, others must match */
2517 struct binode *b2 = cast(binode, b->right);
2520 propagate_types(b->left, c, ok, Tbool, 0);
2521 t = propagate_types(b2->left, c, ok, type, Rnolabel);
2522 t2 = propagate_types(b2->right, c, ok, type ?: t, Rnolabel);
2526 ###### interp binode cases
2529 struct binode *b2 = cast(binode, b->right);
2530 left = interp_exec(b->left);
2532 rv = interp_exec(b2->left);
2534 rv = interp_exec(b2->right);
2538 ### Expressions: Boolean
2540 The next class of expressions to use the `binode` will be Boolean
2541 expressions. As I haven't implemented precedence in the parser
2542 generator yet, we need different names for each precedence level used
2543 by expressions. The outer most or lowest level precedence after
2544 conditional expressions are Boolean operators which form an `BoolExpr`
2545 out of `BTerm`s and `BFact`s. As well as `or` `and`, and `not` we
2546 have `and then` and `or else` which only evaluate the second operand
2547 if the result would make a difference.
2556 ###### expr precedence
2561 ###### expression grammar
2562 | Expression or Expression ${ {
2563 struct binode *b = new(binode);
2569 | Expression or else Expression ${ {
2570 struct binode *b = new(binode);
2577 | Expression and Expression ${ {
2578 struct binode *b = new(binode);
2584 | Expression and then Expression ${ {
2585 struct binode *b = new(binode);
2592 | not Expression ${ {
2593 struct binode *b = new(binode);
2599 ###### print binode cases
2601 if (bracket) printf("(");
2602 print_exec(b->left, -1, bracket);
2604 print_exec(b->right, -1, bracket);
2605 if (bracket) printf(")");
2608 if (bracket) printf("(");
2609 print_exec(b->left, -1, bracket);
2610 printf(" and then ");
2611 print_exec(b->right, -1, bracket);
2612 if (bracket) printf(")");
2615 if (bracket) printf("(");
2616 print_exec(b->left, -1, bracket);
2618 print_exec(b->right, -1, bracket);
2619 if (bracket) printf(")");
2622 if (bracket) printf("(");
2623 print_exec(b->left, -1, bracket);
2624 printf(" or else ");
2625 print_exec(b->right, -1, bracket);
2626 if (bracket) printf(")");
2629 if (bracket) printf("(");
2631 print_exec(b->right, -1, bracket);
2632 if (bracket) printf(")");
2635 ###### propagate binode cases
2641 /* both must be Tbool, result is Tbool */
2642 propagate_types(b->left, c, ok, Tbool, 0);
2643 propagate_types(b->right, c, ok, Tbool, 0);
2644 if (type && type != Tbool)
2645 type_err(c, "error: %1 operation found where %2 expected", prog,
2649 ###### interp binode cases
2651 rv = interp_exec(b->left);
2652 right = interp_exec(b->right);
2653 rv.bool = rv.bool && right.bool;
2656 rv = interp_exec(b->left);
2658 rv = interp_exec(b->right);
2661 rv = interp_exec(b->left);
2662 right = interp_exec(b->right);
2663 rv.bool = rv.bool || right.bool;
2666 rv = interp_exec(b->left);
2668 rv = interp_exec(b->right);
2671 rv = interp_exec(b->right);
2675 ### Expressions: Comparison
2677 Of slightly higher precedence that Boolean expressions are
2679 A comparison takes arguments of any comparable type, but the two types must be
2682 To simplify the parsing we introduce an `eop` which can record an
2683 expression operator.
2690 ###### ast functions
2691 static void free_eop(struct eop *e)
2705 ###### expr precedence
2706 $LEFT < > <= >= == != CMPop
2708 ###### expression grammar
2709 | Expression CMPop Expression ${ {
2710 struct binode *b = new(binode);
2720 CMPop -> < ${ $0.op = Less; }$
2721 | > ${ $0.op = Gtr; }$
2722 | <= ${ $0.op = LessEq; }$
2723 | >= ${ $0.op = GtrEq; }$
2724 | == ${ $0.op = Eql; }$
2725 | != ${ $0.op = NEql; }$
2727 ###### print binode cases
2735 if (bracket) printf("(");
2736 print_exec(b->left, -1, bracket);
2738 case Less: printf(" < "); break;
2739 case LessEq: printf(" <= "); break;
2740 case Gtr: printf(" > "); break;
2741 case GtrEq: printf(" >= "); break;
2742 case Eql: printf(" == "); break;
2743 case NEql: printf(" != "); break;
2744 default: abort(); // NOTEST
2746 print_exec(b->right, -1, bracket);
2747 if (bracket) printf(")");
2750 ###### propagate binode cases
2757 /* Both must match but not be labels, result is Tbool */
2758 t = propagate_types(b->left, c, ok, NULL, Rnolabel);
2760 propagate_types(b->right, c, ok, t, 0);
2762 t = propagate_types(b->right, c, ok, NULL, Rnolabel);
2764 t = propagate_types(b->left, c, ok, t, 0);
2766 if (!type_compat(type, Tbool, 0))
2767 type_err(c, "error: Comparison returns %1 but %2 expected", prog,
2768 Tbool, rules, type);
2771 ###### interp binode cases
2780 left = interp_exec(b->left);
2781 right = interp_exec(b->right);
2782 cmp = value_cmp(left, right);
2785 case Less: rv.bool = cmp < 0; break;
2786 case LessEq: rv.bool = cmp <= 0; break;
2787 case Gtr: rv.bool = cmp > 0; break;
2788 case GtrEq: rv.bool = cmp >= 0; break;
2789 case Eql: rv.bool = cmp == 0; break;
2790 case NEql: rv.bool = cmp != 0; break;
2791 default: rv.bool = 0; break; // NOTEST
2796 ### Expressions: The rest
2798 The remaining expressions with the highest precedence are arithmetic
2799 and string concatenation. They are `Expr`, `Term`, and `Factor`.
2800 The `Factor` is where the `Value` and `Variable` that we already have
2803 `+` and `-` are both infix and prefix operations (where they are
2804 absolute value and negation). These have different operator names.
2806 We also have a 'Bracket' operator which records where parentheses were
2807 found. This makes it easy to reproduce these when printing. Once
2808 precedence is handled better I might be able to discard this.
2817 ###### expr precedence
2822 ###### expression grammar
2823 | Expression Eop Expression ${ {
2824 struct binode *b = new(binode);
2831 | Expression Top Expression ${ {
2832 struct binode *b = new(binode);
2839 | ( Expression ) ${ {
2840 struct binode *b = new_pos(binode, $1);
2845 | Uop Expression ${ {
2846 struct binode *b = new(binode);
2851 | Value ${ $0 = $<1; }$
2852 | Variable ${ $0 = $<1; }$
2855 Eop -> + ${ $0.op = Plus; }$
2856 | - ${ $0.op = Minus; }$
2858 Uop -> + ${ $0.op = Absolute; }$
2859 | - ${ $0.op = Negate; }$
2861 Top -> * ${ $0.op = Times; }$
2862 | / ${ $0.op = Divide; }$
2863 | % ${ $0.op = Rem; }$
2864 | ++ ${ $0.op = Concat; }$
2866 ###### print binode cases
2873 if (bracket) printf("(");
2874 print_exec(b->left, indent, bracket);
2876 case Plus: fputs(" + ", stdout); break;
2877 case Minus: fputs(" - ", stdout); break;
2878 case Times: fputs(" * ", stdout); break;
2879 case Divide: fputs(" / ", stdout); break;
2880 case Rem: fputs(" % ", stdout); break;
2881 case Concat: fputs(" ++ ", stdout); break;
2882 default: abort(); // NOTEST
2884 print_exec(b->right, indent, bracket);
2885 if (bracket) printf(")");
2888 if (bracket) printf("(");
2890 print_exec(b->right, indent, bracket);
2891 if (bracket) printf(")");
2894 if (bracket) printf("(");
2896 print_exec(b->right, indent, bracket);
2897 if (bracket) printf(")");
2901 print_exec(b->right, indent, bracket);
2905 ###### propagate binode cases
2911 /* both must be numbers, result is Tnum */
2914 /* as propagate_types ignores a NULL,
2915 * unary ops fit here too */
2916 propagate_types(b->left, c, ok, Tnum, 0);
2917 propagate_types(b->right, c, ok, Tnum, 0);
2918 if (!type_compat(type, Tnum, 0))
2919 type_err(c, "error: Arithmetic returns %1 but %2 expected", prog,
2924 /* both must be Tstr, result is Tstr */
2925 propagate_types(b->left, c, ok, Tstr, 0);
2926 propagate_types(b->right, c, ok, Tstr, 0);
2927 if (!type_compat(type, Tstr, 0))
2928 type_err(c, "error: Concat returns %1 but %2 expected", prog,
2933 return propagate_types(b->right, c, ok, type, 0);
2935 ###### interp binode cases
2938 rv = interp_exec(b->left);
2939 right = interp_exec(b->right);
2940 mpq_add(rv.num, rv.num, right.num);
2943 rv = interp_exec(b->left);
2944 right = interp_exec(b->right);
2945 mpq_sub(rv.num, rv.num, right.num);
2948 rv = interp_exec(b->left);
2949 right = interp_exec(b->right);
2950 mpq_mul(rv.num, rv.num, right.num);
2953 rv = interp_exec(b->left);
2954 right = interp_exec(b->right);
2955 mpq_div(rv.num, rv.num, right.num);
2960 left = interp_exec(b->left);
2961 right = interp_exec(b->right);
2962 mpz_init(l); mpz_init(r); mpz_init(rem);
2963 mpz_tdiv_q(l, mpq_numref(left.num), mpq_denref(left.num));
2964 mpz_tdiv_q(r, mpq_numref(right.num), mpq_denref(right.num));
2965 mpz_tdiv_r(rem, l, r);
2966 rv = val_init(Tnum);
2967 mpq_set_z(rv.num, rem);
2968 mpz_clear(r); mpz_clear(l); mpz_clear(rem);
2972 rv = interp_exec(b->right);
2973 mpq_neg(rv.num, rv.num);
2976 rv = interp_exec(b->right);
2977 mpq_abs(rv.num, rv.num);
2980 rv = interp_exec(b->right);
2983 left = interp_exec(b->left);
2984 right = interp_exec(b->right);
2986 rv.str = text_join(left.str, right.str);
2989 ###### value functions
2991 static struct text text_join(struct text a, struct text b)
2994 rv.len = a.len + b.len;
2995 rv.txt = malloc(rv.len);
2996 memcpy(rv.txt, a.txt, a.len);
2997 memcpy(rv.txt+a.len, b.txt, b.len);
3001 ### Blocks, Statements, and Statement lists.
3003 Now that we have expressions out of the way we need to turn to
3004 statements. There are simple statements and more complex statements.
3005 Simple statements do not contain (syntactic) newlines, complex statements do.
3007 Statements often come in sequences and we have corresponding simple
3008 statement lists and complex statement lists.
3009 The former comprise only simple statements separated by semicolons.
3010 The later comprise complex statements and simple statement lists. They are
3011 separated by newlines. Thus the semicolon is only used to separate
3012 simple statements on the one line. This may be overly restrictive,
3013 but I'm not sure I ever want a complex statement to share a line with
3016 Note that a simple statement list can still use multiple lines if
3017 subsequent lines are indented, so
3019 ###### Example: wrapped simple statement list
3024 is a single simple statement list. This might allow room for
3025 confusion, so I'm not set on it yet.
3027 A simple statement list needs no extra syntax. A complex statement
3028 list has two syntactic forms. It can be enclosed in braces (much like
3029 C blocks), or it can be introduced by a colon and continue until an
3030 unindented newline (much like Python blocks). With this extra syntax
3031 it is referred to as a block.
3033 Note that a block does not have to include any newlines if it only
3034 contains simple statements. So both of:
3036 if condition: a=b; d=f
3038 if condition { a=b; print f }
3042 In either case the list is constructed from a `binode` list with
3043 `Block` as the operator. When parsing the list it is most convenient
3044 to append to the end, so a list is a list and a statement. When using
3045 the list it is more convenient to consider a list to be a statement
3046 and a list. So we need a function to re-order a list.
3047 `reorder_bilist` serves this purpose.
3049 The only stand-alone statement we introduce at this stage is `pass`
3050 which does nothing and is represented as a `NULL` pointer in a `Block`
3051 list. Other stand-alone statements will follow once the infrastructure
3060 Block -> Open Statementlist Close ${ $0 = $<2; }$
3061 | Open SimpleStatements } ${ $0 = reorder_bilist($<2); }$
3062 | : SimpleStatements ${ $0 = reorder_bilist($<2); }$
3063 | : Statementlist $$OUT ${ $0 = $<2; }$
3065 Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<1); }$
3066 | Newlines ComplexStatements ${ $0 = reorder_bilist($<2); }$
3068 ComplexStatements -> ComplexStatements ComplexStatement ${
3078 | ComplexStatement ${
3090 ComplexStatement -> SimpleStatementLine ${
3091 $0 = reorder_bilist($<1);
3093 | ComplexStatement NEWLINE ${ $0 = $<1; }$
3094 ## ComplexStatement Grammar
3097 SimpleStatements -> SimpleStatements ; SimpleStatement ${
3103 | SimpleStatement ${
3110 SimpleStatementLine -> SimpleStatements NEWLINE ${ $0 = $<1; }$
3111 | SimpleStatements ; NEWLINE ${ $0 = $<1; }$
3112 | SimpleStatementLine NEWLINE ${ $0 = $<1; }$
3114 SimpleStatement -> pass ${ $0 = NULL; }$
3115 | ERROR ${ tok_err(c, "Syntax error in statement", &$1); }$
3116 ## SimpleStatement Grammar
3118 ###### print binode cases
3122 if (b->left == NULL)
3125 print_exec(b->left, indent, bracket);
3128 print_exec(b->right, indent, bracket);
3131 // block, one per line
3132 if (b->left == NULL)
3133 do_indent(indent, "pass\n");
3135 print_exec(b->left, indent, bracket);
3137 print_exec(b->right, indent, bracket);
3141 ###### propagate binode cases
3144 /* If any statement returns something other than Tnone
3145 * or Tbool then all such must return same type.
3146 * As each statement may be Tnone or something else,
3147 * we must always pass NULL (unknown) down, otherwise an incorrect
3148 * error might occur. We never return Tnone unless it is
3153 for (e = b; e; e = cast(binode, e->right)) {
3154 t = propagate_types(e->left, c, ok, NULL, rules);
3155 if ((rules & Rboolok) && t == Tbool)
3157 if (t && t != Tnone && t != Tbool) {
3161 type_err(c, "error: expected %1%r, found %2",
3162 e->left, type, rules, t);
3168 ###### interp binode cases
3170 while (rv.type == Tnone &&
3173 rv = interp_exec(b->left);
3174 b = cast(binode, b->right);
3178 ### The Print statement
3180 `print` is a simple statement that takes a comma-separated list of
3181 expressions and prints the values separated by spaces and terminated
3182 by a newline. No control of formatting is possible.
3184 `print` faces the same list-ordering issue as blocks, and uses the
3190 ###### SimpleStatement Grammar
3192 | print ExpressionList ${
3193 $0 = reorder_bilist($<2);
3195 | print ExpressionList , ${
3200 $0 = reorder_bilist($0);
3211 ExpressionList -> ExpressionList , Expression ${
3224 ###### print binode cases
3227 do_indent(indent, "print");
3231 print_exec(b->left, -1, bracket);
3235 b = cast(binode, b->right);
3241 ###### propagate binode cases
3244 /* don't care but all must be consistent */
3245 propagate_types(b->left, c, ok, NULL, Rnolabel);
3246 propagate_types(b->right, c, ok, NULL, Rnolabel);
3249 ###### interp binode cases
3255 for ( ; b; b = cast(binode, b->right))
3259 left = interp_exec(b->left);
3272 ###### Assignment statement
3274 An assignment will assign a value to a variable, providing it hasn't
3275 be declared as a constant. The analysis phase ensures that the type
3276 will be correct so the interpreter just needs to perform the
3277 calculation. There is a form of assignment which declares a new
3278 variable as well as assigning a value. If a name is assigned before
3279 it is declared, and error will be raised as the name is created as
3280 `Tlabel` and it is illegal to assign to such names.
3286 ###### SimpleStatement Grammar
3287 | Variable = Expression ${
3293 | VariableDecl = Expression ${
3301 if ($1->var->where_set == NULL) {
3303 "Variable declared with no type or value: %v",
3313 ###### print binode cases
3316 do_indent(indent, "");
3317 print_exec(b->left, indent, bracket);
3319 print_exec(b->right, indent, bracket);
3326 struct variable *v = cast(var, b->left)->var;
3327 do_indent(indent, "");
3328 print_exec(b->left, indent, bracket);
3329 if (cast(var, b->left)->var->constant) {
3330 if (v->where_decl == v->where_set) {
3332 type_print(v->val.type, stdout);
3337 if (v->where_decl == v->where_set) {
3339 type_print(v->val.type, stdout);
3346 print_exec(b->right, indent, bracket);
3353 ###### propagate binode cases
3357 /* Both must match and not be labels,
3358 * Type must support 'dup',
3359 * For Assign, left must not be constant.
3362 t = propagate_types(b->left, c, ok, NULL,
3363 Rnolabel | (b->op == Assign ? Rnoconstant : 0));
3368 if (propagate_types(b->right, c, ok, t, 0) != t)
3369 if (b->left->type == Xvar)
3370 type_err(c, "info: variable '%v' was set as %1 here.",
3371 cast(var, b->left)->var->where_set, t, rules, NULL);
3373 t = propagate_types(b->right, c, ok, NULL, Rnolabel);
3375 propagate_types(b->left, c, ok, t,
3376 (b->op == Assign ? Rnoconstant : 0));
3378 if (t && t->dup == NULL)
3379 type_err(c, "error: cannot assign value of type %1", b, t, 0, NULL);
3384 ###### interp binode cases
3387 lleft = linterp_exec(b->left);
3388 right = interp_exec(b->right);
3393 free_value(right); // NOTEST
3399 struct variable *v = cast(var, b->left)->var;
3403 right = interp_exec(b->right);
3405 right = val_init(v->val.type);
3412 ### The `use` statement
3414 The `use` statement is the last "simple" statement. It is needed when
3415 the condition in a conditional statement is a block. `use` works much
3416 like `return` in C, but only completes the `condition`, not the whole
3422 ###### SimpleStatement Grammar
3424 $0 = new_pos(binode, $1);
3427 if ($0->right->type == Xvar) {
3428 struct var *v = cast(var, $0->right);
3429 if (v->var->val.type == Tnone) {
3430 /* Convert this to a label */
3431 v->var->val = val_prepare(Tlabel);
3432 v->var->val.label = &v->var->val;
3437 ###### print binode cases
3440 do_indent(indent, "use ");
3441 print_exec(b->right, -1, bracket);
3446 ###### propagate binode cases
3449 /* result matches value */
3450 return propagate_types(b->right, c, ok, type, 0);
3452 ###### interp binode cases
3455 rv = interp_exec(b->right);
3458 ### The Conditional Statement
3460 This is the biggy and currently the only complex statement. This
3461 subsumes `if`, `while`, `do/while`, `switch`, and some parts of `for`.
3462 It is comprised of a number of parts, all of which are optional though
3463 set combinations apply. Each part is (usually) a key word (`then` is
3464 sometimes optional) followed by either an expression or a code block,
3465 except the `casepart` which is a "key word and an expression" followed
3466 by a code block. The code-block option is valid for all parts and,
3467 where an expression is also allowed, the code block can use the `use`
3468 statement to report a value. If the code block does not report a value
3469 the effect is similar to reporting `True`.
3471 The `else` and `case` parts, as well as `then` when combined with
3472 `if`, can contain a `use` statement which will apply to some
3473 containing conditional statement. `for` parts, `do` parts and `then`
3474 parts used with `for` can never contain a `use`, except in some
3475 subordinate conditional statement.
3477 If there is a `forpart`, it is executed first, only once.
3478 If there is a `dopart`, then it is executed repeatedly providing
3479 always that the `condpart` or `cond`, if present, does not return a non-True
3480 value. `condpart` can fail to return any value if it simply executes
3481 to completion. This is treated the same as returning `True`.
3483 If there is a `thenpart` it will be executed whenever the `condpart`
3484 or `cond` returns True (or does not return any value), but this will happen
3485 *after* `dopart` (when present).
3487 If `elsepart` is present it will be executed at most once when the
3488 condition returns `False` or some value that isn't `True` and isn't
3489 matched by any `casepart`. If there are any `casepart`s, they will be
3490 executed when the condition returns a matching value.
3492 The particular sorts of values allowed in case parts has not yet been
3493 determined in the language design, so nothing is prohibited.
3495 The various blocks in this complex statement potentially provide scope
3496 for variables as described earlier. Each such block must include the
3497 "OpenScope" nonterminal before parsing the block, and must call
3498 `var_block_close()` when closing the block.
3500 The code following "`if`", "`switch`" and "`for`" does not get its own
3501 scope, but is in a scope covering the whole statement, so names
3502 declared there cannot be redeclared elsewhere. Similarly the
3503 condition following "`while`" is in a scope the covers the body
3504 ("`do`" part) of the loop, and which does not allow conditional scope
3505 extension. Code following "`then`" (both looping and non-looping),
3506 "`else`" and "`case`" each get their own local scope.
3508 The type requirements on the code block in a `whilepart` are quite
3509 unusal. It is allowed to return a value of some identifiable type, in
3510 which case the loop aborts and an appropriate `casepart` is run, or it
3511 can return a Boolean, in which case the loop either continues to the
3512 `dopart` (on `True`) or aborts and runs the `elsepart` (on `False`).
3513 This is different both from the `ifpart` code block which is expected to
3514 return a Boolean, or the `switchpart` code block which is expected to
3515 return the same type as the casepart values. The correct analysis of
3516 the type of the `whilepart` code block is the reason for the
3517 `Rboolok` flag which is passed to `propagate_types()`.
3519 The `cond_statement` cannot fit into a `binode` so a new `exec` is
3528 struct exec *action;
3529 struct casepart *next;
3531 struct cond_statement {
3533 struct exec *forpart, *condpart, *dopart, *thenpart, *elsepart;
3534 struct casepart *casepart;
3537 ###### ast functions
3539 static void free_casepart(struct casepart *cp)
3543 free_exec(cp->value);
3544 free_exec(cp->action);
3551 static void free_cond_statement(struct cond_statement *s)
3555 free_exec(s->forpart);
3556 free_exec(s->condpart);
3557 free_exec(s->dopart);
3558 free_exec(s->thenpart);
3559 free_exec(s->elsepart);
3560 free_casepart(s->casepart);
3564 ###### free exec cases
3565 case Xcond_statement: free_cond_statement(cast(cond_statement, e)); break;
3567 ###### ComplexStatement Grammar
3568 | CondStatement $$NEWLINE ${ $0 = $<1; }$
3573 // both ForThen and Whilepart open scopes, and CondSuffix only
3574 // closes one - so in the first branch here we have another to close.
3575 CondStatement -> ForPart ThenPart WhilePart CondSuffix ${
3579 $0->condpart = $3.condpart; $3.condpart = NULL;
3580 $0->dopart = $3.dopart; $3.dopart = NULL;
3581 var_block_close(c, CloseSequential);
3583 | ForPart WhilePart CondSuffix ${
3586 $0->thenpart = NULL;
3587 $0->condpart = $2.condpart; $2.condpart = NULL;
3588 $0->dopart = $2.dopart; $2.dopart = NULL;
3589 var_block_close(c, CloseSequential);
3591 | WhilePart CondSuffix ${
3593 $0->condpart = $1.condpart; $1.condpart = NULL;
3594 $0->dopart = $1.dopart; $1.dopart = NULL;
3596 | SwitchPart CasePart CondSuffix ${
3599 $2->next = $0->casepart;
3602 | IfPart IfSuffix ${
3604 $0->condpart = $1.condpart; $1.condpart = NULL;
3605 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
3606 // This is where we close an "if" statement
3607 var_block_close(c, CloseSequential);
3610 CondSuffix -> IfSuffix ${
3612 // This is where we close scope of the whole
3613 // "for" or "while" statement
3614 var_block_close(c, CloseSequential);
3616 | CasePart CondSuffix ${
3618 $1->next = $0->casepart;
3623 CasePart -> case Expression OpenScope Block ${
3624 $0 = calloc(1,sizeof(struct casepart));
3627 var_block_close(c, CloseParallel);
3629 | CasePart NEWLINE ${ $0 = $<1; }$
3632 IfSuffix -> ${ $0 = new(cond_statement); }$
3633 | IfSuffix NEWLINE ${ $0 = $<1; }$
3634 | else OpenScope Block ${
3635 $0 = new(cond_statement);
3637 var_block_close(c, CloseElse);
3639 | else OpenScope SimpleStatements NEWLINE ${
3640 $0 = new(cond_statement);
3641 $0->elsepart = reorder_bilist($<3);
3642 var_block_close(c, CloseElse);
3644 | else OpenScope CondStatement ${
3645 $0 = new(cond_statement);
3647 var_block_close(c, CloseElse);
3651 // These scopes are closed in CondSuffix
3652 ForPart -> for OpenScope SimpleStatements ; ${
3653 $0 = reorder_bilist($<3);
3655 | for OpenScope SimpleStatements NEWLINE ${
3656 $0 = reorder_bilist($<3);
3658 | for OpenScope Block ${
3661 | ForPart NEWLINE ${ $0 = $<1; }$
3663 ThenPart -> then OpenScope SimpleStatements ; ${
3664 $0 = reorder_bilist($<3);
3665 var_block_close(c, CloseSequential);
3667 | then OpenScope SimpleStatements NEWLINE ${
3668 $0 = reorder_bilist($<3);
3669 var_block_close(c, CloseSequential);
3671 | then OpenScope Block ${
3673 var_block_close(c, CloseSequential);
3675 | ThenPart NEWLINE ${ $0 = $<1; }$
3677 // This scope is closed in CondSuffix
3678 WhileHead -> while OpenScope Block ${ $0 = $<3; }$
3679 | WhileHead NEWLINE ${ $0 = $<1; }$
3682 // This scope is closed in CondSuffix
3683 WhilePart -> while OpenScope Expression Block ${
3684 $0.type = Xcond_statement;
3688 | WhileHead do Block ${
3689 $0.type = Xcond_statement;
3693 | WhilePart NEWLINE ${ $0 = $<1; }$
3695 IfPart -> if OpenScope Expression OpenScope Block ${
3696 $0.type = Xcond_statement;
3699 var_block_close(c, CloseParallel);
3701 | if OpenScope Expression OpenScope then Block ${
3702 $0.type = Xcond_statement;
3705 var_block_close(c, CloseParallel);
3707 | if OpenScope Expression OpenScope then SimpleStatements ; ${
3708 $0.type = Xcond_statement;
3710 $0.thenpart = reorder_bilist($<6);
3711 var_block_close(c, CloseParallel);
3713 | if OpenScope Expression OpenScope then SimpleStatements NEWLINE ${
3714 $0.type = Xcond_statement;
3716 $0.thenpart = reorder_bilist($<6);
3717 var_block_close(c, CloseParallel);
3719 | if OpenScope Block then OpenScope Block ${
3720 $0.type = Xcond_statement;
3723 var_block_close(c, CloseParallel);
3725 | IfPart NEWLINE ${ $0 = $<1; }$
3728 // This scope is closed in CondSuffix
3729 SwitchPart -> switch OpenScope Expression ${
3732 | switch OpenScope Block ${
3735 | SwitchPart NEWLINE ${ $0 = $<1; }$
3737 ###### print exec cases
3739 case Xcond_statement:
3741 struct cond_statement *cs = cast(cond_statement, e);
3742 struct casepart *cp;
3744 do_indent(indent, "for");
3745 if (bracket) printf(" {\n"); else printf(":\n");
3746 print_exec(cs->forpart, indent+1, bracket);
3749 do_indent(indent, "} then {\n");
3751 do_indent(indent, "then:\n");
3752 print_exec(cs->thenpart, indent+1, bracket);
3754 if (bracket) do_indent(indent, "}\n");
3758 if (cs->condpart && cs->condpart->type == Xbinode &&
3759 cast(binode, cs->condpart)->op == Block) {
3761 do_indent(indent, "while {\n");
3763 do_indent(indent, "while:\n");
3764 print_exec(cs->condpart, indent+1, bracket);
3766 do_indent(indent, "} do {\n");
3768 do_indent(indent, "do:\n");
3769 print_exec(cs->dopart, indent+1, bracket);
3771 do_indent(indent, "}\n");
3773 do_indent(indent, "while ");
3774 print_exec(cs->condpart, 0, bracket);
3779 print_exec(cs->dopart, indent+1, bracket);
3781 do_indent(indent, "}\n");
3786 do_indent(indent, "switch");
3788 do_indent(indent, "if");
3789 if (cs->condpart && cs->condpart->type == Xbinode &&
3790 cast(binode, cs->condpart)->op == Block) {
3795 print_exec(cs->condpart, indent+1, bracket);
3797 do_indent(indent, "}\n");
3799 do_indent(indent, "then:\n");
3800 print_exec(cs->thenpart, indent+1, bracket);
3804 print_exec(cs->condpart, 0, bracket);
3810 print_exec(cs->thenpart, indent+1, bracket);
3812 do_indent(indent, "}\n");
3817 for (cp = cs->casepart; cp; cp = cp->next) {
3818 do_indent(indent, "case ");
3819 print_exec(cp->value, -1, 0);
3824 print_exec(cp->action, indent+1, bracket);
3826 do_indent(indent, "}\n");
3829 do_indent(indent, "else");
3834 print_exec(cs->elsepart, indent+1, bracket);
3836 do_indent(indent, "}\n");
3841 ###### propagate exec cases
3842 case Xcond_statement:
3844 // forpart and dopart must return Tnone
3845 // thenpart must return Tnone if there is a dopart,
3846 // otherwise it is like elsepart.
3848 // be bool if there is no casepart
3849 // match casepart->values if there is a switchpart
3850 // either be bool or match casepart->value if there
3852 // elsepart and casepart->action must match the return type
3853 // expected of this statement.
3854 struct cond_statement *cs = cast(cond_statement, prog);
3855 struct casepart *cp;
3857 t = propagate_types(cs->forpart, c, ok, Tnone, 0);
3858 if (!type_compat(Tnone, t, 0))
3860 t = propagate_types(cs->dopart, c, ok, Tnone, 0);
3861 if (!type_compat(Tnone, t, 0))
3864 t = propagate_types(cs->thenpart, c, ok, Tnone, 0);
3865 if (!type_compat(Tnone, t, 0))
3868 if (cs->casepart == NULL)
3869 propagate_types(cs->condpart, c, ok, Tbool, 0);
3871 /* Condpart must match case values, with bool permitted */
3873 for (cp = cs->casepart;
3874 cp && !t; cp = cp->next)
3875 t = propagate_types(cp->value, c, ok, NULL, 0);
3876 if (!t && cs->condpart)
3877 t = propagate_types(cs->condpart, c, ok, NULL, Rboolok);
3878 // Now we have a type (I hope) push it down
3880 for (cp = cs->casepart; cp; cp = cp->next)
3881 propagate_types(cp->value, c, ok, t, 0);
3882 propagate_types(cs->condpart, c, ok, t, Rboolok);
3885 // (if)then, else, and case parts must return expected type.
3886 if (!cs->dopart && !type)
3887 type = propagate_types(cs->thenpart, c, ok, NULL, rules);
3889 type = propagate_types(cs->elsepart, c, ok, NULL, rules);
3890 for (cp = cs->casepart;
3893 type = propagate_types(cp->action, c, ok, NULL, rules);
3896 propagate_types(cs->thenpart, c, ok, type, rules);
3897 propagate_types(cs->elsepart, c, ok, type, rules);
3898 for (cp = cs->casepart; cp ; cp = cp->next)
3899 propagate_types(cp->action, c, ok, type, rules);
3905 ###### interp exec cases
3906 case Xcond_statement:
3908 struct value v, cnd;
3909 struct casepart *cp;
3910 struct cond_statement *c = cast(cond_statement, e);
3913 interp_exec(c->forpart);
3916 cnd = interp_exec(c->condpart);
3919 if (!(cnd.type == Tnone ||
3920 (cnd.type == Tbool && cnd.bool != 0)))
3922 // cnd is Tnone or Tbool, doesn't need to be freed
3924 interp_exec(c->dopart);
3927 rv = interp_exec(c->thenpart);
3928 if (rv.type != Tnone || !c->dopart)
3932 } while (c->dopart);
3934 for (cp = c->casepart; cp; cp = cp->next) {
3935 v = interp_exec(cp->value);
3936 if (value_cmp(v, cnd) == 0) {
3939 rv = interp_exec(cp->action);
3946 rv = interp_exec(c->elsepart);
3953 ### Top level structure
3955 All the language elements so far can be used in various places. Now
3956 it is time to clarify what those places are.
3958 At the top level of a file there will be a number of declarations.
3959 Many of the things that can be declared haven't been described yet,
3960 such as functions, procedures, imports, and probably more.
3961 For now there are two sorts of things that can appear at the top
3962 level. They are predefined constants, `struct` types, and the main
3963 program. While the syntax will allow the main program to appear
3964 multiple times, that will trigger an error if it is actually attempted.
3966 The various declarations do not return anything. They store the
3967 various declarations in the parse context.
3969 ###### Parser: grammar
3972 Ocean -> DeclarationList
3973 | Newlines DeclarationList
3975 DeclarationList -> Declaration
3976 | DeclarationList Declaration
3978 Declaration -> Declaration NEWLINE
3984 "error: unhandled parse error", &$1);
3987 ## top level grammar
3989 ### The `const` section
3991 As well as being defined in with the code that uses them, constants
3992 can be declared at the top level. These have full-file scope, so they
3993 are always `InScope`. The value of a top level constant can be given
3994 as an expression, and this is evaluated immediately rather than in the
3995 later interpretation stage. Once we add functions to the language, we
3996 will need rules concern which, if any, can be used to define a top
3999 Constants are defined in a section that starts with the reserved word
4000 `const` and then has a block with a list of assignment statements.
4001 For syntactic consistency, these must use the double-colon syntax to
4002 make it clear that they are constants. Type can also be given: if
4003 not, the type will be determined during analysis, as with other
4006 As the types constants are inserted at the head of a list, printing
4007 them in the same order that they were read is not straight forward.
4008 We take a quadratic approach here and count the number of constants
4009 (variables of depth 0), then count down from there, each time
4010 searching through for the Nth constant for decreasing N.
4012 ###### top level grammar
4014 DeclareConstant -> const Open ConstList Close
4015 | const Open SimpleConstList }
4016 | const : ConstList $$NEWLINE
4017 | const SimpleConstList NEWLINE
4019 ConstList -> ConstLines
4020 | Newlines ConstLines
4021 ConstLines -> ConstLines SimpleConstLine
4023 SimpleConstList -> SimpleConstList ; Const
4026 SimpleConstLine -> SimpleConstList NEWLINE
4027 | SimpleConstLine NEWLINE
4028 | ERROR NEWLINE ${ tok_err(c, "Syntax error in constant", &$1); }$
4031 CType -> Type ${ $0 = $<1; }$
4034 Const -> IDENTIFIER :: CType = Expression ${ {
4038 v = var_decl(c, $1.txt);
4040 struct var *var = new_pos(var, $1);
4041 v->where_decl = var;
4046 v = var_ref(c, $1.txt);
4047 tok_err(c, "error: name already declared", &$1);
4048 type_err(c, "info: this is where '%v' was first declared",
4049 v->where_decl, NULL, 0, NULL);
4053 propagate_types($5, c, &ok, $3, 0);
4058 v->val = interp_exec($5);
4062 ###### print const decls
4067 while (target != 0) {
4069 for (v = context.in_scope; v; v=v->in_scope)
4070 if (v->depth == 0) {
4081 printf(" %.*s :: ", v->name->name.len, v->name->name.txt);
4082 type_print(v->val.type, stdout);
4084 if (v->val.type == Tstr)
4086 print_value(v->val);
4087 if (v->val.type == Tstr)
4095 ### Finally the whole program.
4097 Somewhat reminiscent of Pascal a (current) Ocean program starts with
4098 the keyword "program" and a list of variable names which are assigned
4099 values from command line arguments. Following this is a `block` which
4100 is the code to execute. Unlike Pascal, constants and other
4101 declarations come *before* the program.
4103 As this is the top level, several things are handled a bit
4105 The whole program is not interpreted by `interp_exec` as that isn't
4106 passed the argument list which the program requires. Similarly type
4107 analysis is a bit more interesting at this level.
4112 ###### top level grammar
4114 DeclareProgram -> Program ${ {
4116 type_err(c, "Program defined a second time",
4123 Program -> program OpenScope Varlist Block ${
4126 $0->left = reorder_bilist($<3);
4128 var_block_close(c, CloseSequential);
4129 if (c->scope_stack && !c->parse_error) abort();
4132 Varlist -> Varlist ArgDecl ${
4141 ArgDecl -> IDENTIFIER ${ {
4142 struct variable *v = var_decl(c, $1.txt);
4149 ###### print binode cases
4151 do_indent(indent, "program");
4152 for (b2 = cast(binode, b->left); b2; b2 = cast(binode, b2->right)) {
4154 print_exec(b2->left, 0, 0);
4160 print_exec(b->right, indent+1, bracket);
4162 do_indent(indent, "}\n");
4165 ###### propagate binode cases
4166 case Program: abort(); // NOTEST
4168 ###### core functions
4170 static int analyse_prog(struct exec *prog, struct parse_context *c)
4172 struct binode *b = cast(binode, prog);
4179 propagate_types(b->right, c, &ok, Tnone, 0);
4184 for (b = cast(binode, b->left); b; b = cast(binode, b->right)) {
4185 struct var *v = cast(var, b->left);
4186 if (!v->var->val.type) {
4187 v->var->where_set = b;
4188 v->var->val = val_prepare(Tstr);
4191 b = cast(binode, prog);
4194 propagate_types(b->right, c, &ok, Tnone, 0);
4199 /* Make sure everything is still consistent */
4200 propagate_types(b->right, c, &ok, Tnone, 0);
4204 static void interp_prog(struct exec *prog, char **argv)
4206 struct binode *p = cast(binode, prog);
4212 al = cast(binode, p->left);
4214 struct var *v = cast(var, al->left);
4215 struct value *vl = &v->var->val;
4217 if (argv[0] == NULL) {
4218 printf("Not enough args\n");
4221 al = cast(binode, al->right);
4223 *vl = parse_value(vl->type, argv[0]);
4224 if (vl->type == NULL)
4228 v = interp_exec(p->right);
4232 ###### interp binode cases
4233 case Program: abort(); // NOTEST
4235 ## And now to test it out.
4237 Having a language requires having a "hello world" program. I'll
4238 provide a little more than that: a program that prints "Hello world"
4239 finds the GCD of two numbers, prints the first few elements of
4240 Fibonacci, performs a binary search for a number, and a few other
4241 things which will likely grow as the languages grows.
4243 ###### File: oceani.mk
4246 @echo "===== DEMO ====="
4247 ./oceani --section "demo: hello" oceani.mdc 55 33
4253 four ::= 2 + 2 ; five ::= 10/2
4254 const pie ::= "I like Pie";
4255 cake ::= "The cake is"
4264 print "Hello World, what lovely oceans you have!"
4265 print "Are there", five, "?"
4266 print pi, pie, "but", cake
4268 /* When a variable is defined in both branches of an 'if',
4269 * and used afterwards, the variables are merged.
4275 print "Is", A, "bigger than", B,"? ", bigger
4276 /* If a variable is not used after the 'if', no
4277 * merge happens, so types can be different
4280 double:string = "yes"
4281 print A, "is more than twice", B, "?", double
4284 print "double", B, "is", double
4289 if a > 0 and then b > 0:
4295 print "GCD of", A, "and", B,"is", a
4297 print a, "is not positive, cannot calculate GCD"
4299 print b, "is not positive, cannot calculate GCD"
4304 print "Fibonacci:", f1,f2,
4305 then togo = togo - 1
4313 /* Binary search... */
4318 mid := (lo + hi) / 2
4330 print "Yay, I found", target
4332 print "Closest I found was", mid
4337 // "middle square" PRNG. Not particularly good, but one my
4338 // Dad taught me - the first one I ever heard of.
4339 for i:=1; then i = i + 1; while i < size:
4340 n := list[i-1] * list[i-1]
4341 list[i] = (n / 100) % 10 000
4343 print "Before sort:",
4344 for i:=0; then i = i + 1; while i < size:
4348 for i := 1; then i=i+1; while i < size:
4349 for j:=i-1; then j=j-1; while j >= 0:
4350 if list[j] > list[j+1]:
4354 print " After sort:",
4355 for i:=0; then i = i + 1; while i < size:
4359 if 1 == 2 then print "yes"; else print "no"
4363 bob.alive = (bob.name == "Hello")
4364 print "bob", "is" if bob.alive else "isn't", "alive"