1 # Ocean Interpreter - Jamison Creek version
3 Ocean is intended to be a compiled language, so this interpreter is
4 not targeted at being the final product. It is, rather, an intermediate
5 stage and fills that role in two distinct ways.
7 Firstly, it exists as a platform to experiment with the early language
8 design. An interpreter is easy to write and easy to get working, so
9 the barrier for entry is lower if I aim to start with an interpreter.
11 Secondly, the plan for the Ocean compiler is to write it in the
12 [Ocean language](http://ocean-lang.org). To achieve this we naturally
13 need some sort of boot-strap process and this interpreter - written in
14 portable C - will fill that role. It will be used to bootstrap the
17 Two features that are not needed to fill either of these roles are
18 performance and completeness. The interpreter only needs to be fast
19 enough to run small test programs and occasionally to run the compiler
20 on itself. It only needs to be complete enough to test aspects of the
21 design which are developed before the compiler is working, and to run
22 the compiler on itself. Any features not used by the compiler when
23 compiling itself are superfluous. They may be included anyway, but
26 Nonetheless, the interpreter should end up being reasonably complete,
27 and any performance bottlenecks which appear and are easily fixed, will
32 This third version of the interpreter exists to test out some initial
33 ideas relating to types. Particularly it adds arrays (indexed from
34 zero) and simple structures. Basic control flow and variable scoping
35 are already fairly well established, as are basic numerical and
38 Some operators that have only recently been added, and so have not
39 generated all that much experience yet are "and then" and "or else" as
40 short-circuit Boolean operators, and the "if ... else" trinary
41 operator which can select between two expressions based on a third
42 (which appears syntactically in the middle).
44 Elements that are present purely to make a usable language, and
45 without any expectation that they will remain, are the "program'
46 clause, which provides a list of variables to received command-line
47 arguments, and the "print" statement which performs simple output.
49 The current scalar types are "number", "Boolean", and "string".
50 Boolean will likely stay in its current form, the other two might, but
51 could just as easily be changed.
55 Versions of the interpreter which obviously do not support a complete
56 language will be named after creeks and streams. This one is Jamison
59 Once we have something reasonably resembling a complete language, the
60 names of rivers will be used.
61 Early versions of the compiler will be named after seas. Major
62 releases of the compiler will be named after oceans. Hopefully I will
63 be finished once I get to the Pacific Ocean release.
67 As well as parsing and executing a program, the interpreter can print
68 out the program from the parsed internal structure. This is useful
69 for validating the parsing.
70 So the main requirements of the interpreter are:
72 - Parse the program, possibly with tracing,
73 - Analyse the parsed program to ensure consistency,
75 - Execute the program, if no parsing or consistency errors were found.
77 This is all performed by a single C program extracted with
80 There will be two formats for printing the program: a default and one
81 that uses bracketing. So a `--bracket` command line option is needed
82 for that. Normally the first code section found is used, however an
83 alternate section can be requested so that a file (such as this one)
84 can contain multiple programs This is effected with the `--section`
87 This code must be compiled with `-fplan9-extensions` so that anonymous
88 structures can be used.
90 ###### File: oceani.mk
92 myCFLAGS := -Wall -g -fplan9-extensions
93 CFLAGS := $(filter-out $(myCFLAGS),$(CFLAGS)) $(myCFLAGS)
94 myLDLIBS:= libparser.o libscanner.o libmdcode.o -licuuc
95 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
97 all :: $(LDLIBS) oceani
98 oceani.c oceani.h : oceani.mdc parsergen
99 ./parsergen -o oceani --LALR --tag Parser oceani.mdc
100 oceani.mk: oceani.mdc md2c
103 oceani: oceani.o $(LDLIBS)
104 $(CC) $(CFLAGS) -o oceani oceani.o $(LDLIBS)
106 ###### Parser: header
109 struct parse_context {
110 struct token_config config;
119 #define container_of(ptr, type, member) ({ \
120 const typeof( ((type *)0)->member ) *__mptr = (ptr); \
121 (type *)( (char *)__mptr - offsetof(type,member) );})
123 #define config2context(_conf) container_of(_conf, struct parse_context, \
126 ###### Parser: reduce
127 struct parse_context *c = config2context(config);
135 #include <sys/mman.h>
154 static char Usage[] =
155 "Usage: oceani --trace --print --noexec --brackets --section=SectionName prog.ocn\n";
156 static const struct option long_options[] = {
157 {"trace", 0, NULL, 't'},
158 {"print", 0, NULL, 'p'},
159 {"noexec", 0, NULL, 'n'},
160 {"brackets", 0, NULL, 'b'},
161 {"section", 1, NULL, 's'},
164 const char *options = "tpnbs";
165 int main(int argc, char *argv[])
170 struct section *s, *ss;
171 char *section = NULL;
172 struct parse_context context = {
174 .ignored = (1 << TK_mark),
175 .number_chars = ".,_+- ",
180 int doprint=0, dotrace=0, doexec=1, brackets=0;
182 while ((opt = getopt_long(argc, argv, options, long_options, NULL))
185 case 't': dotrace=1; break;
186 case 'p': doprint=1; break;
187 case 'n': doexec=0; break;
188 case 'b': brackets=1; break;
189 case 's': section = optarg; break;
190 default: fprintf(stderr, Usage);
194 if (optind >= argc) {
195 fprintf(stderr, "oceani: no input file given\n");
198 fd = open(argv[optind], O_RDONLY);
200 fprintf(stderr, "oceani: cannot open %s\n", argv[optind]);
203 context.file_name = argv[optind];
204 len = lseek(fd, 0, 2);
205 file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
206 s = code_extract(file, file+len, NULL);
208 fprintf(stderr, "oceani: could not find any code in %s\n",
213 ## context initialization
216 for (ss = s; ss; ss = ss->next) {
217 struct text sec = ss->section;
218 if (sec.len == strlen(section) &&
219 strncmp(sec.txt, section, sec.len) == 0)
223 fprintf(stderr, "oceani: cannot find section %s\n",
229 parse_oceani(ss->code, &context.config, dotrace ? stderr : NULL);
232 fprintf(stderr, "oceani: no program found.\n");
233 context.parse_error = 1;
235 if (context.prog && doprint) {
238 print_exec(context.prog, 0, brackets);
240 if (context.prog && doexec && !context.parse_error) {
241 if (!analyse_prog(context.prog, &context)) {
242 fprintf(stderr, "oceani: type error in program - not running.\n");
245 interp_prog(context.prog, argv+optind+1);
247 free_exec(context.prog);
250 struct section *t = s->next;
256 ## free context types
257 exit(context.parse_error ? 1 : 0);
262 The four requirements of parse, analyse, print, interpret apply to
263 each language element individually so that is how most of the code
266 Three of the four are fairly self explanatory. The one that requires
267 a little explanation is the analysis step.
269 The current language design does not require the types of variables to
270 be declared, but they must still have a single type. Different
271 operations impose different requirements on the variables, for example
272 addition requires both arguments to be numeric, and assignment
273 requires the variable on the left to have the same type as the
274 expression on the right.
276 Analysis involves propagating these type requirements around and
277 consequently setting the type of each variable. If any requirements
278 are violated (e.g. a string is compared with a number) or if a
279 variable needs to have two different types, then an error is raised
280 and the program will not run.
282 If the same variable is declared in both branchs of an 'if/else', or
283 in all cases of a 'switch' then the multiple instances may be merged
284 into just one variable if the variable is references after the
285 conditional statement. When this happens, the types must naturally be
286 consistent across all the branches. When the variable is not used
287 outside the if, the variables in the different branches are distinct
288 and can be of different types.
290 Determining the types of all variables early is important for
291 processing command line arguments. These can be assigned to any of
292 several types of variable, but we must first know the correct type so
293 any required conversion can happen. If a variable is associated with
294 a command line argument but no type can be interpreted (e.g. the
295 variable is only ever used in a `print` statement), then the type is
298 Undeclared names may only appear in "use" statements and "case" expressions.
299 These names are given a type of "label" and a unique value.
300 This allows them to fill the role of a name in an enumerated type, which
301 is useful for testing the `switch` statement.
303 As we will see, the condition part of a `while` statement can return
304 either a Boolean or some other type. This requires that the expected
305 type that gets passed around comprises a type and a flag to indicate
306 that `Tbool` is also permitted.
308 As there are, as yet, no distinct types that are compatible, there
309 isn't much subtlety in the analysis. When we have distinct number
310 types, this will become more interesting.
314 When analysis discovers an inconsistency it needs to report an error;
315 just refusing to run the code ensures that the error doesn't cascade,
316 but by itself it isn't very useful. A clear understanding of the sort
317 of error message that are useful will help guide the process of
320 At a simplistic level, the only sort of error that type analysis can
321 report is that the type of some construct doesn't match a contextual
322 requirement. For example, in `4 + "hello"` the addition provides a
323 contextual requirement for numbers, but `"hello"` is not a number. In
324 this particular example no further information is needed as the types
325 are obvious from local information. When a variable is involved that
326 isn't the case. It may be helpful to explain why the variable has a
327 particular type, by indicating the location where the type was set,
328 whether by declaration or usage.
330 Using a recursive-descent analysis we can easily detect a problem at
331 multiple locations. In "`hello:= "there"; 4 + hello`" the addition
332 will detect that one argument is not a number and the usage of `hello`
333 will detect that a number was wanted, but not provided. In this
334 (early) version of the language, we will generate error reports at
335 multiple locations, so the use of `hello` will report an error and
336 explain were the value was set, and the addition will report an error
337 and say why numbers are needed. To be able to report locations for
338 errors, each language element will need to record a file location
339 (line and column) and each variable will need to record the language
340 element where its type was set. For now we will assume that each line
341 of an error message indicates one location in the file, and up to 2
342 types. So we provide a `printf`-like function which takes a format, a
343 language (a `struct exec` which has not yet been introduced), and 2
344 types. "`%1`" reports the first type, "`%2`" reports the second. We
345 will need a function to print the location, once we know how that is
346 stored. As will be explained later, there are sometimes extra rules for
347 type matching and they might affect error messages, we need to pass those
350 As well as type errors, we sometimes need to report problems with
351 tokens, which might be unexpected or might name a type that has not
352 been defined. For these we have `tok_err()` which reports an error
353 with a given token. Each of the error functions sets the flag in the
354 context so indicate that parsing failed.
358 static void fput_loc(struct exec *loc, FILE *f);
360 ###### core functions
362 static void type_err(struct parse_context *c,
363 char *fmt, struct exec *loc,
364 struct type *t1, int rules, struct type *t2)
366 fprintf(stderr, "%s:", c->file_name);
367 fput_loc(loc, stderr);
368 for (; *fmt ; fmt++) {
375 case '%': fputc(*fmt, stderr); break; // NOTEST
376 default: fputc('?', stderr); break; // NOTEST
378 type_print(t1, stderr);
381 type_print(t2, stderr);
390 static void tok_err(struct parse_context *c, char *fmt, struct token *t)
392 fprintf(stderr, "%s:%d:%d: %s: %.*s\n", c->file_name, t->line, t->col, fmt,
393 t->txt.len, t->txt.txt);
397 ## Entities: declared and predeclared.
399 There are various "things" that the language and/or the interpreter
400 needs to know about to parse and execute a program. These include
401 types, variables, values, and executable code. These are all lumped
402 together under the term "entities" (calling them "objects" would be
403 confusing) and introduced here. These will introduced and described
404 here. The following section will present the different specific code
405 elements which comprise or manipulate these various entities.
409 Values come in a wide range of types, with more likely to be added.
410 Each type needs to be able to parse and print its own values (for
411 convenience at least) as well as to compare two values, at least for
412 equality and possibly for order. For now, values might need to be
413 duplicated and freed, though eventually such manipulations will be
414 better integrated into the language.
416 Rather than requiring every numeric type to support all numeric
417 operations (add, multiple, etc), we allow types to be able to present
418 as one of a few standard types: integer, float, and fraction. The
419 existence of these conversion functions eventaully enable types to
420 determine if they are compatible with other types, though such types
421 have not yet been implemented.
423 Named type are stored in a simple linked list. Objects of each type are "values"
424 which are often passed around by value.
431 ## value union fields
438 struct value (*init)(struct type *type);
439 struct value (*prepare)(struct type *type);
440 struct value (*parse)(struct type *type, char *str);
441 void (*print)(struct value val);
442 void (*print_type)(struct type *type, FILE *f);
443 int (*cmp_order)(struct value v1, struct value v2);
444 int (*cmp_eq)(struct value v1, struct value v2);
445 struct value (*dup)(struct value val);
446 void (*free)(struct value val);
447 void (*free_type)(struct type *t);
448 int (*compat)(struct type *this, struct type *other);
449 long long (*to_int)(struct value *v);
450 double (*to_float)(struct value *v);
451 int (*to_mpq)(mpq_t *q, struct value *v);
460 struct type *typelist;
464 static struct type *find_type(struct parse_context *c, struct text s)
466 struct type *l = c->typelist;
469 text_cmp(l->name, s) != 0)
474 static struct type *add_type(struct parse_context *c, struct text s,
479 n = calloc(1, sizeof(*n));
482 n->next = c->typelist;
487 static void free_type(struct type *t)
489 /* The type is always a reference to something in the
490 * context, so we don't need to free anything.
494 static void free_value(struct value v)
500 static int type_compat(struct type *require, struct type *have, int rules)
502 if ((rules & Rboolok) && have == Tbool)
504 if ((rules & Rnolabel) && have == Tlabel)
506 if (!require || !have)
510 return require->compat(require, have);
512 return require == have;
515 static void type_print(struct type *type, FILE *f)
518 fputs("*unknown*type*", f);
519 else if (type->name.len)
520 fprintf(f, "%.*s", type->name.len, type->name.txt);
521 else if (type->print_type)
522 type->print_type(type, f);
524 fputs("*invalid*type*", f); // NOTEST
527 static struct value val_prepare(struct type *type)
532 return type->prepare(type);
537 static struct value val_init(struct type *type)
542 return type->init(type);
547 static struct value dup_value(struct value v)
550 return v.type->dup(v);
554 static int value_cmp(struct value left, struct value right)
556 if (left.type && left.type->cmp_order)
557 return left.type->cmp_order(left, right);
558 if (left.type && left.type->cmp_eq)
559 return left.type->cmp_eq(left, right);
563 static void print_value(struct value v)
565 if (v.type && v.type->print)
568 printf("*Unknown*"); // NOTEST
571 static struct value parse_value(struct type *type, char *arg)
575 if (type && type->parse)
576 return type->parse(type, arg);
577 rv.type = NULL; // NOTEST
583 static void free_value(struct value v);
584 static int type_compat(struct type *require, struct type *have, int rules);
585 static void type_print(struct type *type, FILE *f);
586 static struct value val_init(struct type *type);
587 static struct value dup_value(struct value v);
588 static int value_cmp(struct value left, struct value right);
589 static void print_value(struct value v);
590 static struct value parse_value(struct type *type, char *arg);
592 ###### free context types
594 while (context.typelist) {
595 struct type *t = context.typelist;
597 context.typelist = t->next;
605 Values of the base types can be numbers, which we represent as
606 multi-precision fractions, strings, Booleans and labels. When
607 analysing the program we also need to allow for places where no value
608 is meaningful (type `Tnone`) and where we don't know what type to
609 expect yet (type is `NULL`).
611 Values are never shared, they are always copied when used, and freed
612 when no longer needed.
614 When propagating type information around the program, we need to
615 determine if two types are compatible, where type `NULL` is compatible
616 with anything. There are two special cases with type compatibility,
617 both related to the Conditional Statement which will be described
618 later. In some cases a Boolean can be accepted as well as some other
619 primary type, and in others any type is acceptable except a label (`Vlabel`).
620 A separate function encoding these cases will simplify some code later.
622 When assigning command line arguments to variables, we need to be able
623 to parse each type from a string.
625 The distinction beteen "prepare" and "init" needs to be explained.
626 "init" sets up an initial value, such as "zero" or the empty string.
627 "prepare" simply prepares the data structure so that if "free" gets
628 called on it, it won't do something silly. Normally a value will be
629 stored after "prepare" but before "free", but this might not happen if
634 #include "parse_string.h"
635 #include "parse_number.h"
638 myLDLIBS := libnumber.o libstring.o -lgmp
639 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
641 ###### type union fields
642 enum vtype {Vnone, Vstr, Vnum, Vbool, Vlabel} vtype;
644 ###### value union fields
651 static void _free_value(struct value v)
653 switch (v.type->vtype) {
655 case Vstr: free(v.str.txt); break;
656 case Vnum: mpq_clear(v.num); break;
662 ###### value functions
664 static struct value _val_prepare(struct type *type)
669 switch(type->vtype) {
673 memset(&rv.num, 0, sizeof(rv.num));
689 static struct value _val_init(struct type *type)
694 switch(type->vtype) {
695 case Vnone: // NOTEST
698 mpq_init(rv.num); break;
700 rv.str.txt = malloc(1);
706 case Vlabel: // NOTEST
707 rv.label = NULL; // NOTEST
713 static struct value _dup_value(struct value v)
717 switch (rv.type->vtype) {
718 case Vnone: // NOTEST
728 mpq_set(rv.num, v.num);
731 rv.str.len = v.str.len;
732 rv.str.txt = malloc(rv.str.len);
733 memcpy(rv.str.txt, v.str.txt, v.str.len);
739 static int _value_cmp(struct value left, struct value right)
742 if (left.type != right.type)
743 return left.type - right.type; // NOTEST
744 switch (left.type->vtype) {
745 case Vlabel: cmp = left.label == right.label ? 0 : 1; break;
746 case Vnum: cmp = mpq_cmp(left.num, right.num); break;
747 case Vstr: cmp = text_cmp(left.str, right.str); break;
748 case Vbool: cmp = left.bool - right.bool; break;
749 case Vnone: cmp = 0; // NOTEST
754 static void _print_value(struct value v)
756 switch (v.type->vtype) {
757 case Vnone: // NOTEST
758 printf("*no-value*"); break; // NOTEST
759 case Vlabel: // NOTEST
760 printf("*label-%p*", v.label); break; // NOTEST
762 printf("%.*s", v.str.len, v.str.txt); break;
764 printf("%s", v.bool ? "True":"False"); break;
769 mpf_set_q(fl, v.num);
770 gmp_printf("%Fg", fl);
777 static struct value _parse_value(struct type *type, char *arg)
785 switch(type->vtype) {
786 case Vlabel: // NOTEST
787 case Vnone: // NOTEST
788 val.type = NULL; // NOTEST
791 val.str.len = strlen(arg);
792 val.str.txt = malloc(val.str.len);
793 memcpy(val.str.txt, arg, val.str.len);
800 tx.txt = arg; tx.len = strlen(tx.txt);
801 if (number_parse(val.num, tail, tx) == 0)
804 mpq_neg(val.num, val.num);
806 printf("Unsupported suffix: %s\n", arg);
811 if (strcasecmp(arg, "true") == 0 ||
812 strcmp(arg, "1") == 0)
814 else if (strcasecmp(arg, "false") == 0 ||
815 strcmp(arg, "0") == 0)
818 printf("Bad bool: %s\n", arg);
826 static void _free_value(struct value v);
828 static struct type base_prototype = {
830 .prepare = _val_prepare,
831 .parse = _parse_value,
832 .print = _print_value,
833 .cmp_order = _value_cmp,
834 .cmp_eq = _value_cmp,
839 static struct type *Tbool, *Tstr, *Tnum, *Tnone, *Tlabel;
842 static struct type *add_base_type(struct parse_context *c, char *n, enum vtype vt)
844 struct text txt = { n, strlen(n) };
847 t = add_type(c, txt, &base_prototype);
852 ###### context initialization
854 Tbool = add_base_type(&context, "Boolean", Vbool);
855 Tstr = add_base_type(&context, "string", Vstr);
856 Tnum = add_base_type(&context, "number", Vnum);
857 Tnone = add_base_type(&context, "none", Vnone);
858 Tlabel = add_base_type(&context, "label", Vlabel);
862 Variables are scoped named values. We store the names in a linked
863 list of "bindings" sorted lexically, and use sequential search and
870 struct binding *next; // in lexical order
874 This linked list is stored in the parse context so that "reduce"
875 functions can find or add variables, and so the analysis phase can
876 ensure that every variable gets a type.
880 struct binding *varlist; // In lexical order
884 static struct binding *find_binding(struct parse_context *c, struct text s)
886 struct binding **l = &c->varlist;
891 (cmp = text_cmp((*l)->name, s)) < 0)
895 n = calloc(1, sizeof(*n));
902 Each name can be linked to multiple variables defined in different
903 scopes. Each scope starts where the name is declared and continues
904 until the end of the containing code block. Scopes of a given name
905 cannot nest, so a declaration while a name is in-scope is an error.
907 ###### binding fields
908 struct variable *var;
912 struct variable *previous;
914 struct binding *name;
915 struct exec *where_decl;// where name was declared
916 struct exec *where_set; // where type was set
920 While the naming seems strange, we include local constants in the
921 definition of variables. A name declared `var := value` can
922 subsequently be changed, but a name declared `var ::= value` cannot -
925 ###### variable fields
928 Scopes in parallel branches can be partially merged. More
929 specifically, if a given name is declared in both branches of an
930 if/else then its scope is a candidate for merging. Similarly if
931 every branch of an exhaustive switch (e.g. has an "else" clause)
932 declares a given name, then the scopes from the branches are
933 candidates for merging.
935 Note that names declared inside a loop (which is only parallel to
936 itself) are never visible after the loop. Similarly names defined in
937 scopes which are not parallel, such as those started by `for` and
938 `switch`, are never visible after the scope. Only variables defined in
939 both `then` and `else` (including the implicit then after an `if`, and
940 excluding `then` used with `for`) and in all `case`s and `else` of a
941 `switch` or `while` can be visible beyond the `if`/`switch`/`while`.
943 Labels, which are a bit like variables, follow different rules.
944 Labels are not explicitly declared, but if an undeclared name appears
945 in a context where a label is legal, that effectively declares the
946 name as a label. The declaration remains in force (or in scope) at
947 least to the end of the immediately containing block and conditionally
948 in any larger containing block which does not declare the name in some
949 other way. Importantly, the conditional scope extension happens even
950 if the label is only used in one parallel branch of a conditional --
951 when used in one branch it is treated as having been declared in all
954 Merge candidates are tentatively visible beyond the end of the
955 branching statement which creates them. If the name is used, the
956 merge is affirmed and they become a single variable visible at the
957 outer layer. If not - if it is redeclared first - the merge lapses.
959 To track scopes we have an extra stack, implemented as a linked list,
960 which roughly parallels the parse stack and which is used exclusively
961 for scoping. When a new scope is opened, a new frame is pushed and
962 the child-count of the parent frame is incremented. This child-count
963 is used to distinguish between the first of a set of parallel scopes,
964 in which declared variables must not be in scope, and subsequent
965 branches, whether they must already be conditionally scoped.
967 To push a new frame *before* any code in the frame is parsed, we need a
968 grammar reduction. This is most easily achieved with a grammar
969 element which derives the empty string, and creates the new scope when
970 it is recognized. This can be placed, for example, between a keyword
971 like "if" and the code following it.
975 struct scope *parent;
981 struct scope *scope_stack;
984 static void scope_pop(struct parse_context *c)
986 struct scope *s = c->scope_stack;
988 c->scope_stack = s->parent;
993 static void scope_push(struct parse_context *c)
995 struct scope *s = calloc(1, sizeof(*s));
997 c->scope_stack->child_count += 1;
998 s->parent = c->scope_stack;
1000 c->scope_depth += 1;
1006 OpenScope -> ${ scope_push(c); }$
1008 Each variable records a scope depth and is in one of four states:
1010 - "in scope". This is the case between the declaration of the
1011 variable and the end of the containing block, and also between
1012 the usage with affirms a merge and the end of that block.
1014 The scope depth is not greater than the current parse context scope
1015 nest depth. When the block of that depth closes, the state will
1016 change. To achieve this, all "in scope" variables are linked
1017 together as a stack in nesting order.
1019 - "pending". The "in scope" block has closed, but other parallel
1020 scopes are still being processed. So far, every parallel block at
1021 the same level that has closed has declared the name.
1023 The scope depth is the depth of the last parallel block that
1024 enclosed the declaration, and that has closed.
1026 - "conditionally in scope". The "in scope" block and all parallel
1027 scopes have closed, and no further mention of the name has been
1028 seen. This state includes a secondary nest depth which records the
1029 outermost scope seen since the variable became conditionally in
1030 scope. If a use of the name is found, the variable becomes "in
1031 scope" and that secondary depth becomes the recorded scope depth.
1032 If the name is declared as a new variable, the old variable becomes
1033 "out of scope" and the recorded scope depth stays unchanged.
1035 - "out of scope". The variable is neither in scope nor conditionally
1036 in scope. It is permanently out of scope now and can be removed from
1037 the "in scope" stack.
1039 ###### variable fields
1040 int depth, min_depth;
1041 enum { OutScope, PendingScope, CondScope, InScope } scope;
1042 struct variable *in_scope;
1044 ###### parse context
1046 struct variable *in_scope;
1048 All variables with the same name are linked together using the
1049 'previous' link. Those variable that have
1050 been affirmatively merged all have a 'merged' pointer that points to
1051 one primary variable - the most recently declared instance. When
1052 merging variables, we need to also adjust the 'merged' pointer on any
1053 other variables that had previously been merged with the one that will
1054 no longer be primary.
1056 A variable that is no longer the most recent instance of a name may
1057 still have "pending" scope, if it might still be merged with most
1058 recent instance. These variables don't really belong in the
1059 "in_scope" list, but are not immediately removed when a new instance
1060 is found. Instead, they are detected and ignored when considering the
1061 list of in_scope names.
1063 ###### variable fields
1064 struct variable *merged;
1066 ###### ast functions
1068 static void variable_merge(struct variable *primary, struct variable *secondary)
1072 if (primary->merged)
1074 primary = primary->merged;
1076 for (v = primary->previous; v; v=v->previous)
1077 if (v == secondary || v == secondary->merged ||
1078 v->merged == secondary ||
1079 (v->merged && v->merged == secondary->merged)) {
1080 v->scope = OutScope;
1081 v->merged = primary;
1085 ###### free context vars
1087 while (context.varlist) {
1088 struct binding *b = context.varlist;
1089 struct variable *v = b->var;
1090 context.varlist = b->next;
1093 struct variable *t = v;
1098 // This is a global constant
1099 free_exec(t->where_decl);
1104 #### Manipulating Bindings
1106 When a name is conditionally visible, a new declaration discards the
1107 old binding - the condition lapses. Conversely a usage of the name
1108 affirms the visibility and extends it to the end of the containing
1109 block - i.e. the block that contains both the original declaration and
1110 the latest usage. This is determined from `min_depth`. When a
1111 conditionally visible variable gets affirmed like this, it is also
1112 merged with other conditionally visible variables with the same name.
1114 When we parse a variable declaration we either report an error if the
1115 name is currently bound, or create a new variable at the current nest
1116 depth if the name is unbound or bound to a conditionally scoped or
1117 pending-scope variable. If the previous variable was conditionally
1118 scoped, it and its homonyms becomes out-of-scope.
1120 When we parse a variable reference (including non-declarative
1121 assignment) we report an error if the name is not bound or is bound to
1122 a pending-scope variable; update the scope if the name is bound to a
1123 conditionally scoped variable; or just proceed normally if the named
1124 variable is in scope.
1126 When we exit a scope, any variables bound at this level are either
1127 marked out of scope or pending-scoped, depending on whether the scope
1128 was sequential or parallel. Here a "parallel" scope means the "then"
1129 or "else" part of a conditional, or any "case" or "else" branch of a
1130 switch. Other scopes are "sequential".
1132 When exiting a parallel scope we check if there are any variables that
1133 were previously pending and are still visible. If there are, then
1134 there weren't redeclared in the most recent scope, so they cannot be
1135 merged and must become out-of-scope. If it is not the first of
1136 parallel scopes (based on `child_count`), we check that there was a
1137 previous binding that is still pending-scope. If there isn't, the new
1138 variable must now be out-of-scope.
1140 When exiting a sequential scope that immediately enclosed parallel
1141 scopes, we need to resolve any pending-scope variables. If there was
1142 no `else` clause, and we cannot determine that the `switch` was exhaustive,
1143 we need to mark all pending-scope variable as out-of-scope. Otherwise
1144 all pending-scope variables become conditionally scoped.
1147 enum closetype { CloseSequential, CloseParallel, CloseElse };
1149 ###### ast functions
1151 static struct variable *var_decl(struct parse_context *c, struct text s)
1153 struct binding *b = find_binding(c, s);
1154 struct variable *v = b->var;
1156 switch (v ? v->scope : OutScope) {
1158 /* Caller will report the error */
1162 v && v->scope == CondScope;
1164 v->scope = OutScope;
1168 v = calloc(1, sizeof(*v));
1169 v->previous = b->var;
1172 v->min_depth = v->depth = c->scope_depth;
1174 v->in_scope = c->in_scope;
1176 v->val = val_prepare(NULL);
1180 static struct variable *var_ref(struct parse_context *c, struct text s)
1182 struct binding *b = find_binding(c, s);
1183 struct variable *v = b->var;
1184 struct variable *v2;
1186 switch (v ? v->scope : OutScope) {
1189 /* Caller will report the error */
1192 /* All CondScope variables of this name need to be merged
1193 * and become InScope
1195 v->depth = v->min_depth;
1197 for (v2 = v->previous;
1198 v2 && v2->scope == CondScope;
1200 variable_merge(v, v2);
1208 static void var_block_close(struct parse_context *c, enum closetype ct)
1210 /* Close off all variables that are in_scope */
1211 struct variable *v, **vp, *v2;
1214 for (vp = &c->in_scope;
1215 v = *vp, v && v->depth > c->scope_depth && v->min_depth > c->scope_depth;
1217 if (v->name->var == v) switch (ct) {
1219 case CloseParallel: /* handle PendingScope */
1223 if (c->scope_stack->child_count == 1)
1224 v->scope = PendingScope;
1225 else if (v->previous &&
1226 v->previous->scope == PendingScope)
1227 v->scope = PendingScope;
1228 else if (v->val.type == Tlabel)
1229 v->scope = PendingScope;
1230 else if (v->name->var == v)
1231 v->scope = OutScope;
1232 if (ct == CloseElse) {
1233 /* All Pending variables with this name
1234 * are now Conditional */
1236 v2 && v2->scope == PendingScope;
1238 v2->scope = CondScope;
1243 v2 && v2->scope == PendingScope;
1245 if (v2->val.type != Tlabel)
1246 v2->scope = OutScope;
1248 case OutScope: break;
1251 case CloseSequential:
1252 if (v->val.type == Tlabel)
1253 v->scope = PendingScope;
1256 v->scope = OutScope;
1259 /* There was no 'else', so we can only become
1260 * conditional if we know the cases were exhaustive,
1261 * and that doesn't mean anything yet.
1262 * So only labels become conditional..
1265 v2 && v2->scope == PendingScope;
1267 if (v2->val.type == Tlabel) {
1268 v2->scope = CondScope;
1269 v2->min_depth = c->scope_depth;
1271 v2->scope = OutScope;
1274 case OutScope: break;
1278 if (v->scope == OutScope || v->name->var != v)
1287 Executables can be lots of different things. In many cases an
1288 executable is just an operation combined with one or two other
1289 executables. This allows for expressions and lists etc. Other times
1290 an executable is something quite specific like a constant or variable
1291 name. So we define a `struct exec` to be a general executable with a
1292 type, and a `struct binode` which is a subclass of `exec`, forms a
1293 node in a binary tree, and holds an operation. There will be other
1294 subclasses, and to access these we need to be able to `cast` the
1295 `exec` into the various other types.
1298 #define cast(structname, pointer) ({ \
1299 const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \
1300 if (__mptr && *__mptr != X##structname) abort(); \
1301 (struct structname *)( (char *)__mptr);})
1303 #define new(structname) ({ \
1304 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1305 __ptr->type = X##structname; \
1306 __ptr->line = -1; __ptr->column = -1; \
1309 #define new_pos(structname, token) ({ \
1310 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1311 __ptr->type = X##structname; \
1312 __ptr->line = token.line; __ptr->column = token.col; \
1321 enum exec_types type;
1329 struct exec *left, *right;
1332 ###### ast functions
1334 static int __fput_loc(struct exec *loc, FILE *f)
1338 if (loc->line >= 0) {
1339 fprintf(f, "%d:%d: ", loc->line, loc->column);
1342 if (loc->type == Xbinode)
1343 return __fput_loc(cast(binode,loc)->left, f) ||
1344 __fput_loc(cast(binode,loc)->right, f);
1347 static void fput_loc(struct exec *loc, FILE *f)
1349 if (!__fput_loc(loc, f))
1350 fprintf(f, "??:??: "); // NOTEST
1353 Each different type of `exec` node needs a number of functions
1354 defined, a bit like methods. We must be able to be able to free it,
1355 print it, analyse it and execute it. Once we have specific `exec`
1356 types we will need to parse them too. Let's take this a bit more
1361 The parser generator requires a `free_foo` function for each struct
1362 that stores attributes and they will often be `exec`s and subtypes
1363 there-of. So we need `free_exec` which can handle all the subtypes,
1364 and we need `free_binode`.
1366 ###### ast functions
1368 static void free_binode(struct binode *b)
1373 free_exec(b->right);
1377 ###### core functions
1378 static void free_exec(struct exec *e)
1387 ###### forward decls
1389 static void free_exec(struct exec *e);
1391 ###### free exec cases
1392 case Xbinode: free_binode(cast(binode, e)); break;
1396 Printing an `exec` requires that we know the current indent level for
1397 printing line-oriented components. As will become clear later, we
1398 also want to know what sort of bracketing to use.
1400 ###### ast functions
1402 static void do_indent(int i, char *str)
1409 ###### core functions
1410 static void print_binode(struct binode *b, int indent, int bracket)
1414 ## print binode cases
1418 static void print_exec(struct exec *e, int indent, int bracket)
1424 print_binode(cast(binode, e), indent, bracket); break;
1429 ###### forward decls
1431 static void print_exec(struct exec *e, int indent, int bracket);
1435 As discussed, analysis involves propagating type requirements around
1436 the program and looking for errors.
1438 So `propagate_types` is passed an expected type (being a `struct type`
1439 pointer together with some `val_rules` flags) that the `exec` is
1440 expected to return, and returns the type that it does return, either
1441 of which can be `NULL` signifying "unknown". An `ok` flag is passed
1442 by reference. It is set to `0` when an error is found, and `2` when
1443 any change is made. If it remains unchanged at `1`, then no more
1444 propagation is needed.
1448 enum val_rules {Rnolabel = 1<<0, Rboolok = 1<<1, Rnoconstant = 2<<1};
1452 if (rules & Rnolabel)
1453 fputs(" (labels not permitted)", stderr);
1456 ###### core functions
1458 static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1459 struct type *type, int rules);
1460 static struct type *__propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1461 struct type *type, int rules)
1468 switch (prog->type) {
1471 struct binode *b = cast(binode, prog);
1473 ## propagate binode cases
1477 ## propagate exec cases
1482 static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1483 struct type *type, int rules)
1485 struct type *ret = __propagate_types(prog, c, ok, type, rules);
1494 Interpreting an `exec` doesn't require anything but the `exec`. State
1495 is stored in variables and each variable will be directly linked from
1496 within the `exec` tree. The exception to this is the whole `program`
1497 which needs to look at command line arguments. The `program` will be
1498 interpreted separately.
1500 Each `exec` can return a value, which may be `Tnone` but must be
1501 non-NULL; Some `exec`s will return the location of a value, which can
1502 be updates. To support this, each exec case must store either a value
1503 in `val` or the pointer to a value in `lval`. If `lval` is set, but a
1504 simple value is required, `inter_exec()` will dereference `lval` to
1507 ###### core functions
1510 struct value val, *lval;
1513 static struct lrval _interp_exec(struct exec *e);
1515 static struct value interp_exec(struct exec *e)
1517 struct lrval ret = _interp_exec(e);
1520 return dup_value(*ret.lval);
1525 static struct value *linterp_exec(struct exec *e)
1527 struct lrval ret = _interp_exec(e);
1532 static struct lrval _interp_exec(struct exec *e)
1535 struct value rv, *lrv = NULL;
1546 struct binode *b = cast(binode, e);
1547 struct value left, right, *lleft;
1548 left.type = right.type = Tnone;
1550 ## interp binode cases
1552 free_value(left); free_value(right);
1555 ## interp exec cases
1564 Now that we have the shape of the interpreter in place we can add some
1565 complex types and connected them in to the data structures and the
1566 different phases of parse, analyse, print, interpret.
1568 Thus far we have arrays and structs.
1572 Arrays can be declared by giving a size and a type, as `[size]type' so
1573 `freq:[26]number` declares `freq` to be an array of 26 numbers. The
1574 size can be an arbitrary expression which is evaluated when the name
1577 Arrays cannot be assigned. When pointers are introduced we will also
1578 introduce array slices which can refer to part or all of an array -
1579 the assignment syntax will create a slice. For now, an array can only
1580 ever be referenced by the name it is declared with. It is likely that
1581 a "`copy`" primitive will eventually be define which can be used to
1582 make a copy of an array with controllable depth.
1584 ###### type union fields
1588 struct variable *vsize;
1589 struct type *member;
1592 ###### value union fields
1594 struct value *elmnts;
1597 ###### value functions
1599 static struct value array_prepare(struct type *type)
1604 ret.array.elmnts = NULL;
1608 static struct value array_init(struct type *type)
1614 if (type->array.vsize) {
1617 mpz_tdiv_q(q, mpq_numref(type->array.vsize->val.num),
1618 mpq_denref(type->array.vsize->val.num));
1619 type->array.size = mpz_get_si(q);
1622 ret.array.elmnts = calloc(type->array.size,
1623 sizeof(ret.array.elmnts[0]));
1624 for (i = 0; ret.array.elmnts && i < type->array.size; i++)
1625 ret.array.elmnts[i] = val_init(type->array.member);
1629 static void array_free(struct value val)
1633 if (val.array.elmnts)
1634 for (i = 0; i < val.type->array.size; i++)
1635 free_value(val.array.elmnts[i]);
1636 free(val.array.elmnts);
1639 static int array_compat(struct type *require, struct type *have)
1641 if (have->compat != require->compat)
1643 /* Both are arrays, so we can look at details */
1644 if (!type_compat(require->array.member, have->array.member, 0))
1646 if (require->array.vsize == NULL && have->array.vsize == NULL)
1647 return require->array.size == have->array.size;
1649 return require->array.vsize == have->array.vsize;
1652 static void array_print_type(struct type *type, FILE *f)
1655 if (type->array.vsize) {
1656 struct binding *b = type->array.vsize->name;
1657 fprintf(f, "%.*s]", b->name.len, b->name.txt);
1659 fprintf(f, "%d]", type->array.size);
1660 type_print(type->array.member, f);
1663 static struct type array_prototype = {
1664 .prepare = array_prepare,
1666 .print_type = array_print_type,
1667 .compat = array_compat,
1673 | [ NUMBER ] Type ${
1674 $0 = calloc(1, sizeof(struct type));
1675 *($0) = array_prototype;
1676 $0->array.member = $<4;
1677 $0->array.vsize = NULL;
1681 if (number_parse(num, tail, $2.txt) == 0)
1682 tok_err(c, "error: unrecognised number", &$2);
1684 tok_err(c, "error: unsupported number suffix", &$2);
1686 $0->array.size = mpz_get_ui(mpq_numref(num));
1687 if (mpz_cmp_ui(mpq_denref(num), 1) != 0) {
1688 tok_err(c, "error: array size must be an integer",
1690 } else if (mpz_cmp_ui(mpq_numref(num), 1UL << 30) >= 0)
1691 tok_err(c, "error: array size is too large",
1695 $0->next= c->anon_typelist;
1696 c->anon_typelist = $0;
1700 | [ IDENTIFIER ] Type ${ {
1701 struct variable *v = var_ref(c, $2.txt);
1704 tok_err(c, "error: name undeclared", &$2);
1705 else if (!v->constant)
1706 tok_err(c, "error: array size must be a constant", &$2);
1708 $0 = calloc(1, sizeof(struct type));
1709 *($0) = array_prototype;
1710 $0->array.member = $<4;
1712 $0->array.vsize = v;
1713 $0->next= c->anon_typelist;
1714 c->anon_typelist = $0;
1717 ###### parse context
1719 struct type *anon_typelist;
1721 ###### free context types
1723 while (context.anon_typelist) {
1724 struct type *t = context.anon_typelist;
1726 context.anon_typelist = t->next;
1733 ###### variable grammar
1735 | Variable [ Expression ] ${ {
1736 struct binode *b = new(binode);
1743 ###### print binode cases
1745 print_exec(b->left, -1, bracket);
1747 print_exec(b->right, -1, bracket);
1751 ###### propagate binode cases
1753 /* left must be an array, right must be a number,
1754 * result is the member type of the array
1756 propagate_types(b->right, c, ok, Tnum, 0);
1757 t = propagate_types(b->left, c, ok, NULL, rules & Rnoconstant);
1758 if (!t || t->compat != array_compat) {
1759 type_err(c, "error: %1 cannot be indexed", prog, t, 0, NULL);
1762 if (!type_compat(type, t->array.member, rules)) {
1763 type_err(c, "error: have %1 but need %2", prog,
1764 t->array.member, rules, type);
1766 return t->array.member;
1770 ###### interp binode cases
1775 lleft = linterp_exec(b->left);
1776 right = interp_exec(b->right);
1778 mpz_tdiv_q(q, mpq_numref(right.num), mpq_denref(right.num));
1782 if (i >= 0 && i < lleft->type->array.size)
1783 lrv = &lleft->array.elmnts[i];
1785 rv = val_init(lleft->type->array.member);
1791 A `struct` is a data-type that contains one or more other data-types.
1792 It differs from an array in that each member can be of a different
1793 type, and they are accessed by name rather than by number. Thus you
1794 cannot choose an element by calculation, you need to know what you
1797 The language makes no promises about how a given structure will be
1798 stored in memory - it is free to rearrange fields to suit whatever
1799 criteria seems important.
1801 Structs are declared separately from program code - they cannot be
1802 declared in-line in a variable declaration like arrays can. A struct
1803 is given a name and this name is used to identify the type - the name
1804 is not prefixed by the word `struct` as it would be in C.
1806 Structs are only treated as the same if they have the same name.
1807 Simply having the same fields in the same order is not enough. This
1808 might change once we can create structure initializes from a list of
1811 Each component datum is identified much like a variable is declared,
1812 with a name, one or two colons, and a type. The type cannot be omitted
1813 as there is no opportunity to deduce the type from usage. An initial
1814 value can be given following an equals sign, so
1816 ##### Example: a struct type
1822 would declare a type called "complex" which has two number fields,
1823 each initialised to zero.
1825 Struct will need to be declared separately from the code that uses
1826 them, so we will need to be able to print out the declaration of a
1827 struct when reprinting the whole program. So a `print_type_decl` type
1828 function will be needed.
1830 ###### type union fields
1841 ###### value union fields
1843 struct value *fields;
1846 ###### type functions
1847 void (*print_type_decl)(struct type *type, FILE *f);
1849 ###### value functions
1851 static struct value structure_prepare(struct type *type)
1856 ret.structure.fields = NULL;
1860 static struct value structure_init(struct type *type)
1866 ret.structure.fields = calloc(type->structure.nfields,
1867 sizeof(ret.structure.fields[0]));
1868 for (i = 0; ret.structure.fields && i < type->structure.nfields; i++)
1869 ret.structure.fields[i] = val_init(type->structure.fields[i].type);
1873 static void structure_free(struct value val)
1877 if (val.structure.fields)
1878 for (i = 0; i < val.type->structure.nfields; i++)
1879 free_value(val.structure.fields[i]);
1880 free(val.structure.fields);
1883 static void structure_free_type(struct type *t)
1886 for (i = 0; i < t->structure.nfields; i++)
1887 free_value(t->structure.fields[i].init);
1888 free(t->structure.fields);
1891 static struct type structure_prototype = {
1892 .prepare = structure_prepare,
1893 .init = structure_init,
1894 .free = structure_free,
1895 .free_type = structure_free_type,
1896 .print_type_decl = structure_print_type,
1910 ###### free exec cases
1912 free_exec(cast(fieldref, e)->left);
1916 ###### variable grammar
1918 | Variable . IDENTIFIER ${ {
1919 struct fieldref *fr = new_pos(fieldref, $2);
1926 ###### print exec cases
1930 struct fieldref *f = cast(fieldref, e);
1931 print_exec(f->left, -1, bracket);
1932 printf(".%.*s", f->name.len, f->name.txt);
1936 ###### ast functions
1937 static int find_struct_index(struct type *type, struct text field)
1940 for (i = 0; i < type->structure.nfields; i++)
1941 if (text_cmp(type->structure.fields[i].name, field) == 0)
1946 ###### propagate exec cases
1950 struct fieldref *f = cast(fieldref, prog);
1951 struct type *st = propagate_types(f->left, c, ok, NULL, 0);
1954 type_err(c, "error: unknown type for field access", f->left,
1956 else if (st->prepare != structure_prepare)
1957 type_err(c, "error: field reference attempted on %1, not a struct",
1958 f->left, st, 0, NULL);
1959 else if (f->index == -2) {
1960 f->index = find_struct_index(st, f->name);
1962 type_err(c, "error: cannot find requested field in %1",
1963 f->left, st, 0, NULL);
1965 if (f->index >= 0) {
1966 struct type *ft = st->structure.fields[f->index].type;
1967 if (!type_compat(type, ft, rules))
1968 type_err(c, "error: have %1 but need %2", prog,
1975 ###### interp exec cases
1978 struct fieldref *f = cast(fieldref, e);
1979 struct value *lleft = linterp_exec(f->left);
1980 lrv = &lleft->structure.fields[f->index];
1986 struct fieldlist *prev;
1990 ###### ast functions
1991 static void free_fieldlist(struct fieldlist *f)
1995 free_fieldlist(f->prev);
1996 free_value(f->f.init);
2000 ###### top level grammar
2001 DeclareStruct -> struct IDENTIFIER FieldBlock ${ {
2003 add_type(c, $2.txt, &structure_prototype);
2005 struct fieldlist *f;
2007 for (f = $3; f; f=f->prev)
2010 t->structure.nfields = cnt;
2011 t->structure.fields = calloc(cnt, sizeof(struct field));
2015 t->structure.fields[cnt] = f->f;
2016 f->f.init = val_prepare(Tnone);
2020 | DeclareStruct NEWLINE
2030 FieldBlock -> Open FieldList } ${ $0 = $<2; }$
2031 | Open SimpleFieldList } ${ $0 = $<2; }$
2032 | : FieldList ${ $0 = $<2; }$
2034 FieldList -> FieldLines ${ $0 = $<1; }$
2035 | Newlines FieldLines ${ $0 = $<2; }$
2036 FieldLines -> SimpleFieldListLine ${ $0 = $<1; }$
2037 | FieldLines SimpleFieldListLine ${
2042 SimpleFieldListLine -> SimpleFieldList NEWLINE ${ $0 = $<1; }$
2043 | SimpleFieldListLine NEWLINE ${ $0 = $<1; }$
2044 | ERROR NEWLINE ${ tok_err(c, "Syntax error in struct field", &$1); }$
2046 SimpleFieldList -> Field ${ $0 = $<1; }$
2047 | SimpleFieldList ; Field ${
2051 | SimpleFieldList ; ${
2055 Field -> IDENTIFIER : Type = Expression ${ {
2058 $0 = calloc(1, sizeof(struct fieldlist));
2059 $0->f.name = $1.txt;
2061 $0->f.init = val_prepare($0->f.type);
2064 propagate_types($<5, c, &ok, $3, 0);
2069 $0->f.init = interp_exec($5);
2071 | IDENTIFIER : Type ${
2072 $0 = calloc(1, sizeof(struct fieldlist));
2073 $0->f.name = $1.txt;
2075 $0->f.init = val_init($3);
2078 ###### forward decls
2079 static void structure_print_type(struct type *t, FILE *f);
2081 ###### value functions
2082 static void structure_print_type(struct type *t, FILE *f)
2086 fprintf(f, "struct %.*s:\n", t->name.len, t->name.txt);
2088 for (i = 0; i < t->structure.nfields; i++) {
2089 struct field *fl = t->structure.fields + i;
2090 fprintf(f, " %.*s : ", fl->name.len, fl->name.txt);
2091 type_print(fl->type, f);
2092 if (fl->init.type->print) {
2094 if (fl->init.type == Tstr)
2096 print_value(fl->init);
2097 if (fl->init.type == Tstr)
2104 ###### print type decls
2109 while (target != 0) {
2111 for (t = context.typelist; t ; t=t->next)
2112 if (t->print_type_decl) {
2121 t->print_type_decl(t, stdout);
2127 ## Executables: the elements of code
2129 Each code element needs to be parsed, printed, analysed,
2130 interpreted, and freed. There are several, so let's just start with
2131 the easy ones and work our way up.
2135 We have already met values as separate objects. When manifest
2136 constants appear in the program text, that must result in an executable
2137 which has a constant value. So the `val` structure embeds a value in
2153 $0 = new_pos(val, $1);
2154 $0->val.type = Tbool;
2158 $0 = new_pos(val, $1);
2159 $0->val.type = Tbool;
2163 $0 = new_pos(val, $1);
2164 $0->val.type = Tnum;
2167 if (number_parse($0->val.num, tail, $1.txt) == 0)
2168 mpq_init($0->val.num);
2170 tok_err(c, "error: unsupported number suffix",
2175 $0 = new_pos(val, $1);
2176 $0->val.type = Tstr;
2179 string_parse(&$1, '\\', &$0->val.str, tail);
2181 tok_err(c, "error: unsupported string suffix",
2186 $0 = new_pos(val, $1);
2187 $0->val.type = Tstr;
2190 string_parse(&$1, '\\', &$0->val.str, tail);
2192 tok_err(c, "error: unsupported string suffix",
2197 ###### print exec cases
2200 struct val *v = cast(val, e);
2201 if (v->val.type == Tstr)
2203 print_value(v->val);
2204 if (v->val.type == Tstr)
2209 ###### propagate exec cases
2212 struct val *val = cast(val, prog);
2213 if (!type_compat(type, val->val.type, rules))
2214 type_err(c, "error: expected %1%r found %2",
2215 prog, type, rules, val->val.type);
2216 return val->val.type;
2219 ###### interp exec cases
2221 rv = dup_value(cast(val, e)->val);
2224 ###### ast functions
2225 static void free_val(struct val *v)
2233 ###### free exec cases
2234 case Xval: free_val(cast(val, e)); break;
2236 ###### ast functions
2237 // Move all nodes from 'b' to 'rv', reversing the order.
2238 // In 'b' 'left' is a list, and 'right' is the last node.
2239 // In 'rv', left' is the first node and 'right' is a list.
2240 static struct binode *reorder_bilist(struct binode *b)
2242 struct binode *rv = NULL;
2245 struct exec *t = b->right;
2249 b = cast(binode, b->left);
2259 Just as we used a `val` to wrap a value into an `exec`, we similarly
2260 need a `var` to wrap a `variable` into an exec. While each `val`
2261 contained a copy of the value, each `var` hold a link to the variable
2262 because it really is the same variable no matter where it appears.
2263 When a variable is used, we need to remember to follow the `->merged`
2264 link to find the primary instance.
2272 struct variable *var;
2278 VariableDecl -> IDENTIFIER : ${ {
2279 struct variable *v = var_decl(c, $1.txt);
2280 $0 = new_pos(var, $1);
2285 v = var_ref(c, $1.txt);
2287 type_err(c, "error: variable '%v' redeclared",
2289 type_err(c, "info: this is where '%v' was first declared",
2290 v->where_decl, NULL, 0, NULL);
2293 | IDENTIFIER :: ${ {
2294 struct variable *v = var_decl(c, $1.txt);
2295 $0 = new_pos(var, $1);
2301 v = var_ref(c, $1.txt);
2303 type_err(c, "error: variable '%v' redeclared",
2305 type_err(c, "info: this is where '%v' was first declared",
2306 v->where_decl, NULL, 0, NULL);
2309 | IDENTIFIER : Type ${ {
2310 struct variable *v = var_decl(c, $1.txt);
2311 $0 = new_pos(var, $1);
2316 v->val = val_prepare($<3);
2318 v = var_ref(c, $1.txt);
2320 type_err(c, "error: variable '%v' redeclared",
2322 type_err(c, "info: this is where '%v' was first declared",
2323 v->where_decl, NULL, 0, NULL);
2326 | IDENTIFIER :: Type ${ {
2327 struct variable *v = var_decl(c, $1.txt);
2328 $0 = new_pos(var, $1);
2333 v->val = val_prepare($<3);
2336 v = var_ref(c, $1.txt);
2338 type_err(c, "error: variable '%v' redeclared",
2340 type_err(c, "info: this is where '%v' was first declared",
2341 v->where_decl, NULL, 0, NULL);
2346 Variable -> IDENTIFIER ${ {
2347 struct variable *v = var_ref(c, $1.txt);
2348 $0 = new_pos(var, $1);
2350 /* This might be a label - allocate a var just in case */
2351 v = var_decl(c, $1.txt);
2353 v->val = val_prepare(Tnone);
2358 cast(var, $0)->var = v;
2363 Type -> IDENTIFIER ${
2364 $0 = find_type(c, $1.txt);
2367 "error: undefined type", &$1);
2374 ###### print exec cases
2377 struct var *v = cast(var, e);
2379 struct binding *b = v->var->name;
2380 printf("%.*s", b->name.len, b->name.txt);
2387 if (loc->type == Xvar) {
2388 struct var *v = cast(var, loc);
2390 struct binding *b = v->var->name;
2391 fprintf(stderr, "%.*s", b->name.len, b->name.txt);
2393 fputs("???", stderr); // NOTEST
2395 fputs("NOTVAR", stderr); // NOTEST
2398 ###### propagate exec cases
2402 struct var *var = cast(var, prog);
2403 struct variable *v = var->var;
2405 type_err(c, "%d:BUG: no variable!!", prog, NULL, 0, NULL); // NOTEST
2406 return Tnone; // NOTEST
2410 if (v->constant && (rules & Rnoconstant)) {
2411 type_err(c, "error: Cannot assign to a constant: %v",
2412 prog, NULL, 0, NULL);
2413 type_err(c, "info: name was defined as a constant here",
2414 v->where_decl, NULL, 0, NULL);
2417 if (v->val.type == Tnone && v->where_decl == prog)
2418 type_err(c, "error: variable used but not declared: %v",
2419 prog, NULL, 0, NULL);
2420 if (v->val.type == NULL) {
2421 if (type && *ok != 0) {
2422 v->val = val_prepare(type);
2423 v->where_set = prog;
2428 if (!type_compat(type, v->val.type, rules)) {
2429 type_err(c, "error: expected %1%r but variable '%v' is %2", prog,
2430 type, rules, v->val.type);
2431 type_err(c, "info: this is where '%v' was set to %1", v->where_set,
2432 v->val.type, rules, NULL);
2439 ###### interp exec cases
2442 struct var *var = cast(var, e);
2443 struct variable *v = var->var;
2451 ###### ast functions
2453 static void free_var(struct var *v)
2458 ###### free exec cases
2459 case Xvar: free_var(cast(var, e)); break;
2461 ### Expressions: Conditional
2463 Our first user of the `binode` will be conditional expressions, which
2464 is a bit odd as they actually have three components. That will be
2465 handled by having 2 binodes for each expression. The conditional
2466 expression is the lowest precedence operatior, so it gets to define
2467 what an "Expression" is. The next level up is "BoolExpr", which
2470 Conditional expressions are of the form "value `if` condition `else`
2471 other_value". They associate to the right, so everything to the right
2472 of `else` is part of an else value, while only the BoolExpr to the
2473 left of `if` is the if values. Between `if` and `else` there is no
2474 room for ambiguity, so a full conditional expression is allowed in there.
2485 Expression -> Expression if Expression else Expression $$ifelse ${ {
2486 struct binode *b1 = new(binode);
2487 struct binode *b2 = new(binode);
2496 ## expression grammar
2498 ###### print binode cases
2501 b2 = cast(binode, b->right);
2502 if (bracket) printf("(");
2503 print_exec(b2->left, -1, bracket);
2505 print_exec(b->left, -1, bracket);
2507 print_exec(b2->right, -1, bracket);
2508 if (bracket) printf(")");
2511 ###### propagate binode cases
2514 /* cond must be Tbool, others must match */
2515 struct binode *b2 = cast(binode, b->right);
2518 propagate_types(b->left, c, ok, Tbool, 0);
2519 t = propagate_types(b2->left, c, ok, type, Rnolabel);
2520 t2 = propagate_types(b2->right, c, ok, type ?: t, Rnolabel);
2524 ###### interp binode cases
2527 struct binode *b2 = cast(binode, b->right);
2528 left = interp_exec(b->left);
2530 rv = interp_exec(b2->left);
2532 rv = interp_exec(b2->right);
2536 ### Expressions: Boolean
2538 The next class of expressions to use the `binode` will be Boolean
2539 expressions. As I haven't implemented precedence in the parser
2540 generator yet, we need different names for each precedence level used
2541 by expressions. The outer most or lowest level precedence after
2542 conditional expressions are Boolean operators which form an `BoolExpr`
2543 out of `BTerm`s and `BFact`s. As well as `or` `and`, and `not` we
2544 have `and then` and `or else` which only evaluate the second operand
2545 if the result would make a difference.
2554 ###### expr precedence
2559 ###### expression grammar
2560 | Expression or Expression ${ {
2561 struct binode *b = new(binode);
2567 | Expression or else Expression ${ {
2568 struct binode *b = new(binode);
2575 | Expression and Expression ${ {
2576 struct binode *b = new(binode);
2582 | Expression and then Expression ${ {
2583 struct binode *b = new(binode);
2590 | not Expression ${ {
2591 struct binode *b = new(binode);
2597 ###### print binode cases
2599 if (bracket) printf("(");
2600 print_exec(b->left, -1, bracket);
2602 print_exec(b->right, -1, bracket);
2603 if (bracket) printf(")");
2606 if (bracket) printf("(");
2607 print_exec(b->left, -1, bracket);
2608 printf(" and then ");
2609 print_exec(b->right, -1, bracket);
2610 if (bracket) printf(")");
2613 if (bracket) printf("(");
2614 print_exec(b->left, -1, bracket);
2616 print_exec(b->right, -1, bracket);
2617 if (bracket) printf(")");
2620 if (bracket) printf("(");
2621 print_exec(b->left, -1, bracket);
2622 printf(" or else ");
2623 print_exec(b->right, -1, bracket);
2624 if (bracket) printf(")");
2627 if (bracket) printf("(");
2629 print_exec(b->right, -1, bracket);
2630 if (bracket) printf(")");
2633 ###### propagate binode cases
2639 /* both must be Tbool, result is Tbool */
2640 propagate_types(b->left, c, ok, Tbool, 0);
2641 propagate_types(b->right, c, ok, Tbool, 0);
2642 if (type && type != Tbool)
2643 type_err(c, "error: %1 operation found where %2 expected", prog,
2647 ###### interp binode cases
2649 rv = interp_exec(b->left);
2650 right = interp_exec(b->right);
2651 rv.bool = rv.bool && right.bool;
2654 rv = interp_exec(b->left);
2656 rv = interp_exec(b->right);
2659 rv = interp_exec(b->left);
2660 right = interp_exec(b->right);
2661 rv.bool = rv.bool || right.bool;
2664 rv = interp_exec(b->left);
2666 rv = interp_exec(b->right);
2669 rv = interp_exec(b->right);
2673 ### Expressions: Comparison
2675 Of slightly higher precedence that Boolean expressions are
2677 A comparison takes arguments of any comparable type, but the two types must be
2680 To simplify the parsing we introduce an `eop` which can record an
2681 expression operator.
2688 ###### ast functions
2689 static void free_eop(struct eop *e)
2703 ###### expr precedence
2704 $LEFT < > <= >= == != CMPop
2706 ###### expression grammar
2707 | Expression CMPop Expression ${ {
2708 struct binode *b = new(binode);
2718 CMPop -> < ${ $0.op = Less; }$
2719 | > ${ $0.op = Gtr; }$
2720 | <= ${ $0.op = LessEq; }$
2721 | >= ${ $0.op = GtrEq; }$
2722 | == ${ $0.op = Eql; }$
2723 | != ${ $0.op = NEql; }$
2725 ###### print binode cases
2733 if (bracket) printf("(");
2734 print_exec(b->left, -1, bracket);
2736 case Less: printf(" < "); break;
2737 case LessEq: printf(" <= "); break;
2738 case Gtr: printf(" > "); break;
2739 case GtrEq: printf(" >= "); break;
2740 case Eql: printf(" == "); break;
2741 case NEql: printf(" != "); break;
2742 default: abort(); // NOTEST
2744 print_exec(b->right, -1, bracket);
2745 if (bracket) printf(")");
2748 ###### propagate binode cases
2755 /* Both must match but not be labels, result is Tbool */
2756 t = propagate_types(b->left, c, ok, NULL, Rnolabel);
2758 propagate_types(b->right, c, ok, t, 0);
2760 t = propagate_types(b->right, c, ok, NULL, Rnolabel);
2762 t = propagate_types(b->left, c, ok, t, 0);
2764 if (!type_compat(type, Tbool, 0))
2765 type_err(c, "error: Comparison returns %1 but %2 expected", prog,
2766 Tbool, rules, type);
2769 ###### interp binode cases
2778 left = interp_exec(b->left);
2779 right = interp_exec(b->right);
2780 cmp = value_cmp(left, right);
2783 case Less: rv.bool = cmp < 0; break;
2784 case LessEq: rv.bool = cmp <= 0; break;
2785 case Gtr: rv.bool = cmp > 0; break;
2786 case GtrEq: rv.bool = cmp >= 0; break;
2787 case Eql: rv.bool = cmp == 0; break;
2788 case NEql: rv.bool = cmp != 0; break;
2789 default: rv.bool = 0; break; // NOTEST
2794 ### Expressions: The rest
2796 The remaining expressions with the highest precedence are arithmetic
2797 and string concatenation. They are `Expr`, `Term`, and `Factor`.
2798 The `Factor` is where the `Value` and `Variable` that we already have
2801 `+` and `-` are both infix and prefix operations (where they are
2802 absolute value and negation). These have different operator names.
2804 We also have a 'Bracket' operator which records where parentheses were
2805 found. This makes it easy to reproduce these when printing. Once
2806 precedence is handled better I might be able to discard this.
2815 ###### expr precedence
2820 ###### expression grammar
2821 | Expression Eop Expression ${ {
2822 struct binode *b = new(binode);
2829 | Expression Top Expression ${ {
2830 struct binode *b = new(binode);
2837 | ( Expression ) ${ {
2838 struct binode *b = new_pos(binode, $1);
2843 | Uop Expression ${ {
2844 struct binode *b = new(binode);
2849 | Value ${ $0 = $<1; }$
2850 | Variable ${ $0 = $<1; }$
2853 Eop -> + ${ $0.op = Plus; }$
2854 | - ${ $0.op = Minus; }$
2856 Uop -> + ${ $0.op = Absolute; }$
2857 | - ${ $0.op = Negate; }$
2859 Top -> * ${ $0.op = Times; }$
2860 | / ${ $0.op = Divide; }$
2861 | % ${ $0.op = Rem; }$
2862 | ++ ${ $0.op = Concat; }$
2864 ###### print binode cases
2871 if (bracket) printf("(");
2872 print_exec(b->left, indent, bracket);
2874 case Plus: fputs(" + ", stdout); break;
2875 case Minus: fputs(" - ", stdout); break;
2876 case Times: fputs(" * ", stdout); break;
2877 case Divide: fputs(" / ", stdout); break;
2878 case Rem: fputs(" % ", stdout); break;
2879 case Concat: fputs(" ++ ", stdout); break;
2880 default: abort(); // NOTEST
2882 print_exec(b->right, indent, bracket);
2883 if (bracket) printf(")");
2886 if (bracket) printf("(");
2888 print_exec(b->right, indent, bracket);
2889 if (bracket) printf(")");
2892 if (bracket) printf("(");
2894 print_exec(b->right, indent, bracket);
2895 if (bracket) printf(")");
2899 print_exec(b->right, indent, bracket);
2903 ###### propagate binode cases
2909 /* both must be numbers, result is Tnum */
2912 /* as propagate_types ignores a NULL,
2913 * unary ops fit here too */
2914 propagate_types(b->left, c, ok, Tnum, 0);
2915 propagate_types(b->right, c, ok, Tnum, 0);
2916 if (!type_compat(type, Tnum, 0))
2917 type_err(c, "error: Arithmetic returns %1 but %2 expected", prog,
2922 /* both must be Tstr, result is Tstr */
2923 propagate_types(b->left, c, ok, Tstr, 0);
2924 propagate_types(b->right, c, ok, Tstr, 0);
2925 if (!type_compat(type, Tstr, 0))
2926 type_err(c, "error: Concat returns %1 but %2 expected", prog,
2931 return propagate_types(b->right, c, ok, type, 0);
2933 ###### interp binode cases
2936 rv = interp_exec(b->left);
2937 right = interp_exec(b->right);
2938 mpq_add(rv.num, rv.num, right.num);
2941 rv = interp_exec(b->left);
2942 right = interp_exec(b->right);
2943 mpq_sub(rv.num, rv.num, right.num);
2946 rv = interp_exec(b->left);
2947 right = interp_exec(b->right);
2948 mpq_mul(rv.num, rv.num, right.num);
2951 rv = interp_exec(b->left);
2952 right = interp_exec(b->right);
2953 mpq_div(rv.num, rv.num, right.num);
2958 left = interp_exec(b->left);
2959 right = interp_exec(b->right);
2960 mpz_init(l); mpz_init(r); mpz_init(rem);
2961 mpz_tdiv_q(l, mpq_numref(left.num), mpq_denref(left.num));
2962 mpz_tdiv_q(r, mpq_numref(right.num), mpq_denref(right.num));
2963 mpz_tdiv_r(rem, l, r);
2964 rv = val_init(Tnum);
2965 mpq_set_z(rv.num, rem);
2966 mpz_clear(r); mpz_clear(l); mpz_clear(rem);
2970 rv = interp_exec(b->right);
2971 mpq_neg(rv.num, rv.num);
2974 rv = interp_exec(b->right);
2975 mpq_abs(rv.num, rv.num);
2978 rv = interp_exec(b->right);
2981 left = interp_exec(b->left);
2982 right = interp_exec(b->right);
2984 rv.str = text_join(left.str, right.str);
2987 ###### value functions
2989 static struct text text_join(struct text a, struct text b)
2992 rv.len = a.len + b.len;
2993 rv.txt = malloc(rv.len);
2994 memcpy(rv.txt, a.txt, a.len);
2995 memcpy(rv.txt+a.len, b.txt, b.len);
2999 ### Blocks, Statements, and Statement lists.
3001 Now that we have expressions out of the way we need to turn to
3002 statements. There are simple statements and more complex statements.
3003 Simple statements do not contain (syntactic) newlines, complex statements do.
3005 Statements often come in sequences and we have corresponding simple
3006 statement lists and complex statement lists.
3007 The former comprise only simple statements separated by semicolons.
3008 The later comprise complex statements and simple statement lists. They are
3009 separated by newlines. Thus the semicolon is only used to separate
3010 simple statements on the one line. This may be overly restrictive,
3011 but I'm not sure I ever want a complex statement to share a line with
3014 Note that a simple statement list can still use multiple lines if
3015 subsequent lines are indented, so
3017 ###### Example: wrapped simple statement list
3022 is a single simple statement list. This might allow room for
3023 confusion, so I'm not set on it yet.
3025 A simple statement list needs no extra syntax. A complex statement
3026 list has two syntactic forms. It can be enclosed in braces (much like
3027 C blocks), or it can be introduced by a colon and continue until an
3028 unindented newline (much like Python blocks). With this extra syntax
3029 it is referred to as a block.
3031 Note that a block does not have to include any newlines if it only
3032 contains simple statements. So both of:
3034 if condition: a=b; d=f
3036 if condition { a=b; print f }
3040 In either case the list is constructed from a `binode` list with
3041 `Block` as the operator. When parsing the list it is most convenient
3042 to append to the end, so a list is a list and a statement. When using
3043 the list it is more convenient to consider a list to be a statement
3044 and a list. So we need a function to re-order a list.
3045 `reorder_bilist` serves this purpose.
3047 The only stand-alone statement we introduce at this stage is `pass`
3048 which does nothing and is represented as a `NULL` pointer in a `Block`
3049 list. Other stand-alone statements will follow once the infrastructure
3058 Block -> Open Statementlist Close ${ $0 = $<2; }$
3059 | Open SimpleStatements } ${ $0 = reorder_bilist($<2); }$
3060 | : SimpleStatements ${ $0 = reorder_bilist($<2); }$
3061 | : Statementlist ${ $0 = $<2; }$
3063 Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<1); }$
3064 | Newlines ComplexStatements ${ $0 = reorder_bilist($<2); }$
3066 ComplexStatements -> ComplexStatements ComplexStatement ${
3076 | ComplexStatement ${
3088 ComplexStatement -> SimpleStatementLine ${
3089 $0 = reorder_bilist($<1);
3091 | ComplexStatement NEWLINE ${ $0 = $<1; }$
3092 ## ComplexStatement Grammar
3095 SimpleStatements -> SimpleStatements ; SimpleStatement ${
3101 | SimpleStatement ${
3108 SimpleStatementLine -> SimpleStatements NEWLINE ${ $0 = $<1; }$
3109 | SimpleStatements ; NEWLINE ${ $0 = $<1; }$
3110 | SimpleStatementLine NEWLINE ${ $0 = $<1; }$
3112 SimpleStatement -> pass ${ $0 = NULL; }$
3113 | ERROR ${ tok_err(c, "Syntax error in statement", &$1); }$
3114 ## SimpleStatement Grammar
3116 ###### print binode cases
3120 if (b->left == NULL)
3123 print_exec(b->left, indent, bracket);
3126 print_exec(b->right, indent, bracket);
3129 // block, one per line
3130 if (b->left == NULL)
3131 do_indent(indent, "pass\n");
3133 print_exec(b->left, indent, bracket);
3135 print_exec(b->right, indent, bracket);
3139 ###### propagate binode cases
3142 /* If any statement returns something other than Tnone
3143 * or Tbool then all such must return same type.
3144 * As each statement may be Tnone or something else,
3145 * we must always pass NULL (unknown) down, otherwise an incorrect
3146 * error might occur. We never return Tnone unless it is
3151 for (e = b; e; e = cast(binode, e->right)) {
3152 t = propagate_types(e->left, c, ok, NULL, rules);
3153 if ((rules & Rboolok) && t == Tbool)
3155 if (t && t != Tnone && t != Tbool) {
3159 type_err(c, "error: expected %1%r, found %2",
3160 e->left, type, rules, t);
3166 ###### interp binode cases
3168 while (rv.type == Tnone &&
3171 rv = interp_exec(b->left);
3172 b = cast(binode, b->right);
3176 ### The Print statement
3178 `print` is a simple statement that takes a comma-separated list of
3179 expressions and prints the values separated by spaces and terminated
3180 by a newline. No control of formatting is possible.
3182 `print` faces the same list-ordering issue as blocks, and uses the
3188 ###### SimpleStatement Grammar
3190 | print ExpressionList ${
3191 $0 = reorder_bilist($<2);
3193 | print ExpressionList , ${
3198 $0 = reorder_bilist($0);
3209 ExpressionList -> ExpressionList , Expression ${
3222 ###### print binode cases
3225 do_indent(indent, "print");
3229 print_exec(b->left, -1, bracket);
3233 b = cast(binode, b->right);
3239 ###### propagate binode cases
3242 /* don't care but all must be consistent */
3243 propagate_types(b->left, c, ok, NULL, Rnolabel);
3244 propagate_types(b->right, c, ok, NULL, Rnolabel);
3247 ###### interp binode cases
3253 for ( ; b; b = cast(binode, b->right))
3257 left = interp_exec(b->left);
3270 ###### Assignment statement
3272 An assignment will assign a value to a variable, providing it hasn't
3273 be declared as a constant. The analysis phase ensures that the type
3274 will be correct so the interpreter just needs to perform the
3275 calculation. There is a form of assignment which declares a new
3276 variable as well as assigning a value. If a name is assigned before
3277 it is declared, and error will be raised as the name is created as
3278 `Tlabel` and it is illegal to assign to such names.
3284 ###### SimpleStatement Grammar
3285 | Variable = Expression ${
3291 | VariableDecl = Expression ${
3299 if ($1->var->where_set == NULL) {
3301 "Variable declared with no type or value: %v",
3311 ###### print binode cases
3314 do_indent(indent, "");
3315 print_exec(b->left, indent, bracket);
3317 print_exec(b->right, indent, bracket);
3324 struct variable *v = cast(var, b->left)->var;
3325 do_indent(indent, "");
3326 print_exec(b->left, indent, bracket);
3327 if (cast(var, b->left)->var->constant) {
3328 if (v->where_decl == v->where_set) {
3330 type_print(v->val.type, stdout);
3335 if (v->where_decl == v->where_set) {
3337 type_print(v->val.type, stdout);
3344 print_exec(b->right, indent, bracket);
3351 ###### propagate binode cases
3355 /* Both must match and not be labels,
3356 * Type must support 'dup',
3357 * For Assign, left must not be constant.
3360 t = propagate_types(b->left, c, ok, NULL,
3361 Rnolabel | (b->op == Assign ? Rnoconstant : 0));
3366 if (propagate_types(b->right, c, ok, t, 0) != t)
3367 if (b->left->type == Xvar)
3368 type_err(c, "info: variable '%v' was set as %1 here.",
3369 cast(var, b->left)->var->where_set, t, rules, NULL);
3371 t = propagate_types(b->right, c, ok, NULL, Rnolabel);
3373 propagate_types(b->left, c, ok, t,
3374 (b->op == Assign ? Rnoconstant : 0));
3376 if (t && t->dup == NULL)
3377 type_err(c, "error: cannot assign value of type %1", b, t, 0, NULL);
3382 ###### interp binode cases
3385 lleft = linterp_exec(b->left);
3386 right = interp_exec(b->right);
3391 free_value(right); // NOTEST
3397 struct variable *v = cast(var, b->left)->var;
3401 right = interp_exec(b->right);
3403 right = val_init(v->val.type);
3410 ### The `use` statement
3412 The `use` statement is the last "simple" statement. It is needed when
3413 the condition in a conditional statement is a block. `use` works much
3414 like `return` in C, but only completes the `condition`, not the whole
3420 ###### SimpleStatement Grammar
3422 $0 = new_pos(binode, $1);
3425 if ($0->right->type == Xvar) {
3426 struct var *v = cast(var, $0->right);
3427 if (v->var->val.type == Tnone) {
3428 /* Convert this to a label */
3429 v->var->val = val_prepare(Tlabel);
3430 v->var->val.label = &v->var->val;
3435 ###### print binode cases
3438 do_indent(indent, "use ");
3439 print_exec(b->right, -1, bracket);
3444 ###### propagate binode cases
3447 /* result matches value */
3448 return propagate_types(b->right, c, ok, type, 0);
3450 ###### interp binode cases
3453 rv = interp_exec(b->right);
3456 ### The Conditional Statement
3458 This is the biggy and currently the only complex statement. This
3459 subsumes `if`, `while`, `do/while`, `switch`, and some parts of `for`.
3460 It is comprised of a number of parts, all of which are optional though
3461 set combinations apply. Each part is (usually) a key word (`then` is
3462 sometimes optional) followed by either an expression or a code block,
3463 except the `casepart` which is a "key word and an expression" followed
3464 by a code block. The code-block option is valid for all parts and,
3465 where an expression is also allowed, the code block can use the `use`
3466 statement to report a value. If the code block does not report a value
3467 the effect is similar to reporting `True`.
3469 The `else` and `case` parts, as well as `then` when combined with
3470 `if`, can contain a `use` statement which will apply to some
3471 containing conditional statement. `for` parts, `do` parts and `then`
3472 parts used with `for` can never contain a `use`, except in some
3473 subordinate conditional statement.
3475 If there is a `forpart`, it is executed first, only once.
3476 If there is a `dopart`, then it is executed repeatedly providing
3477 always that the `condpart` or `cond`, if present, does not return a non-True
3478 value. `condpart` can fail to return any value if it simply executes
3479 to completion. This is treated the same as returning `True`.
3481 If there is a `thenpart` it will be executed whenever the `condpart`
3482 or `cond` returns True (or does not return any value), but this will happen
3483 *after* `dopart` (when present).
3485 If `elsepart` is present it will be executed at most once when the
3486 condition returns `False` or some value that isn't `True` and isn't
3487 matched by any `casepart`. If there are any `casepart`s, they will be
3488 executed when the condition returns a matching value.
3490 The particular sorts of values allowed in case parts has not yet been
3491 determined in the language design, so nothing is prohibited.
3493 The various blocks in this complex statement potentially provide scope
3494 for variables as described earlier. Each such block must include the
3495 "OpenScope" nonterminal before parsing the block, and must call
3496 `var_block_close()` when closing the block.
3498 The code following "`if`", "`switch`" and "`for`" does not get its own
3499 scope, but is in a scope covering the whole statement, so names
3500 declared there cannot be redeclared elsewhere. Similarly the
3501 condition following "`while`" is in a scope the covers the body
3502 ("`do`" part) of the loop, and which does not allow conditional scope
3503 extension. Code following "`then`" (both looping and non-looping),
3504 "`else`" and "`case`" each get their own local scope.
3506 The type requirements on the code block in a `whilepart` are quite
3507 unusal. It is allowed to return a value of some identifiable type, in
3508 which case the loop aborts and an appropriate `casepart` is run, or it
3509 can return a Boolean, in which case the loop either continues to the
3510 `dopart` (on `True`) or aborts and runs the `elsepart` (on `False`).
3511 This is different both from the `ifpart` code block which is expected to
3512 return a Boolean, or the `switchpart` code block which is expected to
3513 return the same type as the casepart values. The correct analysis of
3514 the type of the `whilepart` code block is the reason for the
3515 `Rboolok` flag which is passed to `propagate_types()`.
3517 The `cond_statement` cannot fit into a `binode` so a new `exec` is
3526 struct exec *action;
3527 struct casepart *next;
3529 struct cond_statement {
3531 struct exec *forpart, *condpart, *dopart, *thenpart, *elsepart;
3532 struct casepart *casepart;
3535 ###### ast functions
3537 static void free_casepart(struct casepart *cp)
3541 free_exec(cp->value);
3542 free_exec(cp->action);
3549 static void free_cond_statement(struct cond_statement *s)
3553 free_exec(s->forpart);
3554 free_exec(s->condpart);
3555 free_exec(s->dopart);
3556 free_exec(s->thenpart);
3557 free_exec(s->elsepart);
3558 free_casepart(s->casepart);
3562 ###### free exec cases
3563 case Xcond_statement: free_cond_statement(cast(cond_statement, e)); break;
3565 ###### ComplexStatement Grammar
3566 | CondStatement ${ $0 = $<1; }$
3571 // both ForThen and Whilepart open scopes, and CondSuffix only
3572 // closes one - so in the first branch here we have another to close.
3573 CondStatement -> ForPart ThenPart WhilePart CondSuffix ${
3577 $0->condpart = $3.condpart; $3.condpart = NULL;
3578 $0->dopart = $3.dopart; $3.dopart = NULL;
3579 var_block_close(c, CloseSequential);
3581 | ForPart WhilePart CondSuffix ${
3584 $0->thenpart = NULL;
3585 $0->condpart = $2.condpart; $2.condpart = NULL;
3586 $0->dopart = $2.dopart; $2.dopart = NULL;
3587 var_block_close(c, CloseSequential);
3589 | WhilePart CondSuffix ${
3591 $0->condpart = $1.condpart; $1.condpart = NULL;
3592 $0->dopart = $1.dopart; $1.dopart = NULL;
3594 | SwitchPart CasePart CondSuffix ${
3597 $2->next = $0->casepart;
3600 | IfPart IfSuffix ${
3602 $0->condpart = $1.condpart; $1.condpart = NULL;
3603 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
3604 // This is where we close an "if" statement
3605 var_block_close(c, CloseSequential);
3608 CondSuffix -> IfSuffix ${
3610 // This is where we close scope of the whole
3611 // "for" or "while" statement
3612 var_block_close(c, CloseSequential);
3614 | CasePart CondSuffix ${
3616 $1->next = $0->casepart;
3621 CasePart -> case Expression OpenScope Block ${
3622 $0 = calloc(1,sizeof(struct casepart));
3625 var_block_close(c, CloseParallel);
3627 | CasePart NEWLINE ${ $0 = $<1; }$
3630 IfSuffix -> ${ $0 = new(cond_statement); }$
3631 | IfSuffix NEWLINE ${ $0 = $<1; }$
3632 | else OpenScope Block ${
3633 $0 = new(cond_statement);
3635 var_block_close(c, CloseElse);
3637 | else OpenScope SimpleStatements NEWLINE ${
3638 $0 = new(cond_statement);
3639 $0->elsepart = reorder_bilist($<3);
3640 var_block_close(c, CloseElse);
3642 | else OpenScope CondStatement ${
3643 $0 = new(cond_statement);
3645 var_block_close(c, CloseElse);
3649 // These scopes are closed in CondSuffix
3650 ForPart -> for OpenScope SimpleStatements ; ${
3651 $0 = reorder_bilist($<3);
3653 | for OpenScope SimpleStatements NEWLINE ${
3654 $0 = reorder_bilist($<3);
3656 | for OpenScope Block ${
3659 | ForPart NEWLINE ${ $0 = $<1; }$
3661 ThenPart -> then OpenScope SimpleStatements ; ${
3662 $0 = reorder_bilist($<3);
3663 var_block_close(c, CloseSequential);
3665 | then OpenScope SimpleStatements NEWLINE ${
3666 $0 = reorder_bilist($<3);
3667 var_block_close(c, CloseSequential);
3669 | then OpenScope Block ${
3671 var_block_close(c, CloseSequential);
3673 | ThenPart NEWLINE ${ $0 = $<1; }$
3675 // This scope is closed in CondSuffix
3676 WhileHead -> while OpenScope Block ${ $0 = $<3; }$
3677 | WhileHead NEWLINE ${ $0 = $<1; }$
3680 // This scope is closed in CondSuffix
3681 WhilePart -> while OpenScope Expression Block ${
3682 $0.type = Xcond_statement;
3686 | WhileHead do Block ${
3687 $0.type = Xcond_statement;
3691 | WhilePart NEWLINE ${ $0 = $<1; }$
3693 IfPart -> if OpenScope Expression OpenScope Block ${
3694 $0.type = Xcond_statement;
3697 var_block_close(c, CloseParallel);
3699 | if OpenScope Expression OpenScope then Block ${
3700 $0.type = Xcond_statement;
3703 var_block_close(c, CloseParallel);
3705 | if OpenScope Expression OpenScope then SimpleStatements ; ${
3706 $0.type = Xcond_statement;
3708 $0.thenpart = reorder_bilist($<6);
3709 var_block_close(c, CloseParallel);
3711 | if OpenScope Expression OpenScope then SimpleStatements NEWLINE ${
3712 $0.type = Xcond_statement;
3714 $0.thenpart = reorder_bilist($<6);
3715 var_block_close(c, CloseParallel);
3717 | if OpenScope Block then OpenScope Block ${
3718 $0.type = Xcond_statement;
3721 var_block_close(c, CloseParallel);
3723 | IfPart NEWLINE ${ $0 = $<1; }$
3726 // This scope is closed in CondSuffix
3727 SwitchPart -> switch OpenScope Expression ${
3730 | switch OpenScope Block ${
3733 | SwitchPart NEWLINE ${ $0 = $<1; }$
3735 ###### print exec cases
3737 case Xcond_statement:
3739 struct cond_statement *cs = cast(cond_statement, e);
3740 struct casepart *cp;
3742 do_indent(indent, "for");
3743 if (bracket) printf(" {\n"); else printf(":\n");
3744 print_exec(cs->forpart, indent+1, bracket);
3747 do_indent(indent, "} then {\n");
3749 do_indent(indent, "then:\n");
3750 print_exec(cs->thenpart, indent+1, bracket);
3752 if (bracket) do_indent(indent, "}\n");
3756 if (cs->condpart && cs->condpart->type == Xbinode &&
3757 cast(binode, cs->condpart)->op == Block) {
3759 do_indent(indent, "while {\n");
3761 do_indent(indent, "while:\n");
3762 print_exec(cs->condpart, indent+1, bracket);
3764 do_indent(indent, "} do {\n");
3766 do_indent(indent, "do:\n");
3767 print_exec(cs->dopart, indent+1, bracket);
3769 do_indent(indent, "}\n");
3771 do_indent(indent, "while ");
3772 print_exec(cs->condpart, 0, bracket);
3777 print_exec(cs->dopart, indent+1, bracket);
3779 do_indent(indent, "}\n");
3784 do_indent(indent, "switch");
3786 do_indent(indent, "if");
3787 if (cs->condpart && cs->condpart->type == Xbinode &&
3788 cast(binode, cs->condpart)->op == Block) {
3793 print_exec(cs->condpart, indent+1, bracket);
3795 do_indent(indent, "}\n");
3797 do_indent(indent, "then:\n");
3798 print_exec(cs->thenpart, indent+1, bracket);
3802 print_exec(cs->condpart, 0, bracket);
3808 print_exec(cs->thenpart, indent+1, bracket);
3810 do_indent(indent, "}\n");
3815 for (cp = cs->casepart; cp; cp = cp->next) {
3816 do_indent(indent, "case ");
3817 print_exec(cp->value, -1, 0);
3822 print_exec(cp->action, indent+1, bracket);
3824 do_indent(indent, "}\n");
3827 do_indent(indent, "else");
3832 print_exec(cs->elsepart, indent+1, bracket);
3834 do_indent(indent, "}\n");
3839 ###### propagate exec cases
3840 case Xcond_statement:
3842 // forpart and dopart must return Tnone
3843 // thenpart must return Tnone if there is a dopart,
3844 // otherwise it is like elsepart.
3846 // be bool if there is no casepart
3847 // match casepart->values if there is a switchpart
3848 // either be bool or match casepart->value if there
3850 // elsepart and casepart->action must match the return type
3851 // expected of this statement.
3852 struct cond_statement *cs = cast(cond_statement, prog);
3853 struct casepart *cp;
3855 t = propagate_types(cs->forpart, c, ok, Tnone, 0);
3856 if (!type_compat(Tnone, t, 0))
3858 t = propagate_types(cs->dopart, c, ok, Tnone, 0);
3859 if (!type_compat(Tnone, t, 0))
3862 t = propagate_types(cs->thenpart, c, ok, Tnone, 0);
3863 if (!type_compat(Tnone, t, 0))
3866 if (cs->casepart == NULL)
3867 propagate_types(cs->condpart, c, ok, Tbool, 0);
3869 /* Condpart must match case values, with bool permitted */
3871 for (cp = cs->casepart;
3872 cp && !t; cp = cp->next)
3873 t = propagate_types(cp->value, c, ok, NULL, 0);
3874 if (!t && cs->condpart)
3875 t = propagate_types(cs->condpart, c, ok, NULL, Rboolok);
3876 // Now we have a type (I hope) push it down
3878 for (cp = cs->casepart; cp; cp = cp->next)
3879 propagate_types(cp->value, c, ok, t, 0);
3880 propagate_types(cs->condpart, c, ok, t, Rboolok);
3883 // (if)then, else, and case parts must return expected type.
3884 if (!cs->dopart && !type)
3885 type = propagate_types(cs->thenpart, c, ok, NULL, rules);
3887 type = propagate_types(cs->elsepart, c, ok, NULL, rules);
3888 for (cp = cs->casepart;
3891 type = propagate_types(cp->action, c, ok, NULL, rules);
3894 propagate_types(cs->thenpart, c, ok, type, rules);
3895 propagate_types(cs->elsepart, c, ok, type, rules);
3896 for (cp = cs->casepart; cp ; cp = cp->next)
3897 propagate_types(cp->action, c, ok, type, rules);
3903 ###### interp exec cases
3904 case Xcond_statement:
3906 struct value v, cnd;
3907 struct casepart *cp;
3908 struct cond_statement *c = cast(cond_statement, e);
3911 interp_exec(c->forpart);
3914 cnd = interp_exec(c->condpart);
3917 if (!(cnd.type == Tnone ||
3918 (cnd.type == Tbool && cnd.bool != 0)))
3920 // cnd is Tnone or Tbool, doesn't need to be freed
3922 interp_exec(c->dopart);
3925 rv = interp_exec(c->thenpart);
3926 if (rv.type != Tnone || !c->dopart)
3930 } while (c->dopart);
3932 for (cp = c->casepart; cp; cp = cp->next) {
3933 v = interp_exec(cp->value);
3934 if (value_cmp(v, cnd) == 0) {
3937 rv = interp_exec(cp->action);
3944 rv = interp_exec(c->elsepart);
3951 ### Top level structure
3953 All the language elements so far can be used in various places. Now
3954 it is time to clarify what those places are.
3956 At the top level of a file there will be a number of declarations.
3957 Many of the things that can be declared haven't been described yet,
3958 such as functions, procedures, imports, and probably more.
3959 For now there are two sorts of things that can appear at the top
3960 level. They are predefined constants, `struct` types, and the main
3961 program. While the syntax will allow the main program to appear
3962 multiple times, that will trigger an error if it is actually attempted.
3964 The various declarations do not return anything. They store the
3965 various declarations in the parse context.
3967 ###### Parser: grammar
3970 Ocean -> DeclarationList
3971 | Newlines DeclarationList
3973 DeclarationList -> Declaration
3974 | DeclarationList Declaration
3976 Declaration -> Declaration NEWLINE
3982 "error: unhandled parse error", &$1);
3985 ## top level grammar
3987 ### The `const` section
3989 As well as being defined in with the code that uses them, constants
3990 can be declared at the top level. These have full-file scope, so they
3991 are always `InScope`. The value of a top level constant can be given
3992 as an expression, and this is evaluated immediately rather than in the
3993 later interpretation stage. Once we add functions to the language, we
3994 will need rules concern which, if any, can be used to define a top
3997 Constants are defined in a section that starts with the reserved word
3998 `const` and then has a block with a list of assignment statements.
3999 For syntactic consistency, these must use the double-colon syntax to
4000 make it clear that they are constants. Type can also be given: if
4001 not, the type will be determined during analysis, as with other
4004 As the types constants are inserted at the head of a list, printing
4005 them in the same order that they were read is not straight forward.
4006 We take a quadratic approach here and count the number of constants
4007 (variables of depth 0), then count down from there, each time
4008 searching through for the Nth constant for decreasing N.
4010 ###### top level grammar
4012 DeclareConstant -> const Open ConstList Close
4013 | const Open SimpleConstList }
4015 | const SimpleConstList NEWLINE
4017 ConstList -> ConstLines
4018 | Newlines ConstLines
4019 ConstLines -> ConstLines SimpleConstLine
4021 SimpleConstList -> SimpleConstList ; Const
4024 SimpleConstLine -> SimpleConstList NEWLINE
4025 | SimpleConstLine NEWLINE
4026 | ERROR NEWLINE ${ tok_err(c, "Syntax error in constant", &$1); }$
4029 CType -> Type ${ $0 = $<1; }$
4032 Const -> IDENTIFIER :: CType = Expression ${ {
4036 v = var_decl(c, $1.txt);
4038 struct var *var = new_pos(var, $1);
4039 v->where_decl = var;
4044 v = var_ref(c, $1.txt);
4045 tok_err(c, "error: name already declared", &$1);
4046 type_err(c, "info: this is where '%v' was first declared",
4047 v->where_decl, NULL, 0, NULL);
4051 propagate_types($5, c, &ok, $3, 0);
4056 v->val = interp_exec($5);
4060 ###### print const decls
4065 while (target != 0) {
4067 for (v = context.in_scope; v; v=v->in_scope)
4068 if (v->depth == 0) {
4079 printf(" %.*s :: ", v->name->name.len, v->name->name.txt);
4080 type_print(v->val.type, stdout);
4082 if (v->val.type == Tstr)
4084 print_value(v->val);
4085 if (v->val.type == Tstr)
4093 ### Finally the whole program.
4095 Somewhat reminiscent of Pascal a (current) Ocean program starts with
4096 the keyword "program" and a list of variable names which are assigned
4097 values from command line arguments. Following this is a `block` which
4098 is the code to execute. Unlike Pascal, constants and other
4099 declarations come *before* the program.
4101 As this is the top level, several things are handled a bit
4103 The whole program is not interpreted by `interp_exec` as that isn't
4104 passed the argument list which the program requires. Similarly type
4105 analysis is a bit more interesting at this level.
4110 ###### top level grammar
4112 DeclareProgram -> Program ${ {
4114 type_err(c, "Program defined a second time",
4121 Program -> program OpenScope Varlist Block ${
4124 $0->left = reorder_bilist($<3);
4126 var_block_close(c, CloseSequential);
4127 if (c->scope_stack && !c->parse_error) abort();
4130 Varlist -> Varlist ArgDecl ${
4139 ArgDecl -> IDENTIFIER ${ {
4140 struct variable *v = var_decl(c, $1.txt);
4147 ###### print binode cases
4149 do_indent(indent, "program");
4150 for (b2 = cast(binode, b->left); b2; b2 = cast(binode, b2->right)) {
4152 print_exec(b2->left, 0, 0);
4158 print_exec(b->right, indent+1, bracket);
4160 do_indent(indent, "}\n");
4163 ###### propagate binode cases
4164 case Program: abort(); // NOTEST
4166 ###### core functions
4168 static int analyse_prog(struct exec *prog, struct parse_context *c)
4170 struct binode *b = cast(binode, prog);
4177 propagate_types(b->right, c, &ok, Tnone, 0);
4182 for (b = cast(binode, b->left); b; b = cast(binode, b->right)) {
4183 struct var *v = cast(var, b->left);
4184 if (!v->var->val.type) {
4185 v->var->where_set = b;
4186 v->var->val = val_prepare(Tstr);
4189 b = cast(binode, prog);
4192 propagate_types(b->right, c, &ok, Tnone, 0);
4197 /* Make sure everything is still consistent */
4198 propagate_types(b->right, c, &ok, Tnone, 0);
4202 static void interp_prog(struct exec *prog, char **argv)
4204 struct binode *p = cast(binode, prog);
4210 al = cast(binode, p->left);
4212 struct var *v = cast(var, al->left);
4213 struct value *vl = &v->var->val;
4215 if (argv[0] == NULL) {
4216 printf("Not enough args\n");
4219 al = cast(binode, al->right);
4221 *vl = parse_value(vl->type, argv[0]);
4222 if (vl->type == NULL)
4226 v = interp_exec(p->right);
4230 ###### interp binode cases
4231 case Program: abort(); // NOTEST
4233 ## And now to test it out.
4235 Having a language requires having a "hello world" program. I'll
4236 provide a little more than that: a program that prints "Hello world"
4237 finds the GCD of two numbers, prints the first few elements of
4238 Fibonacci, performs a binary search for a number, and a few other
4239 things which will likely grow as the languages grows.
4241 ###### File: oceani.mk
4244 @echo "===== DEMO ====="
4245 ./oceani --section "demo: hello" oceani.mdc 55 33
4251 four ::= 2 + 2 ; five ::= 10/2
4252 const pie ::= "I like Pie";
4253 cake ::= "The cake is"
4262 print "Hello World, what lovely oceans you have!"
4263 print "Are there", five, "?"
4264 print pi, pie, "but", cake
4266 /* When a variable is defined in both branches of an 'if',
4267 * and used afterwards, the variables are merged.
4273 print "Is", A, "bigger than", B,"? ", bigger
4274 /* If a variable is not used after the 'if', no
4275 * merge happens, so types can be different
4278 double:string = "yes"
4279 print A, "is more than twice", B, "?", double
4282 print "double", B, "is", double
4287 if a > 0 and then b > 0:
4293 print "GCD of", A, "and", B,"is", a
4295 print a, "is not positive, cannot calculate GCD"
4297 print b, "is not positive, cannot calculate GCD"
4302 print "Fibonacci:", f1,f2,
4303 then togo = togo - 1
4311 /* Binary search... */
4316 mid := (lo + hi) / 2
4328 print "Yay, I found", target
4330 print "Closest I found was", mid
4335 // "middle square" PRNG. Not particularly good, but one my
4336 // Dad taught me - the first one I ever heard of.
4337 for i:=1; then i = i + 1; while i < size:
4338 n := list[i-1] * list[i-1]
4339 list[i] = (n / 100) % 10 000
4341 print "Before sort:",
4342 for i:=0; then i = i + 1; while i < size:
4346 for i := 1; then i=i+1; while i < size:
4347 for j:=i-1; then j=j-1; while j >= 0:
4348 if list[j] > list[j+1]:
4352 print " After sort:",
4353 for i:=0; then i = i + 1; while i < size:
4357 if 1 == 2 then print "yes"; else print "no"
4361 bob.alive = (bob.name == "Hello")
4362 print "bob", "is" if bob.alive else "isn't", "alive"