1 # Ocean Interpreter - Jamison Creek version
3 Ocean is intended to be a compiled language, so this interpreter is
4 not targeted at being the final product. It is, rather, an intermediate
5 stage and fills that role in two distinct ways.
7 Firstly, it exists as a platform to experiment with the early language
8 design. An interpreter is easy to write and easy to get working, so
9 the barrier for entry is lower if I aim to start with an interpreter.
11 Secondly, the plan for the Ocean compiler is to write it in the
12 [Ocean language](http://ocean-lang.org). To achieve this we naturally
13 need some sort of boot-strap process and this interpreter - written in
14 portable C - will fill that role. It will be used to bootstrap the
17 Two features that are not needed to fill either of these roles are
18 performance and completeness. The interpreter only needs to be fast
19 enough to run small test programs and occasionally to run the compiler
20 on itself. It only needs to be complete enough to test aspects of the
21 design which are developed before the compiler is working, and to run
22 the compiler on itself. Any features not used by the compiler when
23 compiling itself are superfluous. They may be included anyway, but
26 Nonetheless, the interpreter should end up being reasonably complete,
27 and any performance bottlenecks which appear and are easily fixed, will
32 This third version of the interpreter exists to test out some initial
33 ideas relating to types. Particularly it adds arrays (indexed from
34 zero) and simple structures. Basic control flow and variable scoping
35 are already fairly well established, as are basic numerical and
38 Some operators that have only recently been added, and so have not
39 generated all that much experience yet are "and then" and "or else" as
40 short-circuit Boolean operators, and the "if ... else" trinary
41 operator which can select between two expressions based on a third
42 (which appears syntactically in the middle).
44 Elements that are present purely to make a usable language, and
45 without any expectation that they will remain, are the "program'
46 clause, which provides a list of variables to received command-line
47 arguments, and the "print" statement which performs simple output.
49 The current scalar types are "number", "Boolean", and "string".
50 Boolean will likely stay in its current form, the other two might, but
51 could just as easily be changed.
55 Versions of the interpreter which obviously do not support a complete
56 language will be named after creeks and streams. This one is Jamison
59 Once we have something reasonably resembling a complete language, the
60 names of rivers will be used.
61 Early versions of the compiler will be named after seas. Major
62 releases of the compiler will be named after oceans. Hopefully I will
63 be finished once I get to the Pacific Ocean release.
67 As well as parsing and executing a program, the interpreter can print
68 out the program from the parsed internal structure. This is useful
69 for validating the parsing.
70 So the main requirements of the interpreter are:
72 - Parse the program, possibly with tracing,
73 - Analyse the parsed program to ensure consistency,
75 - Execute the program, if no parsing or consistency errors were found.
77 This is all performed by a single C program extracted with
80 There will be two formats for printing the program: a default and one
81 that uses bracketing. So a `--bracket` command line option is needed
82 for that. Normally the first code section found is used, however an
83 alternate section can be requested so that a file (such as this one)
84 can contain multiple programs This is effected with the `--section`
87 This code must be compiled with `-fplan9-extensions` so that anonymous
88 structures can be used.
90 ###### File: oceani.mk
92 myCFLAGS := -Wall -g -fplan9-extensions
93 CFLAGS := $(filter-out $(myCFLAGS),$(CFLAGS)) $(myCFLAGS)
94 myLDLIBS:= libparser.o libscanner.o libmdcode.o -licuuc
95 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
97 all :: $(LDLIBS) oceani
98 oceani.c oceani.h : oceani.mdc parsergen
99 ./parsergen -o oceani --LALR --tag Parser oceani.mdc
100 oceani.mk: oceani.mdc md2c
103 oceani: oceani.o $(LDLIBS)
104 $(CC) $(CFLAGS) -o oceani oceani.o $(LDLIBS)
106 ###### Parser: header
109 struct parse_context {
110 struct token_config config;
119 #define container_of(ptr, type, member) ({ \
120 const typeof( ((type *)0)->member ) *__mptr = (ptr); \
121 (type *)( (char *)__mptr - offsetof(type,member) );})
123 #define config2context(_conf) container_of(_conf, struct parse_context, \
126 ###### Parser: reduce
127 struct parse_context *c = config2context(config);
135 #include <sys/mman.h>
154 static char Usage[] =
155 "Usage: oceani --trace --print --noexec --brackets --section=SectionName prog.ocn\n";
156 static const struct option long_options[] = {
157 {"trace", 0, NULL, 't'},
158 {"print", 0, NULL, 'p'},
159 {"noexec", 0, NULL, 'n'},
160 {"brackets", 0, NULL, 'b'},
161 {"section", 1, NULL, 's'},
164 const char *options = "tpnbs";
165 int main(int argc, char *argv[])
170 struct section *s, *ss;
171 char *section = NULL;
172 struct parse_context context = {
174 .ignored = (1 << TK_mark),
175 .number_chars = ".,_+- ",
180 int doprint=0, dotrace=0, doexec=1, brackets=0;
182 while ((opt = getopt_long(argc, argv, options, long_options, NULL))
185 case 't': dotrace=1; break;
186 case 'p': doprint=1; break;
187 case 'n': doexec=0; break;
188 case 'b': brackets=1; break;
189 case 's': section = optarg; break;
190 default: fprintf(stderr, Usage);
194 if (optind >= argc) {
195 fprintf(stderr, "oceani: no input file given\n");
198 fd = open(argv[optind], O_RDONLY);
200 fprintf(stderr, "oceani: cannot open %s\n", argv[optind]);
203 context.file_name = argv[optind];
204 len = lseek(fd, 0, 2);
205 file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
206 s = code_extract(file, file+len, NULL);
208 fprintf(stderr, "oceani: could not find any code in %s\n",
213 ## context initialization
216 for (ss = s; ss; ss = ss->next) {
217 struct text sec = ss->section;
218 if (sec.len == strlen(section) &&
219 strncmp(sec.txt, section, sec.len) == 0)
223 fprintf(stderr, "oceani: cannot find section %s\n",
229 parse_oceani(ss->code, &context.config, dotrace ? stderr : NULL);
232 fprintf(stderr, "oceani: no program found.\n");
233 context.parse_error = 1;
235 if (context.prog && doprint) {
238 print_exec(context.prog, 0, brackets);
240 if (context.prog && doexec && !context.parse_error) {
241 if (!analyse_prog(context.prog, &context)) {
242 fprintf(stderr, "oceani: type error in program - not running.\n");
245 interp_prog(context.prog, argv+optind+1);
247 free_exec(context.prog);
250 struct section *t = s->next;
256 ## free context types
257 exit(context.parse_error ? 1 : 0);
262 The four requirements of parse, analyse, print, interpret apply to
263 each language element individually so that is how most of the code
266 Three of the four are fairly self explanatory. The one that requires
267 a little explanation is the analysis step.
269 The current language design does not require the types of variables to
270 be declared, but they must still have a single type. Different
271 operations impose different requirements on the variables, for example
272 addition requires both arguments to be numeric, and assignment
273 requires the variable on the left to have the same type as the
274 expression on the right.
276 Analysis involves propagating these type requirements around and
277 consequently setting the type of each variable. If any requirements
278 are violated (e.g. a string is compared with a number) or if a
279 variable needs to have two different types, then an error is raised
280 and the program will not run.
282 If the same variable is declared in both branchs of an 'if/else', or
283 in all cases of a 'switch' then the multiple instances may be merged
284 into just one variable if the variable is references after the
285 conditional statement. When this happens, the types must naturally be
286 consistent across all the branches. When the variable is not used
287 outside the if, the variables in the different branches are distinct
288 and can be of different types.
290 Determining the types of all variables early is important for
291 processing command line arguments. These can be assigned to any of
292 several types of variable, but we must first know the correct type so
293 any required conversion can happen. If a variable is associated with
294 a command line argument but no type can be interpreted (e.g. the
295 variable is only ever used in a `print` statement), then the type is
298 Undeclared names may only appear in "use" statements and "case" expressions.
299 These names are given a type of "label" and a unique value.
300 This allows them to fill the role of a name in an enumerated type, which
301 is useful for testing the `switch` statement.
303 As we will see, the condition part of a `while` statement can return
304 either a Boolean or some other type. This requires that the expected
305 type that gets passed around comprises a type and a flag to indicate
306 that `Tbool` is also permitted.
308 As there are, as yet, no distinct types that are compatible, there
309 isn't much subtlety in the analysis. When we have distinct number
310 types, this will become more interesting.
314 When analysis discovers an inconsistency it needs to report an error;
315 just refusing to run the code ensures that the error doesn't cascade,
316 but by itself it isn't very useful. A clear understanding of the sort
317 of error message that are useful will help guide the process of
320 At a simplistic level, the only sort of error that type analysis can
321 report is that the type of some construct doesn't match a contextual
322 requirement. For example, in `4 + "hello"` the addition provides a
323 contextual requirement for numbers, but `"hello"` is not a number. In
324 this particular example no further information is needed as the types
325 are obvious from local information. When a variable is involved that
326 isn't the case. It may be helpful to explain why the variable has a
327 particular type, by indicating the location where the type was set,
328 whether by declaration or usage.
330 Using a recursive-descent analysis we can easily detect a problem at
331 multiple locations. In "`hello:= "there"; 4 + hello`" the addition
332 will detect that one argument is not a number and the usage of `hello`
333 will detect that a number was wanted, but not provided. In this
334 (early) version of the language, we will generate error reports at
335 multiple locations, so the use of `hello` will report an error and
336 explain were the value was set, and the addition will report an error
337 and say why numbers are needed. To be able to report locations for
338 errors, each language element will need to record a file location
339 (line and column) and each variable will need to record the language
340 element where its type was set. For now we will assume that each line
341 of an error message indicates one location in the file, and up to 2
342 types. So we provide a `printf`-like function which takes a format, a
343 language (a `struct exec` which has not yet been introduced), and 2
344 types. "`%1`" reports the first type, "`%2`" reports the second. We
345 will need a function to print the location, once we know how that is
346 stored. As will be explained later, there are sometimes extra rules for
347 type matching and they might affect error messages, we need to pass those
350 As well as type errors, we sometimes need to report problems with
351 tokens, which might be unexpected or might name a type that has not
352 been defined. For these we have `tok_err()` which reports an error
353 with a given token. Each of the error functions sets the flag in the
354 context so indicate that parsing failed.
358 static void fput_loc(struct exec *loc, FILE *f);
360 ###### core functions
362 static void type_err(struct parse_context *c,
363 char *fmt, struct exec *loc,
364 struct type *t1, int rules, struct type *t2)
366 fprintf(stderr, "%s:", c->file_name);
367 fput_loc(loc, stderr);
368 for (; *fmt ; fmt++) {
375 case '%': fputc(*fmt, stderr); break; // NOTEST
376 default: fputc('?', stderr); break; // NOTEST
378 type_print(t1, stderr);
381 type_print(t2, stderr);
390 static void tok_err(struct parse_context *c, char *fmt, struct token *t)
392 fprintf(stderr, "%s:%d:%d: %s: %.*s\n", c->file_name, t->line, t->col, fmt,
393 t->txt.len, t->txt.txt);
397 ## Entities: declared and predeclared.
399 There are various "things" that the language and/or the interpreter
400 needs to know about to parse and execute a program. These include
401 types, variables, values, and executable code. These are all lumped
402 together under the term "entities" (calling them "objects" would be
403 confusing) and introduced here. These will introduced and described
404 here. The following section will present the different specific code
405 elements which comprise or manipulate these various entities.
409 Values come in a wide range of types, with more likely to be added.
410 Each type needs to be able to parse and print its own values (for
411 convenience at least) as well as to compare two values, at least for
412 equality and possibly for order. For now, values might need to be
413 duplicated and freed, though eventually such manipulations will be
414 better integrated into the language.
416 Rather than requiring every numeric type to support all numeric
417 operations (add, multiple, etc), we allow types to be able to present
418 as one of a few standard types: integer, float, and fraction. The
419 existence of these conversion functions eventaully enable types to
420 determine if they are compatible with other types, though such types
421 have not yet been implemented.
423 Named type are stored in a simple linked list. Objects of each type are "values"
424 which are often passed around by value.
431 ## value union fields
438 struct value (*init)(struct type *type);
439 struct value (*prepare)(struct type *type);
440 struct value (*parse)(struct type *type, char *str);
441 void (*print)(struct value val);
442 void (*print_type)(struct type *type, FILE *f);
443 int (*cmp_order)(struct value v1, struct value v2);
444 int (*cmp_eq)(struct value v1, struct value v2);
445 struct value (*dup)(struct value val);
446 void (*free)(struct value val);
447 void (*free_type)(struct type *t);
448 int (*compat)(struct type *this, struct type *other);
449 long long (*to_int)(struct value *v);
450 double (*to_float)(struct value *v);
451 int (*to_mpq)(mpq_t *q, struct value *v);
460 struct type *typelist;
464 static struct type *find_type(struct parse_context *c, struct text s)
466 struct type *l = c->typelist;
469 text_cmp(l->name, s) != 0)
474 static struct type *add_type(struct parse_context *c, struct text s,
479 n = calloc(1, sizeof(*n));
482 n->next = c->typelist;
487 static void free_type(struct type *t)
489 /* The type is always a reference to something in the
490 * context, so we don't need to free anything.
494 static void free_value(struct value v)
500 static int type_compat(struct type *require, struct type *have, int rules)
502 if ((rules & Rboolok) && have == Tbool)
504 if ((rules & Rnolabel) && have == Tlabel)
506 if (!require || !have)
510 return require->compat(require, have);
512 return require == have;
515 static void type_print(struct type *type, FILE *f)
518 fputs("*unknown*type*", f);
519 else if (type->name.len)
520 fprintf(f, "%.*s", type->name.len, type->name.txt);
521 else if (type->print_type)
522 type->print_type(type, f);
524 fputs("*invalid*type*", f); // NOTEST
527 static struct value val_prepare(struct type *type)
532 return type->prepare(type);
537 static struct value val_init(struct type *type)
542 return type->init(type);
547 static struct value dup_value(struct value v)
550 return v.type->dup(v);
554 static int value_cmp(struct value left, struct value right)
556 if (left.type && left.type->cmp_order)
557 return left.type->cmp_order(left, right);
558 if (left.type && left.type->cmp_eq)
559 return left.type->cmp_eq(left, right);
563 static void print_value(struct value v)
565 if (v.type && v.type->print)
568 printf("*Unknown*"); // NOTEST
571 static struct value parse_value(struct type *type, char *arg)
575 if (type && type->parse)
576 return type->parse(type, arg);
577 rv.type = NULL; // NOTEST
583 static void free_value(struct value v);
584 static int type_compat(struct type *require, struct type *have, int rules);
585 static void type_print(struct type *type, FILE *f);
586 static struct value val_init(struct type *type);
587 static struct value dup_value(struct value v);
588 static int value_cmp(struct value left, struct value right);
589 static void print_value(struct value v);
590 static struct value parse_value(struct type *type, char *arg);
592 ###### free context types
594 while (context.typelist) {
595 struct type *t = context.typelist;
597 context.typelist = t->next;
605 Values of the base types can be numbers, which we represent as
606 multi-precision fractions, strings, Booleans and labels. When
607 analysing the program we also need to allow for places where no value
608 is meaningful (type `Tnone`) and where we don't know what type to
609 expect yet (type is `NULL`).
611 Values are never shared, they are always copied when used, and freed
612 when no longer needed.
614 When propagating type information around the program, we need to
615 determine if two types are compatible, where type `NULL` is compatible
616 with anything. There are two special cases with type compatibility,
617 both related to the Conditional Statement which will be described
618 later. In some cases a Boolean can be accepted as well as some other
619 primary type, and in others any type is acceptable except a label (`Vlabel`).
620 A separate function encoding these cases will simplify some code later.
622 When assigning command line arguments to variables, we need to be able
623 to parse each type from a string.
625 The distinction beteen "prepare" and "init" needs to be explained.
626 "init" sets up an initial value, such as "zero" or the empty string.
627 "prepare" simply prepares the data structure so that if "free" gets
628 called on it, it won't do something silly. Normally a value will be
629 stored after "prepare" but before "free", but this might not happen if
634 #include "parse_string.h"
635 #include "parse_number.h"
638 myLDLIBS := libnumber.o libstring.o -lgmp
639 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
641 ###### type union fields
642 enum vtype {Vnone, Vstr, Vnum, Vbool, Vlabel} vtype;
644 ###### value union fields
651 static void _free_value(struct value v)
653 switch (v.type->vtype) {
655 case Vstr: free(v.str.txt); break;
656 case Vnum: mpq_clear(v.num); break;
662 ###### value functions
664 static struct value _val_prepare(struct type *type)
669 switch(type->vtype) {
673 memset(&rv.num, 0, sizeof(rv.num));
689 static struct value _val_init(struct type *type)
694 switch(type->vtype) {
695 case Vnone: // NOTEST
698 mpq_init(rv.num); break;
700 rv.str.txt = malloc(1);
706 case Vlabel: // NOTEST
707 rv.label = NULL; // NOTEST
713 static struct value _dup_value(struct value v)
717 switch (rv.type->vtype) {
718 case Vnone: // NOTEST
728 mpq_set(rv.num, v.num);
731 rv.str.len = v.str.len;
732 rv.str.txt = malloc(rv.str.len);
733 memcpy(rv.str.txt, v.str.txt, v.str.len);
739 static int _value_cmp(struct value left, struct value right)
742 if (left.type != right.type)
743 return left.type - right.type; // NOTEST
744 switch (left.type->vtype) {
745 case Vlabel: cmp = left.label == right.label ? 0 : 1; break;
746 case Vnum: cmp = mpq_cmp(left.num, right.num); break;
747 case Vstr: cmp = text_cmp(left.str, right.str); break;
748 case Vbool: cmp = left.bool - right.bool; break;
749 case Vnone: cmp = 0; // NOTEST
754 static void _print_value(struct value v)
756 switch (v.type->vtype) {
757 case Vnone: // NOTEST
758 printf("*no-value*"); break; // NOTEST
759 case Vlabel: // NOTEST
760 printf("*label-%p*", v.label); break; // NOTEST
762 printf("%.*s", v.str.len, v.str.txt); break;
764 printf("%s", v.bool ? "True":"False"); break;
769 mpf_set_q(fl, v.num);
770 gmp_printf("%Fg", fl);
777 static struct value _parse_value(struct type *type, char *arg)
785 switch(type->vtype) {
786 case Vlabel: // NOTEST
787 case Vnone: // NOTEST
788 val.type = NULL; // NOTEST
791 val.str.len = strlen(arg);
792 val.str.txt = malloc(val.str.len);
793 memcpy(val.str.txt, arg, val.str.len);
800 tx.txt = arg; tx.len = strlen(tx.txt);
801 if (number_parse(val.num, tail, tx) == 0)
804 mpq_neg(val.num, val.num);
806 printf("Unsupported suffix: %s\n", arg);
811 if (strcasecmp(arg, "true") == 0 ||
812 strcmp(arg, "1") == 0)
814 else if (strcasecmp(arg, "false") == 0 ||
815 strcmp(arg, "0") == 0)
818 printf("Bad bool: %s\n", arg);
826 static void _free_value(struct value v);
828 static struct type base_prototype = {
830 .prepare = _val_prepare,
831 .parse = _parse_value,
832 .print = _print_value,
833 .cmp_order = _value_cmp,
834 .cmp_eq = _value_cmp,
839 static struct type *Tbool, *Tstr, *Tnum, *Tnone, *Tlabel;
842 static struct type *add_base_type(struct parse_context *c, char *n, enum vtype vt)
844 struct text txt = { n, strlen(n) };
847 t = add_type(c, txt, &base_prototype);
852 ###### context initialization
854 Tbool = add_base_type(&context, "Boolean", Vbool);
855 Tstr = add_base_type(&context, "string", Vstr);
856 Tnum = add_base_type(&context, "number", Vnum);
857 Tnone = add_base_type(&context, "none", Vnone);
858 Tlabel = add_base_type(&context, "label", Vlabel);
862 Variables are scoped named values. We store the names in a linked
863 list of "bindings" sorted lexically, and use sequential search and
870 struct binding *next; // in lexical order
874 This linked list is stored in the parse context so that "reduce"
875 functions can find or add variables, and so the analysis phase can
876 ensure that every variable gets a type.
880 struct binding *varlist; // In lexical order
884 static struct binding *find_binding(struct parse_context *c, struct text s)
886 struct binding **l = &c->varlist;
891 (cmp = text_cmp((*l)->name, s)) < 0)
895 n = calloc(1, sizeof(*n));
902 Each name can be linked to multiple variables defined in different
903 scopes. Each scope starts where the name is declared and continues
904 until the end of the containing code block. Scopes of a given name
905 cannot nest, so a declaration while a name is in-scope is an error.
907 ###### binding fields
908 struct variable *var;
912 struct variable *previous;
914 struct binding *name;
915 struct exec *where_decl;// where name was declared
916 struct exec *where_set; // where type was set
920 While the naming seems strange, we include local constants in the
921 definition of variables. A name declared `var := value` can
922 subsequently be changed, but a name declared `var ::= value` cannot -
925 ###### variable fields
928 Scopes in parallel branches can be partially merged. More
929 specifically, if a given name is declared in both branches of an
930 if/else then its scope is a candidate for merging. Similarly if
931 every branch of an exhaustive switch (e.g. has an "else" clause)
932 declares a given name, then the scopes from the branches are
933 candidates for merging.
935 Note that names declared inside a loop (which is only parallel to
936 itself) are never visible after the loop. Similarly names defined in
937 scopes which are not parallel, such as those started by `for` and
938 `switch`, are never visible after the scope. Only variables defined in
939 both `then` and `else` (including the implicit then after an `if`, and
940 excluding `then` used with `for`) and in all `case`s and `else` of a
941 `switch` or `while` can be visible beyond the `if`/`switch`/`while`.
943 Labels, which are a bit like variables, follow different rules.
944 Labels are not explicitly declared, but if an undeclared name appears
945 in a context where a label is legal, that effectively declares the
946 name as a label. The declaration remains in force (or in scope) at
947 least to the end of the immediately containing block and conditionally
948 in any larger containing block which does not declare the name in some
949 other way. Importantly, the conditional scope extension happens even
950 if the label is only used in one parallel branch of a conditional --
951 when used in one branch it is treated as having been declared in all
954 Merge candidates are tentatively visible beyond the end of the
955 branching statement which creates them. If the name is used, the
956 merge is affirmed and they become a single variable visible at the
957 outer layer. If not - if it is redeclared first - the merge lapses.
959 To track scopes we have an extra stack, implemented as a linked list,
960 which roughly parallels the parse stack and which is used exclusively
961 for scoping. When a new scope is opened, a new frame is pushed and
962 the child-count of the parent frame is incremented. This child-count
963 is used to distinguish between the first of a set of parallel scopes,
964 in which declared variables must not be in scope, and subsequent
965 branches, whether they must already be conditionally scoped.
967 To push a new frame *before* any code in the frame is parsed, we need a
968 grammar reduction. This is most easily achieved with a grammar
969 element which derives the empty string, and creates the new scope when
970 it is recognized. This can be placed, for example, between a keyword
971 like "if" and the code following it.
975 struct scope *parent;
981 struct scope *scope_stack;
984 static void scope_pop(struct parse_context *c)
986 struct scope *s = c->scope_stack;
988 c->scope_stack = s->parent;
993 static void scope_push(struct parse_context *c)
995 struct scope *s = calloc(1, sizeof(*s));
997 c->scope_stack->child_count += 1;
998 s->parent = c->scope_stack;
1000 c->scope_depth += 1;
1006 OpenScope -> ${ scope_push(c); }$
1007 ClosePara -> ${ var_block_close(c, CloseParallel); }$
1009 Each variable records a scope depth and is in one of four states:
1011 - "in scope". This is the case between the declaration of the
1012 variable and the end of the containing block, and also between
1013 the usage with affirms a merge and the end of that block.
1015 The scope depth is not greater than the current parse context scope
1016 nest depth. When the block of that depth closes, the state will
1017 change. To achieve this, all "in scope" variables are linked
1018 together as a stack in nesting order.
1020 - "pending". The "in scope" block has closed, but other parallel
1021 scopes are still being processed. So far, every parallel block at
1022 the same level that has closed has declared the name.
1024 The scope depth is the depth of the last parallel block that
1025 enclosed the declaration, and that has closed.
1027 - "conditionally in scope". The "in scope" block and all parallel
1028 scopes have closed, and no further mention of the name has been
1029 seen. This state includes a secondary nest depth which records the
1030 outermost scope seen since the variable became conditionally in
1031 scope. If a use of the name is found, the variable becomes "in
1032 scope" and that secondary depth becomes the recorded scope depth.
1033 If the name is declared as a new variable, the old variable becomes
1034 "out of scope" and the recorded scope depth stays unchanged.
1036 - "out of scope". The variable is neither in scope nor conditionally
1037 in scope. It is permanently out of scope now and can be removed from
1038 the "in scope" stack.
1040 ###### variable fields
1041 int depth, min_depth;
1042 enum { OutScope, PendingScope, CondScope, InScope } scope;
1043 struct variable *in_scope;
1045 ###### parse context
1047 struct variable *in_scope;
1049 All variables with the same name are linked together using the
1050 'previous' link. Those variable that have
1051 been affirmatively merged all have a 'merged' pointer that points to
1052 one primary variable - the most recently declared instance. When
1053 merging variables, we need to also adjust the 'merged' pointer on any
1054 other variables that had previously been merged with the one that will
1055 no longer be primary.
1057 A variable that is no longer the most recent instance of a name may
1058 still have "pending" scope, if it might still be merged with most
1059 recent instance. These variables don't really belong in the
1060 "in_scope" list, but are not immediately removed when a new instance
1061 is found. Instead, they are detected and ignored when considering the
1062 list of in_scope names.
1064 ###### variable fields
1065 struct variable *merged;
1067 ###### ast functions
1069 static void variable_merge(struct variable *primary, struct variable *secondary)
1073 if (primary->merged)
1075 primary = primary->merged;
1077 for (v = primary->previous; v; v=v->previous)
1078 if (v == secondary || v == secondary->merged ||
1079 v->merged == secondary ||
1080 (v->merged && v->merged == secondary->merged)) {
1081 v->scope = OutScope;
1082 v->merged = primary;
1086 ###### free context vars
1088 while (context.varlist) {
1089 struct binding *b = context.varlist;
1090 struct variable *v = b->var;
1091 context.varlist = b->next;
1094 struct variable *t = v;
1099 // This is a global constant
1100 free_exec(t->where_decl);
1105 #### Manipulating Bindings
1107 When a name is conditionally visible, a new declaration discards the
1108 old binding - the condition lapses. Conversely a usage of the name
1109 affirms the visibility and extends it to the end of the containing
1110 block - i.e. the block that contains both the original declaration and
1111 the latest usage. This is determined from `min_depth`. When a
1112 conditionally visible variable gets affirmed like this, it is also
1113 merged with other conditionally visible variables with the same name.
1115 When we parse a variable declaration we either report an error if the
1116 name is currently bound, or create a new variable at the current nest
1117 depth if the name is unbound or bound to a conditionally scoped or
1118 pending-scope variable. If the previous variable was conditionally
1119 scoped, it and its homonyms becomes out-of-scope.
1121 When we parse a variable reference (including non-declarative
1122 assignment) we report an error if the name is not bound or is bound to
1123 a pending-scope variable; update the scope if the name is bound to a
1124 conditionally scoped variable; or just proceed normally if the named
1125 variable is in scope.
1127 When we exit a scope, any variables bound at this level are either
1128 marked out of scope or pending-scoped, depending on whether the scope
1129 was sequential or parallel. Here a "parallel" scope means the "then"
1130 or "else" part of a conditional, or any "case" or "else" branch of a
1131 switch. Other scopes are "sequential".
1133 When exiting a parallel scope we check if there are any variables that
1134 were previously pending and are still visible. If there are, then
1135 there weren't redeclared in the most recent scope, so they cannot be
1136 merged and must become out-of-scope. If it is not the first of
1137 parallel scopes (based on `child_count`), we check that there was a
1138 previous binding that is still pending-scope. If there isn't, the new
1139 variable must now be out-of-scope.
1141 When exiting a sequential scope that immediately enclosed parallel
1142 scopes, we need to resolve any pending-scope variables. If there was
1143 no `else` clause, and we cannot determine that the `switch` was exhaustive,
1144 we need to mark all pending-scope variable as out-of-scope. Otherwise
1145 all pending-scope variables become conditionally scoped.
1148 enum closetype { CloseSequential, CloseParallel, CloseElse };
1150 ###### ast functions
1152 static struct variable *var_decl(struct parse_context *c, struct text s)
1154 struct binding *b = find_binding(c, s);
1155 struct variable *v = b->var;
1157 switch (v ? v->scope : OutScope) {
1159 /* Caller will report the error */
1163 v && v->scope == CondScope;
1165 v->scope = OutScope;
1169 v = calloc(1, sizeof(*v));
1170 v->previous = b->var;
1173 v->min_depth = v->depth = c->scope_depth;
1175 v->in_scope = c->in_scope;
1177 v->val = val_prepare(NULL);
1181 static struct variable *var_ref(struct parse_context *c, struct text s)
1183 struct binding *b = find_binding(c, s);
1184 struct variable *v = b->var;
1185 struct variable *v2;
1187 switch (v ? v->scope : OutScope) {
1190 /* Caller will report the error */
1193 /* All CondScope variables of this name need to be merged
1194 * and become InScope
1196 v->depth = v->min_depth;
1198 for (v2 = v->previous;
1199 v2 && v2->scope == CondScope;
1201 variable_merge(v, v2);
1209 static void var_block_close(struct parse_context *c, enum closetype ct)
1211 /* Close off all variables that are in_scope */
1212 struct variable *v, **vp, *v2;
1215 for (vp = &c->in_scope;
1216 v = *vp, v && v->depth > c->scope_depth && v->min_depth > c->scope_depth;
1218 if (v->name->var == v) switch (ct) {
1220 case CloseParallel: /* handle PendingScope */
1224 if (c->scope_stack->child_count == 1)
1225 v->scope = PendingScope;
1226 else if (v->previous &&
1227 v->previous->scope == PendingScope)
1228 v->scope = PendingScope;
1229 else if (v->val.type == Tlabel)
1230 v->scope = PendingScope;
1231 else if (v->name->var == v)
1232 v->scope = OutScope;
1233 if (ct == CloseElse) {
1234 /* All Pending variables with this name
1235 * are now Conditional */
1237 v2 && v2->scope == PendingScope;
1239 v2->scope = CondScope;
1244 v2 && v2->scope == PendingScope;
1246 if (v2->val.type != Tlabel)
1247 v2->scope = OutScope;
1249 case OutScope: break;
1252 case CloseSequential:
1253 if (v->val.type == Tlabel)
1254 v->scope = PendingScope;
1257 v->scope = OutScope;
1260 /* There was no 'else', so we can only become
1261 * conditional if we know the cases were exhaustive,
1262 * and that doesn't mean anything yet.
1263 * So only labels become conditional..
1266 v2 && v2->scope == PendingScope;
1268 if (v2->val.type == Tlabel) {
1269 v2->scope = CondScope;
1270 v2->min_depth = c->scope_depth;
1272 v2->scope = OutScope;
1275 case OutScope: break;
1279 if (v->scope == OutScope || v->name->var != v)
1288 Executables can be lots of different things. In many cases an
1289 executable is just an operation combined with one or two other
1290 executables. This allows for expressions and lists etc. Other times
1291 an executable is something quite specific like a constant or variable
1292 name. So we define a `struct exec` to be a general executable with a
1293 type, and a `struct binode` which is a subclass of `exec`, forms a
1294 node in a binary tree, and holds an operation. There will be other
1295 subclasses, and to access these we need to be able to `cast` the
1296 `exec` into the various other types.
1299 #define cast(structname, pointer) ({ \
1300 const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \
1301 if (__mptr && *__mptr != X##structname) abort(); \
1302 (struct structname *)( (char *)__mptr);})
1304 #define new(structname) ({ \
1305 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1306 __ptr->type = X##structname; \
1307 __ptr->line = -1; __ptr->column = -1; \
1310 #define new_pos(structname, token) ({ \
1311 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1312 __ptr->type = X##structname; \
1313 __ptr->line = token.line; __ptr->column = token.col; \
1322 enum exec_types type;
1330 struct exec *left, *right;
1333 ###### ast functions
1335 static int __fput_loc(struct exec *loc, FILE *f)
1339 if (loc->line >= 0) {
1340 fprintf(f, "%d:%d: ", loc->line, loc->column);
1343 if (loc->type == Xbinode)
1344 return __fput_loc(cast(binode,loc)->left, f) ||
1345 __fput_loc(cast(binode,loc)->right, f);
1348 static void fput_loc(struct exec *loc, FILE *f)
1350 if (!__fput_loc(loc, f))
1351 fprintf(f, "??:??: "); // NOTEST
1354 Each different type of `exec` node needs a number of functions
1355 defined, a bit like methods. We must be able to be able to free it,
1356 print it, analyse it and execute it. Once we have specific `exec`
1357 types we will need to parse them too. Let's take this a bit more
1362 The parser generator requires a `free_foo` function for each struct
1363 that stores attributes and they will often be `exec`s and subtypes
1364 there-of. So we need `free_exec` which can handle all the subtypes,
1365 and we need `free_binode`.
1367 ###### ast functions
1369 static void free_binode(struct binode *b)
1374 free_exec(b->right);
1378 ###### core functions
1379 static void free_exec(struct exec *e)
1388 ###### forward decls
1390 static void free_exec(struct exec *e);
1392 ###### free exec cases
1393 case Xbinode: free_binode(cast(binode, e)); break;
1397 Printing an `exec` requires that we know the current indent level for
1398 printing line-oriented components. As will become clear later, we
1399 also want to know what sort of bracketing to use.
1401 ###### ast functions
1403 static void do_indent(int i, char *str)
1410 ###### core functions
1411 static void print_binode(struct binode *b, int indent, int bracket)
1415 ## print binode cases
1419 static void print_exec(struct exec *e, int indent, int bracket)
1425 print_binode(cast(binode, e), indent, bracket); break;
1430 ###### forward decls
1432 static void print_exec(struct exec *e, int indent, int bracket);
1436 As discussed, analysis involves propagating type requirements around
1437 the program and looking for errors.
1439 So `propagate_types` is passed an expected type (being a `struct type`
1440 pointer together with some `val_rules` flags) that the `exec` is
1441 expected to return, and returns the type that it does return, either
1442 of which can be `NULL` signifying "unknown". An `ok` flag is passed
1443 by reference. It is set to `0` when an error is found, and `2` when
1444 any change is made. If it remains unchanged at `1`, then no more
1445 propagation is needed.
1449 enum val_rules {Rnolabel = 1<<0, Rboolok = 1<<1, Rnoconstant = 2<<1};
1453 if (rules & Rnolabel)
1454 fputs(" (labels not permitted)", stderr);
1457 ###### core functions
1459 static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1460 struct type *type, int rules);
1461 static struct type *__propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1462 struct type *type, int rules)
1469 switch (prog->type) {
1472 struct binode *b = cast(binode, prog);
1474 ## propagate binode cases
1478 ## propagate exec cases
1483 static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1484 struct type *type, int rules)
1486 struct type *ret = __propagate_types(prog, c, ok, type, rules);
1495 Interpreting an `exec` doesn't require anything but the `exec`. State
1496 is stored in variables and each variable will be directly linked from
1497 within the `exec` tree. The exception to this is the whole `program`
1498 which needs to look at command line arguments. The `program` will be
1499 interpreted separately.
1501 Each `exec` can return a value, which may be `Tnone` but must be
1502 non-NULL; Some `exec`s will return the location of a value, which can
1503 be updates. To support this, each exec case must store either a value
1504 in `val` or the pointer to a value in `lval`. If `lval` is set, but a
1505 simple value is required, `inter_exec()` will dereference `lval` to
1508 ###### core functions
1511 struct value val, *lval;
1514 static struct lrval _interp_exec(struct exec *e);
1516 static struct value interp_exec(struct exec *e)
1518 struct lrval ret = _interp_exec(e);
1521 return dup_value(*ret.lval);
1526 static struct value *linterp_exec(struct exec *e)
1528 struct lrval ret = _interp_exec(e);
1533 static struct lrval _interp_exec(struct exec *e)
1536 struct value rv, *lrv = NULL;
1547 struct binode *b = cast(binode, e);
1548 struct value left, right, *lleft;
1549 left.type = right.type = Tnone;
1551 ## interp binode cases
1553 free_value(left); free_value(right);
1556 ## interp exec cases
1565 Now that we have the shape of the interpreter in place we can add some
1566 complex types and connected them in to the data structures and the
1567 different phases of parse, analyse, print, interpret.
1569 Thus far we have arrays and structs.
1573 Arrays can be declared by giving a size and a type, as `[size]type' so
1574 `freq:[26]number` declares `freq` to be an array of 26 numbers. The
1575 size can be an arbitrary expression which is evaluated when the name
1578 Arrays cannot be assigned. When pointers are introduced we will also
1579 introduce array slices which can refer to part or all of an array -
1580 the assignment syntax will create a slice. For now, an array can only
1581 ever be referenced by the name it is declared with. It is likely that
1582 a "`copy`" primitive will eventually be define which can be used to
1583 make a copy of an array with controllable depth.
1585 ###### type union fields
1589 struct variable *vsize;
1590 struct type *member;
1593 ###### value union fields
1595 struct value *elmnts;
1598 ###### value functions
1600 static struct value array_prepare(struct type *type)
1605 ret.array.elmnts = NULL;
1609 static struct value array_init(struct type *type)
1615 if (type->array.vsize) {
1618 mpz_tdiv_q(q, mpq_numref(type->array.vsize->val.num),
1619 mpq_denref(type->array.vsize->val.num));
1620 type->array.size = mpz_get_si(q);
1623 ret.array.elmnts = calloc(type->array.size,
1624 sizeof(ret.array.elmnts[0]));
1625 for (i = 0; ret.array.elmnts && i < type->array.size; i++)
1626 ret.array.elmnts[i] = val_init(type->array.member);
1630 static void array_free(struct value val)
1634 if (val.array.elmnts)
1635 for (i = 0; i < val.type->array.size; i++)
1636 free_value(val.array.elmnts[i]);
1637 free(val.array.elmnts);
1640 static int array_compat(struct type *require, struct type *have)
1642 if (have->compat != require->compat)
1644 /* Both are arrays, so we can look at details */
1645 if (!type_compat(require->array.member, have->array.member, 0))
1647 if (require->array.vsize == NULL && have->array.vsize == NULL)
1648 return require->array.size == have->array.size;
1650 return require->array.vsize == have->array.vsize;
1653 static void array_print_type(struct type *type, FILE *f)
1656 if (type->array.vsize) {
1657 struct binding *b = type->array.vsize->name;
1658 fprintf(f, "%.*s]", b->name.len, b->name.txt);
1660 fprintf(f, "%d]", type->array.size);
1661 type_print(type->array.member, f);
1664 static struct type array_prototype = {
1665 .prepare = array_prepare,
1667 .print_type = array_print_type,
1668 .compat = array_compat,
1674 | [ NUMBER ] Type ${
1675 $0 = calloc(1, sizeof(struct type));
1676 *($0) = array_prototype;
1677 $0->array.member = $<4;
1678 $0->array.vsize = NULL;
1682 if (number_parse(num, tail, $2.txt) == 0)
1683 tok_err(c, "error: unrecognised number", &$2);
1685 tok_err(c, "error: unsupported number suffix", &$2);
1687 $0->array.size = mpz_get_ui(mpq_numref(num));
1688 if (mpz_cmp_ui(mpq_denref(num), 1) != 0) {
1689 tok_err(c, "error: array size must be an integer",
1691 } else if (mpz_cmp_ui(mpq_numref(num), 1UL << 30) >= 0)
1692 tok_err(c, "error: array size is too large",
1696 $0->next= c->anon_typelist;
1697 c->anon_typelist = $0;
1701 | [ IDENTIFIER ] Type ${ {
1702 struct variable *v = var_ref(c, $2.txt);
1705 tok_err(c, "error: name undeclared", &$2);
1706 else if (!v->constant)
1707 tok_err(c, "error: array size must be a constant", &$2);
1709 $0 = calloc(1, sizeof(struct type));
1710 *($0) = array_prototype;
1711 $0->array.member = $<4;
1713 $0->array.vsize = v;
1714 $0->next= c->anon_typelist;
1715 c->anon_typelist = $0;
1718 ###### parse context
1720 struct type *anon_typelist;
1722 ###### free context types
1724 while (context.anon_typelist) {
1725 struct type *t = context.anon_typelist;
1727 context.anon_typelist = t->next;
1734 ###### variable grammar
1736 | Variable [ Expression ] ${ {
1737 struct binode *b = new(binode);
1744 ###### print binode cases
1746 print_exec(b->left, -1, bracket);
1748 print_exec(b->right, -1, bracket);
1752 ###### propagate binode cases
1754 /* left must be an array, right must be a number,
1755 * result is the member type of the array
1757 propagate_types(b->right, c, ok, Tnum, 0);
1758 t = propagate_types(b->left, c, ok, NULL, rules & Rnoconstant);
1759 if (!t || t->compat != array_compat) {
1760 type_err(c, "error: %1 cannot be indexed", prog, t, 0, NULL);
1763 if (!type_compat(type, t->array.member, rules)) {
1764 type_err(c, "error: have %1 but need %2", prog,
1765 t->array.member, rules, type);
1767 return t->array.member;
1771 ###### interp binode cases
1776 lleft = linterp_exec(b->left);
1777 right = interp_exec(b->right);
1779 mpz_tdiv_q(q, mpq_numref(right.num), mpq_denref(right.num));
1783 if (i >= 0 && i < lleft->type->array.size)
1784 lrv = &lleft->array.elmnts[i];
1786 rv = val_init(lleft->type->array.member);
1792 A `struct` is a data-type that contains one or more other data-types.
1793 It differs from an array in that each member can be of a different
1794 type, and they are accessed by name rather than by number. Thus you
1795 cannot choose an element by calculation, you need to know what you
1798 The language makes no promises about how a given structure will be
1799 stored in memory - it is free to rearrange fields to suit whatever
1800 criteria seems important.
1802 Structs are declared separately from program code - they cannot be
1803 declared in-line in a variable declaration like arrays can. A struct
1804 is given a name and this name is used to identify the type - the name
1805 is not prefixed by the word `struct` as it would be in C.
1807 Structs are only treated as the same if they have the same name.
1808 Simply having the same fields in the same order is not enough. This
1809 might change once we can create structure initializes from a list of
1812 Each component datum is identified much like a variable is declared,
1813 with a name, one or two colons, and a type. The type cannot be omitted
1814 as there is no opportunity to deduce the type from usage. An initial
1815 value can be given following an equals sign, so
1817 ##### Example: a struct type
1823 would declare a type called "complex" which has two number fields,
1824 each initialised to zero.
1826 Struct will need to be declared separately from the code that uses
1827 them, so we will need to be able to print out the declaration of a
1828 struct when reprinting the whole program. So a `print_type_decl` type
1829 function will be needed.
1831 ###### type union fields
1842 ###### value union fields
1844 struct value *fields;
1847 ###### type functions
1848 void (*print_type_decl)(struct type *type, FILE *f);
1850 ###### value functions
1852 static struct value structure_prepare(struct type *type)
1857 ret.structure.fields = NULL;
1861 static struct value structure_init(struct type *type)
1867 ret.structure.fields = calloc(type->structure.nfields,
1868 sizeof(ret.structure.fields[0]));
1869 for (i = 0; ret.structure.fields && i < type->structure.nfields; i++)
1870 ret.structure.fields[i] = val_init(type->structure.fields[i].type);
1874 static void structure_free(struct value val)
1878 if (val.structure.fields)
1879 for (i = 0; i < val.type->structure.nfields; i++)
1880 free_value(val.structure.fields[i]);
1881 free(val.structure.fields);
1884 static void structure_free_type(struct type *t)
1887 for (i = 0; i < t->structure.nfields; i++)
1888 free_value(t->structure.fields[i].init);
1889 free(t->structure.fields);
1892 static struct type structure_prototype = {
1893 .prepare = structure_prepare,
1894 .init = structure_init,
1895 .free = structure_free,
1896 .free_type = structure_free_type,
1897 .print_type_decl = structure_print_type,
1911 ###### free exec cases
1913 free_exec(cast(fieldref, e)->left);
1917 ###### variable grammar
1919 | Variable . IDENTIFIER ${ {
1920 struct fieldref *fr = new_pos(fieldref, $2);
1927 ###### print exec cases
1931 struct fieldref *f = cast(fieldref, e);
1932 print_exec(f->left, -1, bracket);
1933 printf(".%.*s", f->name.len, f->name.txt);
1937 ###### ast functions
1938 static int find_struct_index(struct type *type, struct text field)
1941 for (i = 0; i < type->structure.nfields; i++)
1942 if (text_cmp(type->structure.fields[i].name, field) == 0)
1947 ###### propagate exec cases
1951 struct fieldref *f = cast(fieldref, prog);
1952 struct type *st = propagate_types(f->left, c, ok, NULL, 0);
1955 type_err(c, "error: unknown type for field access", f->left,
1957 else if (st->prepare != structure_prepare)
1958 type_err(c, "error: field reference attempted on %1, not a struct",
1959 f->left, st, 0, NULL);
1960 else if (f->index == -2) {
1961 f->index = find_struct_index(st, f->name);
1963 type_err(c, "error: cannot find requested field in %1",
1964 f->left, st, 0, NULL);
1966 if (f->index >= 0) {
1967 struct type *ft = st->structure.fields[f->index].type;
1968 if (!type_compat(type, ft, rules))
1969 type_err(c, "error: have %1 but need %2", prog,
1976 ###### interp exec cases
1979 struct fieldref *f = cast(fieldref, e);
1980 struct value *lleft = linterp_exec(f->left);
1981 lrv = &lleft->structure.fields[f->index];
1987 struct fieldlist *prev;
1991 ###### ast functions
1992 static void free_fieldlist(struct fieldlist *f)
1996 free_fieldlist(f->prev);
1997 free_value(f->f.init);
2001 ###### top level grammar
2002 DeclareStruct -> struct IDENTIFIER FieldBlock Newlines ${ {
2004 add_type(c, $2.txt, &structure_prototype);
2006 struct fieldlist *f;
2008 for (f = $3; f; f=f->prev)
2011 t->structure.nfields = cnt;
2012 t->structure.fields = calloc(cnt, sizeof(struct field));
2016 t->structure.fields[cnt] = f->f;
2017 f->f.init = val_prepare(Tnone);
2023 FieldBlock -> { IN OptNL FieldLines OUT OptNL } ${ $0 = $<FL; }$
2024 | { SimpleFieldList } ${ $0 = $<SFL; }$
2025 | IN OptNL FieldLines OUT ${ $0 = $<FL; }$
2026 | SimpleFieldList EOL ${ $0 = $<SFL; }$
2028 FieldLines -> SimpleFieldList Newlines ${ $0 = $<SFL; }$
2029 | FieldLines SimpleFieldList Newlines ${
2034 SimpleFieldList -> Field ${ $0 = $<F; }$
2035 | SimpleFieldList ; Field ${
2039 | SimpleFieldList ; ${
2042 | ERROR ${ tok_err(c, "Syntax error in struct field", &$1); }$
2044 Field -> IDENTIFIER : Type = Expression ${ {
2047 $0 = calloc(1, sizeof(struct fieldlist));
2048 $0->f.name = $1.txt;
2050 $0->f.init = val_prepare($0->f.type);
2053 propagate_types($<5, c, &ok, $3, 0);
2058 $0->f.init = interp_exec($5);
2060 | IDENTIFIER : Type ${
2061 $0 = calloc(1, sizeof(struct fieldlist));
2062 $0->f.name = $1.txt;
2064 $0->f.init = val_init($3);
2067 ###### forward decls
2068 static void structure_print_type(struct type *t, FILE *f);
2070 ###### value functions
2071 static void structure_print_type(struct type *t, FILE *f)
2075 fprintf(f, "struct %.*s\n", t->name.len, t->name.txt);
2077 for (i = 0; i < t->structure.nfields; i++) {
2078 struct field *fl = t->structure.fields + i;
2079 fprintf(f, " %.*s : ", fl->name.len, fl->name.txt);
2080 type_print(fl->type, f);
2081 if (fl->init.type->print) {
2083 if (fl->init.type == Tstr)
2085 print_value(fl->init);
2086 if (fl->init.type == Tstr)
2093 ###### print type decls
2098 while (target != 0) {
2100 for (t = context.typelist; t ; t=t->next)
2101 if (t->print_type_decl) {
2110 t->print_type_decl(t, stdout);
2116 ## Executables: the elements of code
2118 Each code element needs to be parsed, printed, analysed,
2119 interpreted, and freed. There are several, so let's just start with
2120 the easy ones and work our way up.
2124 We have already met values as separate objects. When manifest
2125 constants appear in the program text, that must result in an executable
2126 which has a constant value. So the `val` structure embeds a value in
2142 $0 = new_pos(val, $1);
2143 $0->val.type = Tbool;
2147 $0 = new_pos(val, $1);
2148 $0->val.type = Tbool;
2152 $0 = new_pos(val, $1);
2153 $0->val.type = Tnum;
2156 if (number_parse($0->val.num, tail, $1.txt) == 0)
2157 mpq_init($0->val.num);
2159 tok_err(c, "error: unsupported number suffix",
2164 $0 = new_pos(val, $1);
2165 $0->val.type = Tstr;
2168 string_parse(&$1, '\\', &$0->val.str, tail);
2170 tok_err(c, "error: unsupported string suffix",
2175 $0 = new_pos(val, $1);
2176 $0->val.type = Tstr;
2179 string_parse(&$1, '\\', &$0->val.str, tail);
2181 tok_err(c, "error: unsupported string suffix",
2186 ###### print exec cases
2189 struct val *v = cast(val, e);
2190 if (v->val.type == Tstr)
2192 print_value(v->val);
2193 if (v->val.type == Tstr)
2198 ###### propagate exec cases
2201 struct val *val = cast(val, prog);
2202 if (!type_compat(type, val->val.type, rules))
2203 type_err(c, "error: expected %1%r found %2",
2204 prog, type, rules, val->val.type);
2205 return val->val.type;
2208 ###### interp exec cases
2210 rv = dup_value(cast(val, e)->val);
2213 ###### ast functions
2214 static void free_val(struct val *v)
2222 ###### free exec cases
2223 case Xval: free_val(cast(val, e)); break;
2225 ###### ast functions
2226 // Move all nodes from 'b' to 'rv', reversing the order.
2227 // In 'b' 'left' is a list, and 'right' is the last node.
2228 // In 'rv', left' is the first node and 'right' is a list.
2229 static struct binode *reorder_bilist(struct binode *b)
2231 struct binode *rv = NULL;
2234 struct exec *t = b->right;
2238 b = cast(binode, b->left);
2248 Just as we used a `val` to wrap a value into an `exec`, we similarly
2249 need a `var` to wrap a `variable` into an exec. While each `val`
2250 contained a copy of the value, each `var` hold a link to the variable
2251 because it really is the same variable no matter where it appears.
2252 When a variable is used, we need to remember to follow the `->merged`
2253 link to find the primary instance.
2261 struct variable *var;
2267 VariableDecl -> IDENTIFIER : ${ {
2268 struct variable *v = var_decl(c, $1.txt);
2269 $0 = new_pos(var, $1);
2274 v = var_ref(c, $1.txt);
2276 type_err(c, "error: variable '%v' redeclared",
2278 type_err(c, "info: this is where '%v' was first declared",
2279 v->where_decl, NULL, 0, NULL);
2282 | IDENTIFIER :: ${ {
2283 struct variable *v = var_decl(c, $1.txt);
2284 $0 = new_pos(var, $1);
2290 v = var_ref(c, $1.txt);
2292 type_err(c, "error: variable '%v' redeclared",
2294 type_err(c, "info: this is where '%v' was first declared",
2295 v->where_decl, NULL, 0, NULL);
2298 | IDENTIFIER : Type ${ {
2299 struct variable *v = var_decl(c, $1.txt);
2300 $0 = new_pos(var, $1);
2305 v->val = val_prepare($<3);
2307 v = var_ref(c, $1.txt);
2309 type_err(c, "error: variable '%v' redeclared",
2311 type_err(c, "info: this is where '%v' was first declared",
2312 v->where_decl, NULL, 0, NULL);
2315 | IDENTIFIER :: Type ${ {
2316 struct variable *v = var_decl(c, $1.txt);
2317 $0 = new_pos(var, $1);
2322 v->val = val_prepare($<3);
2325 v = var_ref(c, $1.txt);
2327 type_err(c, "error: variable '%v' redeclared",
2329 type_err(c, "info: this is where '%v' was first declared",
2330 v->where_decl, NULL, 0, NULL);
2335 Variable -> IDENTIFIER ${ {
2336 struct variable *v = var_ref(c, $1.txt);
2337 $0 = new_pos(var, $1);
2339 /* This might be a label - allocate a var just in case */
2340 v = var_decl(c, $1.txt);
2342 v->val = val_prepare(Tnone);
2347 cast(var, $0)->var = v;
2352 Type -> IDENTIFIER ${
2353 $0 = find_type(c, $1.txt);
2356 "error: undefined type", &$1);
2363 ###### print exec cases
2366 struct var *v = cast(var, e);
2368 struct binding *b = v->var->name;
2369 printf("%.*s", b->name.len, b->name.txt);
2376 if (loc->type == Xvar) {
2377 struct var *v = cast(var, loc);
2379 struct binding *b = v->var->name;
2380 fprintf(stderr, "%.*s", b->name.len, b->name.txt);
2382 fputs("???", stderr); // NOTEST
2384 fputs("NOTVAR", stderr); // NOTEST
2387 ###### propagate exec cases
2391 struct var *var = cast(var, prog);
2392 struct variable *v = var->var;
2394 type_err(c, "%d:BUG: no variable!!", prog, NULL, 0, NULL); // NOTEST
2395 return Tnone; // NOTEST
2399 if (v->constant && (rules & Rnoconstant)) {
2400 type_err(c, "error: Cannot assign to a constant: %v",
2401 prog, NULL, 0, NULL);
2402 type_err(c, "info: name was defined as a constant here",
2403 v->where_decl, NULL, 0, NULL);
2406 if (v->val.type == Tnone && v->where_decl == prog)
2407 type_err(c, "error: variable used but not declared: %v",
2408 prog, NULL, 0, NULL);
2409 if (v->val.type == NULL) {
2410 if (type && *ok != 0) {
2411 v->val = val_prepare(type);
2412 v->where_set = prog;
2417 if (!type_compat(type, v->val.type, rules)) {
2418 type_err(c, "error: expected %1%r but variable '%v' is %2", prog,
2419 type, rules, v->val.type);
2420 type_err(c, "info: this is where '%v' was set to %1", v->where_set,
2421 v->val.type, rules, NULL);
2428 ###### interp exec cases
2431 struct var *var = cast(var, e);
2432 struct variable *v = var->var;
2440 ###### ast functions
2442 static void free_var(struct var *v)
2447 ###### free exec cases
2448 case Xvar: free_var(cast(var, e)); break;
2450 ### Expressions: Conditional
2452 Our first user of the `binode` will be conditional expressions, which
2453 is a bit odd as they actually have three components. That will be
2454 handled by having 2 binodes for each expression. The conditional
2455 expression is the lowest precedence operatior, so it gets to define
2456 what an "Expression" is. The next level up is "BoolExpr", which
2459 Conditional expressions are of the form "value `if` condition `else`
2460 other_value". They associate to the right, so everything to the right
2461 of `else` is part of an else value, while only the BoolExpr to the
2462 left of `if` is the if values. Between `if` and `else` there is no
2463 room for ambiguity, so a full conditional expression is allowed in there.
2474 Expression -> Expression if Expression else Expression $$ifelse ${ {
2475 struct binode *b1 = new(binode);
2476 struct binode *b2 = new(binode);
2485 ## expression grammar
2487 ###### print binode cases
2490 b2 = cast(binode, b->right);
2491 if (bracket) printf("(");
2492 print_exec(b2->left, -1, bracket);
2494 print_exec(b->left, -1, bracket);
2496 print_exec(b2->right, -1, bracket);
2497 if (bracket) printf(")");
2500 ###### propagate binode cases
2503 /* cond must be Tbool, others must match */
2504 struct binode *b2 = cast(binode, b->right);
2507 propagate_types(b->left, c, ok, Tbool, 0);
2508 t = propagate_types(b2->left, c, ok, type, Rnolabel);
2509 t2 = propagate_types(b2->right, c, ok, type ?: t, Rnolabel);
2513 ###### interp binode cases
2516 struct binode *b2 = cast(binode, b->right);
2517 left = interp_exec(b->left);
2519 rv = interp_exec(b2->left);
2521 rv = interp_exec(b2->right);
2525 ### Expressions: Boolean
2527 The next class of expressions to use the `binode` will be Boolean
2528 expressions. As I haven't implemented precedence in the parser
2529 generator yet, we need different names for each precedence level used
2530 by expressions. The outer most or lowest level precedence after
2531 conditional expressions are Boolean operators which form an `BoolExpr`
2532 out of `BTerm`s and `BFact`s. As well as `or` `and`, and `not` we
2533 have `and then` and `or else` which only evaluate the second operand
2534 if the result would make a difference.
2543 ###### expr precedence
2548 ###### expression grammar
2549 | Expression or Expression ${ {
2550 struct binode *b = new(binode);
2556 | Expression or else Expression ${ {
2557 struct binode *b = new(binode);
2564 | Expression and Expression ${ {
2565 struct binode *b = new(binode);
2571 | Expression and then Expression ${ {
2572 struct binode *b = new(binode);
2579 | not Expression ${ {
2580 struct binode *b = new(binode);
2586 ###### print binode cases
2588 if (bracket) printf("(");
2589 print_exec(b->left, -1, bracket);
2591 print_exec(b->right, -1, bracket);
2592 if (bracket) printf(")");
2595 if (bracket) printf("(");
2596 print_exec(b->left, -1, bracket);
2597 printf(" and then ");
2598 print_exec(b->right, -1, bracket);
2599 if (bracket) printf(")");
2602 if (bracket) printf("(");
2603 print_exec(b->left, -1, bracket);
2605 print_exec(b->right, -1, bracket);
2606 if (bracket) printf(")");
2609 if (bracket) printf("(");
2610 print_exec(b->left, -1, bracket);
2611 printf(" or else ");
2612 print_exec(b->right, -1, bracket);
2613 if (bracket) printf(")");
2616 if (bracket) printf("(");
2618 print_exec(b->right, -1, bracket);
2619 if (bracket) printf(")");
2622 ###### propagate binode cases
2628 /* both must be Tbool, result is Tbool */
2629 propagate_types(b->left, c, ok, Tbool, 0);
2630 propagate_types(b->right, c, ok, Tbool, 0);
2631 if (type && type != Tbool)
2632 type_err(c, "error: %1 operation found where %2 expected", prog,
2636 ###### interp binode cases
2638 rv = interp_exec(b->left);
2639 right = interp_exec(b->right);
2640 rv.bool = rv.bool && right.bool;
2643 rv = interp_exec(b->left);
2645 rv = interp_exec(b->right);
2648 rv = interp_exec(b->left);
2649 right = interp_exec(b->right);
2650 rv.bool = rv.bool || right.bool;
2653 rv = interp_exec(b->left);
2655 rv = interp_exec(b->right);
2658 rv = interp_exec(b->right);
2662 ### Expressions: Comparison
2664 Of slightly higher precedence that Boolean expressions are
2666 A comparison takes arguments of any comparable type, but the two types must be
2669 To simplify the parsing we introduce an `eop` which can record an
2670 expression operator.
2677 ###### ast functions
2678 static void free_eop(struct eop *e)
2692 ###### expr precedence
2693 $LEFT < > <= >= == != CMPop
2695 ###### expression grammar
2696 | Expression CMPop Expression ${ {
2697 struct binode *b = new(binode);
2707 CMPop -> < ${ $0.op = Less; }$
2708 | > ${ $0.op = Gtr; }$
2709 | <= ${ $0.op = LessEq; }$
2710 | >= ${ $0.op = GtrEq; }$
2711 | == ${ $0.op = Eql; }$
2712 | != ${ $0.op = NEql; }$
2714 ###### print binode cases
2722 if (bracket) printf("(");
2723 print_exec(b->left, -1, bracket);
2725 case Less: printf(" < "); break;
2726 case LessEq: printf(" <= "); break;
2727 case Gtr: printf(" > "); break;
2728 case GtrEq: printf(" >= "); break;
2729 case Eql: printf(" == "); break;
2730 case NEql: printf(" != "); break;
2731 default: abort(); // NOTEST
2733 print_exec(b->right, -1, bracket);
2734 if (bracket) printf(")");
2737 ###### propagate binode cases
2744 /* Both must match but not be labels, result is Tbool */
2745 t = propagate_types(b->left, c, ok, NULL, Rnolabel);
2747 propagate_types(b->right, c, ok, t, 0);
2749 t = propagate_types(b->right, c, ok, NULL, Rnolabel);
2751 t = propagate_types(b->left, c, ok, t, 0);
2753 if (!type_compat(type, Tbool, 0))
2754 type_err(c, "error: Comparison returns %1 but %2 expected", prog,
2755 Tbool, rules, type);
2758 ###### interp binode cases
2767 left = interp_exec(b->left);
2768 right = interp_exec(b->right);
2769 cmp = value_cmp(left, right);
2772 case Less: rv.bool = cmp < 0; break;
2773 case LessEq: rv.bool = cmp <= 0; break;
2774 case Gtr: rv.bool = cmp > 0; break;
2775 case GtrEq: rv.bool = cmp >= 0; break;
2776 case Eql: rv.bool = cmp == 0; break;
2777 case NEql: rv.bool = cmp != 0; break;
2778 default: rv.bool = 0; break; // NOTEST
2783 ### Expressions: The rest
2785 The remaining expressions with the highest precedence are arithmetic
2786 and string concatenation. They are `Expr`, `Term`, and `Factor`.
2787 The `Factor` is where the `Value` and `Variable` that we already have
2790 `+` and `-` are both infix and prefix operations (where they are
2791 absolute value and negation). These have different operator names.
2793 We also have a 'Bracket' operator which records where parentheses were
2794 found. This makes it easy to reproduce these when printing. Once
2795 precedence is handled better I might be able to discard this.
2804 ###### expr precedence
2810 ###### expression grammar
2811 | Expression Eop Expression ${ {
2812 struct binode *b = new(binode);
2819 | Expression Top Expression ${ {
2820 struct binode *b = new(binode);
2827 | ( Expression ) ${ {
2828 struct binode *b = new_pos(binode, $1);
2833 | Uop Expression ${ {
2834 struct binode *b = new(binode);
2839 | Value ${ $0 = $<1; }$
2840 | Variable ${ $0 = $<1; }$
2843 Eop -> + ${ $0.op = Plus; }$
2844 | - ${ $0.op = Minus; }$
2846 Uop -> + ${ $0.op = Absolute; }$
2847 | - ${ $0.op = Negate; }$
2849 Top -> * ${ $0.op = Times; }$
2850 | / ${ $0.op = Divide; }$
2851 | % ${ $0.op = Rem; }$
2852 | ++ ${ $0.op = Concat; }$
2854 ###### print binode cases
2861 if (bracket) printf("(");
2862 print_exec(b->left, indent, bracket);
2864 case Plus: fputs(" + ", stdout); break;
2865 case Minus: fputs(" - ", stdout); break;
2866 case Times: fputs(" * ", stdout); break;
2867 case Divide: fputs(" / ", stdout); break;
2868 case Rem: fputs(" % ", stdout); break;
2869 case Concat: fputs(" ++ ", stdout); break;
2870 default: abort(); // NOTEST
2872 print_exec(b->right, indent, bracket);
2873 if (bracket) printf(")");
2876 if (bracket) printf("(");
2878 print_exec(b->right, indent, bracket);
2879 if (bracket) printf(")");
2882 if (bracket) printf("(");
2884 print_exec(b->right, indent, bracket);
2885 if (bracket) printf(")");
2889 print_exec(b->right, indent, bracket);
2893 ###### propagate binode cases
2899 /* both must be numbers, result is Tnum */
2902 /* as propagate_types ignores a NULL,
2903 * unary ops fit here too */
2904 propagate_types(b->left, c, ok, Tnum, 0);
2905 propagate_types(b->right, c, ok, Tnum, 0);
2906 if (!type_compat(type, Tnum, 0))
2907 type_err(c, "error: Arithmetic returns %1 but %2 expected", prog,
2912 /* both must be Tstr, result is Tstr */
2913 propagate_types(b->left, c, ok, Tstr, 0);
2914 propagate_types(b->right, c, ok, Tstr, 0);
2915 if (!type_compat(type, Tstr, 0))
2916 type_err(c, "error: Concat returns %1 but %2 expected", prog,
2921 return propagate_types(b->right, c, ok, type, 0);
2923 ###### interp binode cases
2926 rv = interp_exec(b->left);
2927 right = interp_exec(b->right);
2928 mpq_add(rv.num, rv.num, right.num);
2931 rv = interp_exec(b->left);
2932 right = interp_exec(b->right);
2933 mpq_sub(rv.num, rv.num, right.num);
2936 rv = interp_exec(b->left);
2937 right = interp_exec(b->right);
2938 mpq_mul(rv.num, rv.num, right.num);
2941 rv = interp_exec(b->left);
2942 right = interp_exec(b->right);
2943 mpq_div(rv.num, rv.num, right.num);
2948 left = interp_exec(b->left);
2949 right = interp_exec(b->right);
2950 mpz_init(l); mpz_init(r); mpz_init(rem);
2951 mpz_tdiv_q(l, mpq_numref(left.num), mpq_denref(left.num));
2952 mpz_tdiv_q(r, mpq_numref(right.num), mpq_denref(right.num));
2953 mpz_tdiv_r(rem, l, r);
2954 rv = val_init(Tnum);
2955 mpq_set_z(rv.num, rem);
2956 mpz_clear(r); mpz_clear(l); mpz_clear(rem);
2960 rv = interp_exec(b->right);
2961 mpq_neg(rv.num, rv.num);
2964 rv = interp_exec(b->right);
2965 mpq_abs(rv.num, rv.num);
2968 rv = interp_exec(b->right);
2971 left = interp_exec(b->left);
2972 right = interp_exec(b->right);
2974 rv.str = text_join(left.str, right.str);
2977 ###### value functions
2979 static struct text text_join(struct text a, struct text b)
2982 rv.len = a.len + b.len;
2983 rv.txt = malloc(rv.len);
2984 memcpy(rv.txt, a.txt, a.len);
2985 memcpy(rv.txt+a.len, b.txt, b.len);
2989 ### Blocks, Statements, and Statement lists.
2991 Now that we have expressions out of the way we need to turn to
2992 statements. There are simple statements and more complex statements.
2993 Simple statements do not contain (syntactic) newlines, complex statements do.
2995 Statements often come in sequences and we have corresponding simple
2996 statement lists and complex statement lists.
2997 The former comprise only simple statements separated by semicolons.
2998 The later comprise complex statements and simple statement lists. They are
2999 separated by newlines. Thus the semicolon is only used to separate
3000 simple statements on the one line. This may be overly restrictive,
3001 but I'm not sure I ever want a complex statement to share a line with
3004 Note that a simple statement list can still use multiple lines if
3005 subsequent lines are indented, so
3007 ###### Example: wrapped simple statement list
3012 is a single simple statement list. This might allow room for
3013 confusion, so I'm not set on it yet.
3015 A simple statement list needs no extra syntax. A complex statement
3016 list has two syntactic forms. It can be enclosed in braces (much like
3017 C blocks), or it can be introduced by a colon and continue until an
3018 unindented newline (much like Python blocks). With this extra syntax
3019 it is referred to as a block.
3021 Note that a block does not have to include any newlines if it only
3022 contains simple statements. So both of:
3024 if condition: a=b; d=f
3026 if condition { a=b; print f }
3030 In either case the list is constructed from a `binode` list with
3031 `Block` as the operator. When parsing the list it is most convenient
3032 to append to the end, so a list is a list and a statement. When using
3033 the list it is more convenient to consider a list to be a statement
3034 and a list. So we need a function to re-order a list.
3035 `reorder_bilist` serves this purpose.
3037 The only stand-alone statement we introduce at this stage is `pass`
3038 which does nothing and is represented as a `NULL` pointer in a `Block`
3039 list. Other stand-alone statements will follow once the infrastructure
3045 ###### expr precedence
3051 Block -> { IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
3052 | { SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
3053 | SimpleStatements ; ${ $0 = reorder_bilist($<SS); }$
3054 | SimpleStatements EOL ${ $0 = reorder_bilist($<SS); }$
3055 | IN OptNL Statementlist OUT ${ $0 = $<Sl; }$
3057 OpenBlock -> OpenScope { IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
3058 | OpenScope { SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
3059 | OpenScope SimpleStatements ; ${ $0 = reorder_bilist($<SS); }$
3060 | OpenScope SimpleStatements EOL ${ $0 = reorder_bilist($<SS); }$
3061 | IN OpenScope OptNL Statementlist OUT ${ $0 = $<Sl; }$
3063 UseBlock -> { OpenScope IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
3064 | { OpenScope SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
3065 | IN OpenScope OptNL Statementlist OUT ${ $0 = $<Sl; }$
3067 ColonBlock -> { IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
3068 | { SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
3069 | : SimpleStatements ; ${ $0 = reorder_bilist($<SS); }$
3070 | : SimpleStatements EOL ${ $0 = reorder_bilist($<SS); }$
3071 | : IN OptNL Statementlist OUT ${ $0 = $<Sl; }$
3073 Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<CS); }$
3075 ComplexStatements -> ComplexStatements ComplexStatement ${
3085 | ComplexStatement ${
3097 ComplexStatement -> SimpleStatements Newlines ${
3098 $0 = reorder_bilist($<SS);
3100 | SimpleStatements ; Newlines ${
3101 $0 = reorder_bilist($<SS);
3103 ## ComplexStatement Grammar
3106 SimpleStatements -> SimpleStatements ; SimpleStatement ${
3112 | SimpleStatement ${
3119 SimpleStatement -> pass ${ $0 = NULL; }$
3120 | ERROR ${ tok_err(c, "Syntax error in statement", &$1); }$
3121 ## SimpleStatement Grammar
3123 ###### print binode cases
3127 if (b->left == NULL)
3130 print_exec(b->left, indent, bracket);
3133 print_exec(b->right, indent, bracket);
3136 // block, one per line
3137 if (b->left == NULL)
3138 do_indent(indent, "pass\n");
3140 print_exec(b->left, indent, bracket);
3142 print_exec(b->right, indent, bracket);
3146 ###### propagate binode cases
3149 /* If any statement returns something other than Tnone
3150 * or Tbool then all such must return same type.
3151 * As each statement may be Tnone or something else,
3152 * we must always pass NULL (unknown) down, otherwise an incorrect
3153 * error might occur. We never return Tnone unless it is
3158 for (e = b; e; e = cast(binode, e->right)) {
3159 t = propagate_types(e->left, c, ok, NULL, rules);
3160 if ((rules & Rboolok) && t == Tbool)
3162 if (t && t != Tnone && t != Tbool) {
3166 type_err(c, "error: expected %1%r, found %2",
3167 e->left, type, rules, t);
3173 ###### interp binode cases
3175 while (rv.type == Tnone &&
3178 rv = interp_exec(b->left);
3179 b = cast(binode, b->right);
3183 ### The Print statement
3185 `print` is a simple statement that takes a comma-separated list of
3186 expressions and prints the values separated by spaces and terminated
3187 by a newline. No control of formatting is possible.
3189 `print` faces the same list-ordering issue as blocks, and uses the
3195 ##### expr precedence
3198 ###### SimpleStatement Grammar
3200 | print ExpressionList ${
3201 $0 = reorder_bilist($<2);
3203 | print ExpressionList , ${
3208 $0 = reorder_bilist($0);
3219 ExpressionList -> ExpressionList , Expression ${
3232 ###### print binode cases
3235 do_indent(indent, "print");
3239 print_exec(b->left, -1, bracket);
3243 b = cast(binode, b->right);
3249 ###### propagate binode cases
3252 /* don't care but all must be consistent */
3253 propagate_types(b->left, c, ok, NULL, Rnolabel);
3254 propagate_types(b->right, c, ok, NULL, Rnolabel);
3257 ###### interp binode cases
3263 for ( ; b; b = cast(binode, b->right))
3267 left = interp_exec(b->left);
3280 ###### Assignment statement
3282 An assignment will assign a value to a variable, providing it hasn't
3283 be declared as a constant. The analysis phase ensures that the type
3284 will be correct so the interpreter just needs to perform the
3285 calculation. There is a form of assignment which declares a new
3286 variable as well as assigning a value. If a name is assigned before
3287 it is declared, and error will be raised as the name is created as
3288 `Tlabel` and it is illegal to assign to such names.
3294 ###### SimpleStatement Grammar
3295 | Variable = Expression ${
3301 | VariableDecl = Expression ${
3309 if ($1->var->where_set == NULL) {
3311 "Variable declared with no type or value: %v",
3321 ###### print binode cases
3324 do_indent(indent, "");
3325 print_exec(b->left, indent, bracket);
3327 print_exec(b->right, indent, bracket);
3334 struct variable *v = cast(var, b->left)->var;
3335 do_indent(indent, "");
3336 print_exec(b->left, indent, bracket);
3337 if (cast(var, b->left)->var->constant) {
3338 if (v->where_decl == v->where_set) {
3340 type_print(v->val.type, stdout);
3345 if (v->where_decl == v->where_set) {
3347 type_print(v->val.type, stdout);
3354 print_exec(b->right, indent, bracket);
3361 ###### propagate binode cases
3365 /* Both must match and not be labels,
3366 * Type must support 'dup',
3367 * For Assign, left must not be constant.
3370 t = propagate_types(b->left, c, ok, NULL,
3371 Rnolabel | (b->op == Assign ? Rnoconstant : 0));
3376 if (propagate_types(b->right, c, ok, t, 0) != t)
3377 if (b->left->type == Xvar)
3378 type_err(c, "info: variable '%v' was set as %1 here.",
3379 cast(var, b->left)->var->where_set, t, rules, NULL);
3381 t = propagate_types(b->right, c, ok, NULL, Rnolabel);
3383 propagate_types(b->left, c, ok, t,
3384 (b->op == Assign ? Rnoconstant : 0));
3386 if (t && t->dup == NULL)
3387 type_err(c, "error: cannot assign value of type %1", b, t, 0, NULL);
3392 ###### interp binode cases
3395 lleft = linterp_exec(b->left);
3396 right = interp_exec(b->right);
3401 free_value(right); // NOTEST
3407 struct variable *v = cast(var, b->left)->var;
3411 right = interp_exec(b->right);
3413 right = val_init(v->val.type);
3420 ### The `use` statement
3422 The `use` statement is the last "simple" statement. It is needed when
3423 the condition in a conditional statement is a block. `use` works much
3424 like `return` in C, but only completes the `condition`, not the whole
3430 ###### expr precedence
3433 ###### SimpleStatement Grammar
3435 $0 = new_pos(binode, $1);
3438 if ($0->right->type == Xvar) {
3439 struct var *v = cast(var, $0->right);
3440 if (v->var->val.type == Tnone) {
3441 /* Convert this to a label */
3442 v->var->val = val_prepare(Tlabel);
3443 v->var->val.label = &v->var->val;
3448 ###### print binode cases
3451 do_indent(indent, "use ");
3452 print_exec(b->right, -1, bracket);
3457 ###### propagate binode cases
3460 /* result matches value */
3461 return propagate_types(b->right, c, ok, type, 0);
3463 ###### interp binode cases
3466 rv = interp_exec(b->right);
3469 ### The Conditional Statement
3471 This is the biggy and currently the only complex statement. This
3472 subsumes `if`, `while`, `do/while`, `switch`, and some parts of `for`.
3473 It is comprised of a number of parts, all of which are optional though
3474 set combinations apply. Each part is (usually) a key word (`then` is
3475 sometimes optional) followed by either an expression or a code block,
3476 except the `casepart` which is a "key word and an expression" followed
3477 by a code block. The code-block option is valid for all parts and,
3478 where an expression is also allowed, the code block can use the `use`
3479 statement to report a value. If the code block does not report a value
3480 the effect is similar to reporting `True`.
3482 The `else` and `case` parts, as well as `then` when combined with
3483 `if`, can contain a `use` statement which will apply to some
3484 containing conditional statement. `for` parts, `do` parts and `then`
3485 parts used with `for` can never contain a `use`, except in some
3486 subordinate conditional statement.
3488 If there is a `forpart`, it is executed first, only once.
3489 If there is a `dopart`, then it is executed repeatedly providing
3490 always that the `condpart` or `cond`, if present, does not return a non-True
3491 value. `condpart` can fail to return any value if it simply executes
3492 to completion. This is treated the same as returning `True`.
3494 If there is a `thenpart` it will be executed whenever the `condpart`
3495 or `cond` returns True (or does not return any value), but this will happen
3496 *after* `dopart` (when present).
3498 If `elsepart` is present it will be executed at most once when the
3499 condition returns `False` or some value that isn't `True` and isn't
3500 matched by any `casepart`. If there are any `casepart`s, they will be
3501 executed when the condition returns a matching value.
3503 The particular sorts of values allowed in case parts has not yet been
3504 determined in the language design, so nothing is prohibited.
3506 The various blocks in this complex statement potentially provide scope
3507 for variables as described earlier. Each such block must include the
3508 "OpenScope" nonterminal before parsing the block, and must call
3509 `var_block_close()` when closing the block.
3511 The code following "`if`", "`switch`" and "`for`" does not get its own
3512 scope, but is in a scope covering the whole statement, so names
3513 declared there cannot be redeclared elsewhere. Similarly the
3514 condition following "`while`" is in a scope the covers the body
3515 ("`do`" part) of the loop, and which does not allow conditional scope
3516 extension. Code following "`then`" (both looping and non-looping),
3517 "`else`" and "`case`" each get their own local scope.
3519 The type requirements on the code block in a `whilepart` are quite
3520 unusal. It is allowed to return a value of some identifiable type, in
3521 which case the loop aborts and an appropriate `casepart` is run, or it
3522 can return a Boolean, in which case the loop either continues to the
3523 `dopart` (on `True`) or aborts and runs the `elsepart` (on `False`).
3524 This is different both from the `ifpart` code block which is expected to
3525 return a Boolean, or the `switchpart` code block which is expected to
3526 return the same type as the casepart values. The correct analysis of
3527 the type of the `whilepart` code block is the reason for the
3528 `Rboolok` flag which is passed to `propagate_types()`.
3530 The `cond_statement` cannot fit into a `binode` so a new `exec` is
3539 struct exec *action;
3540 struct casepart *next;
3542 struct cond_statement {
3544 struct exec *forpart, *condpart, *dopart, *thenpart, *elsepart;
3545 struct casepart *casepart;
3548 ###### ast functions
3550 static void free_casepart(struct casepart *cp)
3554 free_exec(cp->value);
3555 free_exec(cp->action);
3562 static void free_cond_statement(struct cond_statement *s)
3566 free_exec(s->forpart);
3567 free_exec(s->condpart);
3568 free_exec(s->dopart);
3569 free_exec(s->thenpart);
3570 free_exec(s->elsepart);
3571 free_casepart(s->casepart);
3575 ###### free exec cases
3576 case Xcond_statement: free_cond_statement(cast(cond_statement, e)); break;
3578 ###### ComplexStatement Grammar
3579 | CondStatement ${ $0 = $<1; }$
3581 ###### expr precedence
3582 $TERM for then while do
3589 // A CondStatement must end with EOL, as does CondSuffix and
3591 // ForPart, ThenPart, SwitchPart, CasePart are non-empty and
3592 // may or may not end with EOL
3593 // WhilePart and IfPart include an appropriate Suffix
3596 // Both ForPart and Whilepart open scopes, and CondSuffix only
3597 // closes one - so in the first branch here we have another to close.
3598 CondStatement -> ForPart OptNL ThenPart OptNL WhilePart CondSuffix ${
3601 $0->thenpart = $<TP;
3602 $0->condpart = $WP.condpart; $WP.condpart = NULL;
3603 $0->dopart = $WP.dopart; $WP.dopart = NULL;
3604 var_block_close(c, CloseSequential);
3606 | ForPart OptNL WhilePart CondSuffix ${
3609 $0->condpart = $WP.condpart; $WP.condpart = NULL;
3610 $0->dopart = $WP.dopart; $WP.dopart = NULL;
3611 var_block_close(c, CloseSequential);
3613 | WhilePart CondSuffix ${
3615 $0->condpart = $WP.condpart; $WP.condpart = NULL;
3616 $0->dopart = $WP.dopart; $WP.dopart = NULL;
3618 | SwitchPart OptNL CasePart CondSuffix ${
3620 $0->condpart = $<SP;
3621 $CP->next = $0->casepart;
3622 $0->casepart = $<CP;
3624 | SwitchPart : IN OptNL CasePart CondSuffix OUT Newlines ${
3626 $0->condpart = $<SP;
3627 $CP->next = $0->casepart;
3628 $0->casepart = $<CP;
3630 | IfPart IfSuffix ${
3632 $0->condpart = $IP.condpart; $IP.condpart = NULL;
3633 $0->thenpart = $IP.thenpart; $IP.thenpart = NULL;
3634 // This is where we close an "if" statement
3635 var_block_close(c, CloseSequential);
3638 CondSuffix -> IfSuffix ${
3640 // This is where we close scope of the whole
3641 // "for" or "while" statement
3642 var_block_close(c, CloseSequential);
3644 | Newlines CasePart CondSuffix ${
3646 $CP->next = $0->casepart;
3647 $0->casepart = $<CP;
3649 | CasePart CondSuffix ${
3651 $CP->next = $0->casepart;
3652 $0->casepart = $<CP;
3655 IfSuffix -> Newlines ${ $0 = new(cond_statement); }$
3656 | Newlines ElsePart ${ $0 = $<EP; }$
3657 | ElsePart ${$0 = $<EP; }$
3659 ElsePart -> else OpenBlock Newlines ${
3660 $0 = new(cond_statement);
3661 $0->elsepart = $<OB;
3662 var_block_close(c, CloseElse);
3664 | else OpenScope CondStatement ${
3665 $0 = new(cond_statement);
3666 $0->elsepart = $<CS;
3667 var_block_close(c, CloseElse);
3671 CasePart -> case Expression OpenScope ColonBlock ${
3672 $0 = calloc(1,sizeof(struct casepart));
3675 var_block_close(c, CloseParallel);
3679 // These scopes are closed in CondSuffix
3680 ForPart -> for OpenBlock ${
3684 ThenPart -> then OpenBlock ${
3686 var_block_close(c, CloseSequential);
3690 // This scope is closed in CondSuffix
3691 WhilePart -> while UseBlock OptNL do Block ${
3695 | while OpenScope Expression ColonBlock ${
3696 $0.condpart = $<Exp;
3700 IfPart -> if UseBlock OptNL then OpenBlock ClosePara ${
3704 | if OpenScope Expression OpenScope ColonBlock ClosePara ${
3708 | if OpenScope Expression OpenScope OptNL then Block ClosePara ${
3714 // This scope is closed in CondSuffix
3715 SwitchPart -> switch OpenScope Expression ${
3718 | switch UseBlock ${
3722 ###### print exec cases
3724 case Xcond_statement:
3726 struct cond_statement *cs = cast(cond_statement, e);
3727 struct casepart *cp;
3729 do_indent(indent, "for");
3730 if (bracket) printf(" {\n"); else printf("\n");
3731 print_exec(cs->forpart, indent+1, bracket);
3734 do_indent(indent, "} then {\n");
3736 do_indent(indent, "then\n");
3737 print_exec(cs->thenpart, indent+1, bracket);
3739 if (bracket) do_indent(indent, "}\n");
3743 if (cs->condpart && cs->condpart->type == Xbinode &&
3744 cast(binode, cs->condpart)->op == Block) {
3746 do_indent(indent, "while {\n");
3748 do_indent(indent, "while\n");
3749 print_exec(cs->condpart, indent+1, bracket);
3751 do_indent(indent, "} do {\n");
3753 do_indent(indent, "do\n");
3754 print_exec(cs->dopart, indent+1, bracket);
3756 do_indent(indent, "}\n");
3758 do_indent(indent, "while ");
3759 print_exec(cs->condpart, 0, bracket);
3764 print_exec(cs->dopart, indent+1, bracket);
3766 do_indent(indent, "}\n");
3771 do_indent(indent, "switch");
3773 do_indent(indent, "if");
3774 if (cs->condpart && cs->condpart->type == Xbinode &&
3775 cast(binode, cs->condpart)->op == Block) {
3780 print_exec(cs->condpart, indent+1, bracket);
3782 do_indent(indent, "}\n");
3784 do_indent(indent, "then:\n");
3785 print_exec(cs->thenpart, indent+1, bracket);
3789 print_exec(cs->condpart, 0, bracket);
3795 print_exec(cs->thenpart, indent+1, bracket);
3797 do_indent(indent, "}\n");
3802 for (cp = cs->casepart; cp; cp = cp->next) {
3803 do_indent(indent, "case ");
3804 print_exec(cp->value, -1, 0);
3809 print_exec(cp->action, indent+1, bracket);
3811 do_indent(indent, "}\n");
3814 do_indent(indent, "else");
3819 print_exec(cs->elsepart, indent+1, bracket);
3821 do_indent(indent, "}\n");
3826 ###### propagate exec cases
3827 case Xcond_statement:
3829 // forpart and dopart must return Tnone
3830 // thenpart must return Tnone if there is a dopart,
3831 // otherwise it is like elsepart.
3833 // be bool if there is no casepart
3834 // match casepart->values if there is a switchpart
3835 // either be bool or match casepart->value if there
3837 // elsepart and casepart->action must match the return type
3838 // expected of this statement.
3839 struct cond_statement *cs = cast(cond_statement, prog);
3840 struct casepart *cp;
3842 t = propagate_types(cs->forpart, c, ok, Tnone, 0);
3843 if (!type_compat(Tnone, t, 0))
3845 t = propagate_types(cs->dopart, c, ok, Tnone, 0);
3846 if (!type_compat(Tnone, t, 0))
3849 t = propagate_types(cs->thenpart, c, ok, Tnone, 0);
3850 if (!type_compat(Tnone, t, 0))
3853 if (cs->casepart == NULL)
3854 propagate_types(cs->condpart, c, ok, Tbool, 0);
3856 /* Condpart must match case values, with bool permitted */
3858 for (cp = cs->casepart;
3859 cp && !t; cp = cp->next)
3860 t = propagate_types(cp->value, c, ok, NULL, 0);
3861 if (!t && cs->condpart)
3862 t = propagate_types(cs->condpart, c, ok, NULL, Rboolok);
3863 // Now we have a type (I hope) push it down
3865 for (cp = cs->casepart; cp; cp = cp->next)
3866 propagate_types(cp->value, c, ok, t, 0);
3867 propagate_types(cs->condpart, c, ok, t, Rboolok);
3870 // (if)then, else, and case parts must return expected type.
3871 if (!cs->dopart && !type)
3872 type = propagate_types(cs->thenpart, c, ok, NULL, rules);
3874 type = propagate_types(cs->elsepart, c, ok, NULL, rules);
3875 for (cp = cs->casepart;
3878 type = propagate_types(cp->action, c, ok, NULL, rules);
3881 propagate_types(cs->thenpart, c, ok, type, rules);
3882 propagate_types(cs->elsepart, c, ok, type, rules);
3883 for (cp = cs->casepart; cp ; cp = cp->next)
3884 propagate_types(cp->action, c, ok, type, rules);
3890 ###### interp exec cases
3891 case Xcond_statement:
3893 struct value v, cnd;
3894 struct casepart *cp;
3895 struct cond_statement *c = cast(cond_statement, e);
3898 interp_exec(c->forpart);
3901 cnd = interp_exec(c->condpart);
3904 if (!(cnd.type == Tnone ||
3905 (cnd.type == Tbool && cnd.bool != 0)))
3907 // cnd is Tnone or Tbool, doesn't need to be freed
3909 interp_exec(c->dopart);
3912 rv = interp_exec(c->thenpart);
3913 if (rv.type != Tnone || !c->dopart)
3917 } while (c->dopart);
3919 for (cp = c->casepart; cp; cp = cp->next) {
3920 v = interp_exec(cp->value);
3921 if (value_cmp(v, cnd) == 0) {
3924 rv = interp_exec(cp->action);
3931 rv = interp_exec(c->elsepart);
3938 ### Top level structure
3940 All the language elements so far can be used in various places. Now
3941 it is time to clarify what those places are.
3943 At the top level of a file there will be a number of declarations.
3944 Many of the things that can be declared haven't been described yet,
3945 such as functions, procedures, imports, and probably more.
3946 For now there are two sorts of things that can appear at the top
3947 level. They are predefined constants, `struct` types, and the main
3948 program. While the syntax will allow the main program to appear
3949 multiple times, that will trigger an error if it is actually attempted.
3951 The various declarations do not return anything. They store the
3952 various declarations in the parse context.
3954 ###### Parser: grammar
3957 Ocean -> OptNL DeclarationList
3964 DeclarationList -> Declaration
3965 | DeclarationList Declaration
3967 Declaration -> ERROR Newlines ${
3969 "error: unhandled parse error", &$1);
3975 ## top level grammar
3977 ### The `const` section
3979 As well as being defined in with the code that uses them, constants
3980 can be declared at the top level. These have full-file scope, so they
3981 are always `InScope`. The value of a top level constant can be given
3982 as an expression, and this is evaluated immediately rather than in the
3983 later interpretation stage. Once we add functions to the language, we
3984 will need rules concern which, if any, can be used to define a top
3987 Constants are defined in a section that starts with the reserved word
3988 `const` and then has a block with a list of assignment statements.
3989 For syntactic consistency, these must use the double-colon syntax to
3990 make it clear that they are constants. Type can also be given: if
3991 not, the type will be determined during analysis, as with other
3994 As the types constants are inserted at the head of a list, printing
3995 them in the same order that they were read is not straight forward.
3996 We take a quadratic approach here and count the number of constants
3997 (variables of depth 0), then count down from there, each time
3998 searching through for the Nth constant for decreasing N.
4000 ###### top level grammar
4002 DeclareConstant -> const { IN OptNL ConstList OUT OptNL } Newlines
4003 | const { SimpleConstList } Newlines
4004 | const IN OptNL ConstList OUT Newlines
4005 | const SimpleConstList Newlines
4007 ConstList -> ConstList SimpleConstLine
4009 SimpleConstList -> SimpleConstList ; Const
4012 SimpleConstLine -> SimpleConstList Newlines
4013 | ERROR Newlines ${ tok_err(c, "Syntax error in constant", &$1); }$
4016 CType -> Type ${ $0 = $<1; }$
4019 Const -> IDENTIFIER :: CType = Expression ${ {
4023 v = var_decl(c, $1.txt);
4025 struct var *var = new_pos(var, $1);
4026 v->where_decl = var;
4031 v = var_ref(c, $1.txt);
4032 tok_err(c, "error: name already declared", &$1);
4033 type_err(c, "info: this is where '%v' was first declared",
4034 v->where_decl, NULL, 0, NULL);
4038 propagate_types($5, c, &ok, $3, 0);
4043 v->val = interp_exec($5);
4047 ###### print const decls
4052 while (target != 0) {
4054 for (v = context.in_scope; v; v=v->in_scope)
4055 if (v->depth == 0) {
4066 printf(" %.*s :: ", v->name->name.len, v->name->name.txt);
4067 type_print(v->val.type, stdout);
4069 if (v->val.type == Tstr)
4071 print_value(v->val);
4072 if (v->val.type == Tstr)
4080 ### Finally the whole program.
4082 Somewhat reminiscent of Pascal a (current) Ocean program starts with
4083 the keyword "program" and a list of variable names which are assigned
4084 values from command line arguments. Following this is a `block` which
4085 is the code to execute. Unlike Pascal, constants and other
4086 declarations come *before* the program.
4088 As this is the top level, several things are handled a bit
4090 The whole program is not interpreted by `interp_exec` as that isn't
4091 passed the argument list which the program requires. Similarly type
4092 analysis is a bit more interesting at this level.
4097 ###### top level grammar
4099 DeclareProgram -> Program ${ {
4101 type_err(c, "Program defined a second time",
4108 Program -> program OpenScope Varlist ColonBlock Newlines ${
4111 $0->left = reorder_bilist($<Vl);
4113 var_block_close(c, CloseSequential);
4114 if (c->scope_stack && !c->parse_error) abort();
4117 Varlist -> Varlist ArgDecl ${
4126 ArgDecl -> IDENTIFIER ${ {
4127 struct variable *v = var_decl(c, $1.txt);
4134 ###### print binode cases
4136 do_indent(indent, "program");
4137 for (b2 = cast(binode, b->left); b2; b2 = cast(binode, b2->right)) {
4139 print_exec(b2->left, 0, 0);
4145 print_exec(b->right, indent+1, bracket);
4147 do_indent(indent, "}\n");
4150 ###### propagate binode cases
4151 case Program: abort(); // NOTEST
4153 ###### core functions
4155 static int analyse_prog(struct exec *prog, struct parse_context *c)
4157 struct binode *b = cast(binode, prog);
4164 propagate_types(b->right, c, &ok, Tnone, 0);
4169 for (b = cast(binode, b->left); b; b = cast(binode, b->right)) {
4170 struct var *v = cast(var, b->left);
4171 if (!v->var->val.type) {
4172 v->var->where_set = b;
4173 v->var->val = val_prepare(Tstr);
4176 b = cast(binode, prog);
4179 propagate_types(b->right, c, &ok, Tnone, 0);
4184 /* Make sure everything is still consistent */
4185 propagate_types(b->right, c, &ok, Tnone, 0);
4189 static void interp_prog(struct exec *prog, char **argv)
4191 struct binode *p = cast(binode, prog);
4197 al = cast(binode, p->left);
4199 struct var *v = cast(var, al->left);
4200 struct value *vl = &v->var->val;
4202 if (argv[0] == NULL) {
4203 printf("Not enough args\n");
4206 al = cast(binode, al->right);
4208 *vl = parse_value(vl->type, argv[0]);
4209 if (vl->type == NULL)
4213 v = interp_exec(p->right);
4217 ###### interp binode cases
4218 case Program: abort(); // NOTEST
4220 ## And now to test it out.
4222 Having a language requires having a "hello world" program. I'll
4223 provide a little more than that: a program that prints "Hello world"
4224 finds the GCD of two numbers, prints the first few elements of
4225 Fibonacci, performs a binary search for a number, and a few other
4226 things which will likely grow as the languages grows.
4228 ###### File: oceani.mk
4231 @echo "===== DEMO ====="
4232 ./oceani --section "demo: hello" oceani.mdc 55 33
4238 four ::= 2 + 2 ; five ::= 10/2
4239 const pie ::= "I like Pie";
4240 cake ::= "The cake is"
4249 print "Hello World, what lovely oceans you have!"
4250 print "Are there", five, "?"
4251 print pi, pie, "but", cake
4253 /* When a variable is defined in both branches of an 'if',
4254 * and used afterwards, the variables are merged.
4260 print "Is", A, "bigger than", B,"? ", bigger
4261 /* If a variable is not used after the 'if', no
4262 * merge happens, so types can be different
4265 double:string = "yes"
4266 print A, "is more than twice", B, "?", double
4269 print "double", B, "is", double
4274 if a > 0 and then b > 0:
4280 print "GCD of", A, "and", B,"is", a
4282 print a, "is not positive, cannot calculate GCD"
4284 print b, "is not positive, cannot calculate GCD"
4289 print "Fibonacci:", f1,f2,
4290 then togo = togo - 1
4298 /* Binary search... */
4303 mid := (lo + hi) / 2
4315 print "Yay, I found", target
4317 print "Closest I found was", mid
4322 // "middle square" PRNG. Not particularly good, but one my
4323 // Dad taught me - the first one I ever heard of.
4324 for i:=1; then i = i + 1; while i < size:
4325 n := list[i-1] * list[i-1]
4326 list[i] = (n / 100) % 10 000
4328 print "Before sort:",
4329 for i:=0; then i = i + 1; while i < size:
4333 for i := 1; then i=i+1; while i < size:
4334 for j:=i-1; then j=j-1; while j >= 0:
4335 if list[j] > list[j+1]:
4339 print " After sort:",
4340 for i:=0; then i = i + 1; while i < size:
4344 if 1 == 2 then print "yes"; else print "no"
4348 bob.alive = (bob.name == "Hello")
4349 print "bob", "is" if bob.alive else "isn't", "alive"