1 # Ocean Interpreter - Jamison Creek version
3 Ocean is intended to be a compiled language, so this interpreter is
4 not targeted at being the final product. It is, rather, an intermediate
5 stage and fills that role in two distinct ways.
7 Firstly, it exists as a platform to experiment with the early language
8 design. An interpreter is easy to write and easy to get working, so
9 the barrier for entry is lower if I aim to start with an interpreter.
11 Secondly, the plan for the Ocean compiler is to write it in the
12 [Ocean language](http://ocean-lang.org). To achieve this we naturally
13 need some sort of boot-strap process and this interpreter - written in
14 portable C - will fill that role. It will be used to bootstrap the
17 Two features that are not needed to fill either of these roles are
18 performance and completeness. The interpreter only needs to be fast
19 enough to run small test programs and occasionally to run the compiler
20 on itself. It only needs to be complete enough to test aspects of the
21 design which are developed before the compiler is working, and to run
22 the compiler on itself. Any features not used by the compiler when
23 compiling itself are superfluous. They may be included anyway, but
26 Nonetheless, the interpreter should end up being reasonably complete,
27 and any performance bottlenecks which appear and are easily fixed, will
32 This third version of the interpreter exists to test out some initial
33 ideas relating to types. Particularly it adds arrays (indexed from
34 zero) and simple structures. Basic control flow and variable scoping
35 are already fairly well established, as are basic numerical and
38 Some operators that have only recently been added, and so have not
39 generated all that much experience yet are "and then" and "or else" as
40 short-circuit Boolean operators, and the "if ... else" trinary
41 operator which can select between two expressions based on a third
42 (which appears syntactically in the middle).
44 Elements that are present purely to make a usable language, and
45 without any expectation that they will remain, are the "program'
46 clause, which provides a list of variables to received command-line
47 arguments, and the "print" statement which performs simple output.
49 The current scalar types are "number", "Boolean", and "string".
50 Boolean will likely stay in its current form, the other two might, but
51 could just as easily be changed.
55 Versions of the interpreter which obviously do not support a complete
56 language will be named after creeks and streams. This one is Jamison
59 Once we have something reasonably resembling a complete language, the
60 names of rivers will be used.
61 Early versions of the compiler will be named after seas. Major
62 releases of the compiler will be named after oceans. Hopefully I will
63 be finished once I get to the Pacific Ocean release.
67 As well as parsing and executing a program, the interpreter can print
68 out the program from the parsed internal structure. This is useful
69 for validating the parsing.
70 So the main requirements of the interpreter are:
72 - Parse the program, possibly with tracing,
73 - Analyse the parsed program to ensure consistency,
75 - Execute the program, if no parsing or consistency errors were found.
77 This is all performed by a single C program extracted with
80 There will be two formats for printing the program: a default and one
81 that uses bracketing. So a `--bracket` command line option is needed
82 for that. Normally the first code section found is used, however an
83 alternate section can be requested so that a file (such as this one)
84 can contain multiple programs This is effected with the `--section`
87 This code must be compiled with `-fplan9-extensions` so that anonymous
88 structures can be used.
90 ###### File: oceani.mk
92 myCFLAGS := -Wall -g -fplan9-extensions
93 CFLAGS := $(filter-out $(myCFLAGS),$(CFLAGS)) $(myCFLAGS)
94 myLDLIBS:= libparser.o libscanner.o libmdcode.o -licuuc
95 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
97 all :: $(LDLIBS) oceani
98 oceani.c oceani.h : oceani.mdc parsergen
99 ./parsergen -o oceani --LALR --tag Parser oceani.mdc
100 oceani.mk: oceani.mdc md2c
103 oceani: oceani.o $(LDLIBS)
104 $(CC) $(CFLAGS) -o oceani oceani.o $(LDLIBS)
106 ###### Parser: header
109 struct parse_context {
110 struct token_config config;
119 #define container_of(ptr, type, member) ({ \
120 const typeof( ((type *)0)->member ) *__mptr = (ptr); \
121 (type *)( (char *)__mptr - offsetof(type,member) );})
123 #define config2context(_conf) container_of(_conf, struct parse_context, \
132 #include <sys/mman.h>
151 static char Usage[] = "Usage: oceani --trace --print --noexec --brackets"
152 "--section=SectionName prog.ocn\n";
153 static const struct option long_options[] = {
154 {"trace", 0, NULL, 't'},
155 {"print", 0, NULL, 'p'},
156 {"noexec", 0, NULL, 'n'},
157 {"brackets", 0, NULL, 'b'},
158 {"section", 1, NULL, 's'},
161 const char *options = "tpnbs";
162 int main(int argc, char *argv[])
167 struct section *s, *ss;
168 char *section = NULL;
169 struct parse_context context = {
171 .ignored = (1 << TK_line_comment)
172 | (1 << TK_block_comment),
173 .number_chars = ".,_+-",
178 int doprint=0, dotrace=0, doexec=1, brackets=0;
180 while ((opt = getopt_long(argc, argv, options, long_options, NULL))
183 case 't': dotrace=1; break;
184 case 'p': doprint=1; break;
185 case 'n': doexec=0; break;
186 case 'b': brackets=1; break;
187 case 's': section = optarg; break;
188 default: fprintf(stderr, Usage);
192 if (optind >= argc) {
193 fprintf(stderr, "oceani: no input file given\n");
196 fd = open(argv[optind], O_RDONLY);
198 fprintf(stderr, "oceani: cannot open %s\n", argv[optind]);
201 context.file_name = argv[optind];
202 len = lseek(fd, 0, 2);
203 file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
204 s = code_extract(file, file+len, NULL);
206 fprintf(stderr, "oceani: could not find any code in %s\n",
211 ## context initialization
214 for (ss = s; ss; ss = ss->next) {
215 struct text sec = ss->section;
216 if (sec.len == strlen(section) &&
217 strncmp(sec.txt, section, sec.len) == 0)
221 fprintf(stderr, "oceani: cannot find section %s\n",
227 parse_oceani(ss->code, &context.config, dotrace ? stderr : NULL);
230 fprintf(stderr, "oceani: no program found.\n");
231 context.parse_error = 1;
233 if (context.prog && doprint) {
236 print_exec(context.prog, 0, brackets);
238 if (context.prog && doexec && !context.parse_error) {
239 if (!analyse_prog(context.prog, &context)) {
240 fprintf(stderr, "oceani: type error in program - not running.\n");
243 interp_prog(context.prog, argv+optind+1);
245 free_exec(context.prog);
248 struct section *t = s->next;
254 ## free context types
255 exit(context.parse_error ? 1 : 0);
260 The four requirements of parse, analyse, print, interpret apply to
261 each language element individually so that is how most of the code
264 Three of the four are fairly self explanatory. The one that requires
265 a little explanation is the analysis step.
267 The current language design does not require the types of variables to
268 be declared, but they must still have a single type. Different
269 operations impose different requirements on the variables, for example
270 addition requires both arguments to be numeric, and assignment
271 requires the variable on the left to have the same type as the
272 expression on the right.
274 Analysis involves propagating these type requirements around and
275 consequently setting the type of each variable. If any requirements
276 are violated (e.g. a string is compared with a number) or if a
277 variable needs to have two different types, then an error is raised
278 and the program will not run.
280 If the same variable is declared in both branchs of an 'if/else', or
281 in all cases of a 'switch' then the multiple instances may be merged
282 into just one variable if the variable is references after the
283 conditional statement. When this happens, the types must naturally be
284 consistent across all the branches. When the variable is not used
285 outside the if, the variables in the different branches are distinct
286 and can be of different types.
288 Determining the types of all variables early is important for
289 processing command line arguments. These can be assigned to any of
290 several types of variable, but we must first know the correct type so
291 any required conversion can happen. If a variable is associated with
292 a command line argument but no type can be interpreted (e.g. the
293 variable is only ever used in a `print` statement), then the type is
296 Undeclared names may only appear in "use" statements and "case" expressions.
297 These names are given a type of "label" and a unique value.
298 This allows them to fill the role of a name in an enumerated type, which
299 is useful for testing the `switch` statement.
301 As we will see, the condition part of a `while` statement can return
302 either a Boolean or some other type. This requires that the expected
303 type that gets passed around comprises a type and a flag to indicate
304 that `Tbool` is also permitted.
306 As there are, as yet, no distinct types that are compatible, there
307 isn't much subtlety in the analysis. When we have distinct number
308 types, this will become more interesting.
312 When analysis discovers an inconsistency it needs to report an error;
313 just refusing to run the code ensures that the error doesn't cascade,
314 but by itself it isn't very useful. A clear understanding of the sort
315 of error message that are useful will help guide the process of
318 At a simplistic level, the only sort of error that type analysis can
319 report is that the type of some construct doesn't match a contextual
320 requirement. For example, in `4 + "hello"` the addition provides a
321 contextual requirement for numbers, but `"hello"` is not a number. In
322 this particular example no further information is needed as the types
323 are obvious from local information. When a variable is involved that
324 isn't the case. It may be helpful to explain why the variable has a
325 particular type, by indicating the location where the type was set,
326 whether by declaration or usage.
328 Using a recursive-descent analysis we can easily detect a problem at
329 multiple locations. In "`hello:= "there"; 4 + hello`" the addition
330 will detect that one argument is not a number and the usage of `hello`
331 will detect that a number was wanted, but not provided. In this
332 (early) version of the language, we will generate error reports at
333 multiple locations, so the use of `hello` will report an error and
334 explain were the value was set, and the addition will report an error
335 and say why numbers are needed. To be able to report locations for
336 errors, each language element will need to record a file location
337 (line and column) and each variable will need to record the language
338 element where its type was set. For now we will assume that each line
339 of an error message indicates one location in the file, and up to 2
340 types. So we provide a `printf`-like function which takes a format, a
341 language (a `struct exec` which has not yet been introduced), and 2
342 types. "`%1`" reports the first type, "`%2`" reports the second. We
343 will need a function to print the location, once we know how that is
344 stored. As will be explained later, there are sometimes extra rules for
345 type matching and they might affect error messages, we need to pass those
348 As well as type errors, we sometimes need to report problems with
349 tokens, which might be unexpected or might name a type that has not
350 been defined. For these we have `tok_err()` which reports an error
351 with a given token. Each of the error functions sets the flag in the
352 context so indicate that parsing failed.
356 static void fput_loc(struct exec *loc, FILE *f);
358 ###### core functions
360 static void type_err(struct parse_context *c,
361 char *fmt, struct exec *loc,
362 struct type *t1, int rules, struct type *t2)
364 fprintf(stderr, "%s:", c->file_name);
365 fput_loc(loc, stderr);
366 for (; *fmt ; fmt++) {
373 case '%': fputc(*fmt, stderr); break; // NOTEST
374 default: fputc('?', stderr); break; // NOTEST
376 type_print(t1, stderr);
379 type_print(t2, stderr);
388 static void tok_err(struct parse_context *c, char *fmt, struct token *t)
390 fprintf(stderr, "%s:%d:%d: %s: %.*s\n", c->file_name, t->line, t->col, fmt,
391 t->txt.len, t->txt.txt);
395 ## Entities: declared and predeclared.
397 There are various "things" that the language and/or the interpreter
398 needs to know about to parse and execute a program. These include
399 types, variables, values, and executable code. These are all lumped
400 together under the term "entities" (calling them "objects" would be
401 confusing) and introduced here. These will introduced and described
402 here. The following section will present the different specific code
403 elements which comprise or manipulate these various entities.
407 Values come in a wide range of types, with more likely to be added.
408 Each type needs to be able to parse and print its own values (for
409 convenience at least) as well as to compare two values, at least for
410 equality and possibly for order. For now, values might need to be
411 duplicated and freed, though eventually such manipulations will be
412 better integrated into the language.
414 Rather than requiring every numeric type to support all numeric
415 operations (add, multiple, etc), we allow types to be able to present
416 as one of a few standard types: integer, float, and fraction. The
417 existence of these conversion functions eventaully enable types to
418 determine if they are compatible with other types, though such types
419 have not yet been implemented.
421 Named type are stored in a simple linked list. Objects of each type are "values"
422 which are often passed around by value.
429 ## value union fields
436 struct value (*init)(struct type *type);
437 struct value (*prepare)(struct type *type);
438 struct value (*parse)(struct type *type, char *str);
439 void (*print)(struct value val);
440 void (*print_type)(struct type *type, FILE *f);
441 int (*cmp_order)(struct value v1, struct value v2);
442 int (*cmp_eq)(struct value v1, struct value v2);
443 struct value (*dup)(struct value val);
444 void (*free)(struct value val);
445 void (*free_type)(struct type *t);
446 int (*compat)(struct type *this, struct type *other);
447 long long (*to_int)(struct value *v);
448 double (*to_float)(struct value *v);
449 int (*to_mpq)(mpq_t *q, struct value *v);
458 struct type *typelist;
462 static struct type *find_type(struct parse_context *c, struct text s)
464 struct type *l = c->typelist;
467 text_cmp(l->name, s) != 0)
472 static struct type *add_type(struct parse_context *c, struct text s,
477 n = calloc(1, sizeof(*n));
480 n->next = c->typelist;
485 static void free_type(struct type *t)
487 /* The type is always a reference to something in the
488 * context, so we don't need to free anything.
492 static void free_value(struct value v)
498 static int type_compat(struct type *require, struct type *have, int rules)
500 if ((rules & Rboolok) && have == Tbool)
502 if ((rules & Rnolabel) && have == Tlabel)
504 if (!require || !have)
508 return require->compat(require, have);
510 return require == have;
513 static void type_print(struct type *type, FILE *f)
516 fputs("*unknown*type*", f);
517 else if (type->name.len)
518 fprintf(f, "%.*s", type->name.len, type->name.txt);
519 else if (type->print_type)
520 type->print_type(type, f);
522 fputs("*invalid*type*", f); // NOTEST
525 static struct value val_prepare(struct type *type)
530 return type->prepare(type);
535 static struct value val_init(struct type *type)
540 return type->init(type);
545 static struct value dup_value(struct value v)
548 return v.type->dup(v);
552 static int value_cmp(struct value left, struct value right)
554 if (left.type && left.type->cmp_order)
555 return left.type->cmp_order(left, right);
556 if (left.type && left.type->cmp_eq)
557 return left.type->cmp_eq(left, right);
561 static void print_value(struct value v)
563 if (v.type && v.type->print)
566 printf("*Unknown*"); // NOTEST
569 static struct value parse_value(struct type *type, char *arg)
573 if (type && type->parse)
574 return type->parse(type, arg);
575 rv.type = NULL; // NOTEST
581 static void free_value(struct value v);
582 static int type_compat(struct type *require, struct type *have, int rules);
583 static void type_print(struct type *type, FILE *f);
584 static struct value val_init(struct type *type);
585 static struct value dup_value(struct value v);
586 static int value_cmp(struct value left, struct value right);
587 static void print_value(struct value v);
588 static struct value parse_value(struct type *type, char *arg);
590 ###### free context types
592 while (context.typelist) {
593 struct type *t = context.typelist;
595 context.typelist = t->next;
603 Values of the base types can be numbers, which we represent as
604 multi-precision fractions, strings, Booleans and labels. When
605 analysing the program we also need to allow for places where no value
606 is meaningful (type `Tnone`) and where we don't know what type to
607 expect yet (type is `NULL`).
609 Values are never shared, they are always copied when used, and freed
610 when no longer needed.
612 When propagating type information around the program, we need to
613 determine if two types are compatible, where type `NULL` is compatible
614 with anything. There are two special cases with type compatibility,
615 both related to the Conditional Statement which will be described
616 later. In some cases a Boolean can be accepted as well as some other
617 primary type, and in others any type is acceptable except a label (`Vlabel`).
618 A separate function encoding these cases will simplify some code later.
620 When assigning command line arguments to variables, we need to be able
621 to parse each type from a string.
623 The distinction beteen "prepare" and "init" needs to be explained.
624 "init" sets up an initial value, such as "zero" or the empty string.
625 "prepare" simply prepares the data structure so that if "free" gets
626 called on it, it won't do something silly. Normally a value will be
627 stored after "prepare" but before "free", but this might not happen if
636 myLDLIBS := libnumber.o libstring.o -lgmp
637 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
639 ###### type union fields
640 enum vtype {Vnone, Vstr, Vnum, Vbool, Vlabel} vtype;
642 ###### value union fields
649 static void _free_value(struct value v)
651 switch (v.type->vtype) {
653 case Vstr: free(v.str.txt); break;
654 case Vnum: mpq_clear(v.num); break;
660 ###### value functions
662 static struct value _val_prepare(struct type *type)
667 switch(type->vtype) {
671 memset(&rv.num, 0, sizeof(rv.num));
687 static struct value _val_init(struct type *type)
692 switch(type->vtype) {
693 case Vnone: // NOTEST
696 mpq_init(rv.num); break;
698 rv.str.txt = malloc(1);
704 case Vlabel: // NOTEST
705 rv.label = NULL; // NOTEST
711 static struct value _dup_value(struct value v)
715 switch (rv.type->vtype) {
716 case Vnone: // NOTEST
726 mpq_set(rv.num, v.num);
729 rv.str.len = v.str.len;
730 rv.str.txt = malloc(rv.str.len);
731 memcpy(rv.str.txt, v.str.txt, v.str.len);
737 static int _value_cmp(struct value left, struct value right)
740 if (left.type != right.type)
741 return left.type - right.type; // NOTEST
742 switch (left.type->vtype) {
743 case Vlabel: cmp = left.label == right.label ? 0 : 1; break;
744 case Vnum: cmp = mpq_cmp(left.num, right.num); break;
745 case Vstr: cmp = text_cmp(left.str, right.str); break;
746 case Vbool: cmp = left.bool - right.bool; break;
747 case Vnone: cmp = 0; // NOTEST
752 static void _print_value(struct value v)
754 switch (v.type->vtype) {
755 case Vnone: // NOTEST
756 printf("*no-value*"); break; // NOTEST
757 case Vlabel: // NOTEST
758 printf("*label-%p*", v.label); break; // NOTEST
760 printf("%.*s", v.str.len, v.str.txt); break;
762 printf("%s", v.bool ? "True":"False"); break;
767 mpf_set_q(fl, v.num);
768 gmp_printf("%Fg", fl);
775 static struct value _parse_value(struct type *type, char *arg)
783 switch(type->vtype) {
784 case Vlabel: // NOTEST
785 case Vnone: // NOTEST
786 val.type = NULL; // NOTEST
789 val.str.len = strlen(arg);
790 val.str.txt = malloc(val.str.len);
791 memcpy(val.str.txt, arg, val.str.len);
798 tx.txt = arg; tx.len = strlen(tx.txt);
799 if (number_parse(val.num, tail, tx) == 0)
802 mpq_neg(val.num, val.num);
804 printf("Unsupported suffix: %s\n", arg);
809 if (strcasecmp(arg, "true") == 0 ||
810 strcmp(arg, "1") == 0)
812 else if (strcasecmp(arg, "false") == 0 ||
813 strcmp(arg, "0") == 0)
816 printf("Bad bool: %s\n", arg);
824 static void _free_value(struct value v);
826 static struct type base_prototype = {
828 .prepare = _val_prepare,
829 .parse = _parse_value,
830 .print = _print_value,
831 .cmp_order = _value_cmp,
832 .cmp_eq = _value_cmp,
837 static struct type *Tbool, *Tstr, *Tnum, *Tnone, *Tlabel;
840 static struct type *add_base_type(struct parse_context *c, char *n, enum vtype vt)
842 struct text txt = { n, strlen(n) };
845 t = add_type(c, txt, &base_prototype);
850 ###### context initialization
852 Tbool = add_base_type(&context, "Boolean", Vbool);
853 Tstr = add_base_type(&context, "string", Vstr);
854 Tnum = add_base_type(&context, "number", Vnum);
855 Tnone = add_base_type(&context, "none", Vnone);
856 Tlabel = add_base_type(&context, "label", Vlabel);
860 Variables are scoped named values. We store the names in a linked
861 list of "bindings" sorted lexically, and use sequential search and
868 struct binding *next; // in lexical order
872 This linked list is stored in the parse context so that "reduce"
873 functions can find or add variables, and so the analysis phase can
874 ensure that every variable gets a type.
878 struct binding *varlist; // In lexical order
882 static struct binding *find_binding(struct parse_context *c, struct text s)
884 struct binding **l = &c->varlist;
889 (cmp = text_cmp((*l)->name, s)) < 0)
893 n = calloc(1, sizeof(*n));
900 Each name can be linked to multiple variables defined in different
901 scopes. Each scope starts where the name is declared and continues
902 until the end of the containing code block. Scopes of a given name
903 cannot nest, so a declaration while a name is in-scope is an error.
905 ###### binding fields
906 struct variable *var;
910 struct variable *previous;
912 struct binding *name;
913 struct exec *where_decl;// where name was declared
914 struct exec *where_set; // where type was set
918 While the naming seems strange, we include local constants in the
919 definition of variables. A name declared `var := value` can
920 subsequently be changed, but a name declared `var ::= value` cannot -
923 ###### variable fields
926 Scopes in parallel branches can be partially merged. More
927 specifically, if a given name is declared in both branches of an
928 if/else then its scope is a candidate for merging. Similarly if
929 every branch of an exhaustive switch (e.g. has an "else" clause)
930 declares a given name, then the scopes from the branches are
931 candidates for merging.
933 Note that names declared inside a loop (which is only parallel to
934 itself) are never visible after the loop. Similarly names defined in
935 scopes which are not parallel, such as those started by `for` and
936 `switch`, are never visible after the scope. Only variables defined in
937 both `then` and `else` (including the implicit then after an `if`, and
938 excluding `then` used with `for`) and in all `case`s and `else` of a
939 `switch` or `while` can be visible beyond the `if`/`switch`/`while`.
941 Labels, which are a bit like variables, follow different rules.
942 Labels are not explicitly declared, but if an undeclared name appears
943 in a context where a label is legal, that effectively declares the
944 name as a label. The declaration remains in force (or in scope) at
945 least to the end of the immediately containing block and conditionally
946 in any larger containing block which does not declare the name in some
947 other way. Importantly, the conditional scope extension happens even
948 if the label is only used in one parallel branch of a conditional --
949 when used in one branch it is treated as having been declared in all
952 Merge candidates are tentatively visible beyond the end of the
953 branching statement which creates them. If the name is used, the
954 merge is affirmed and they become a single variable visible at the
955 outer layer. If not - if it is redeclared first - the merge lapses.
957 To track scopes we have an extra stack, implemented as a linked list,
958 which roughly parallels the parse stack and which is used exclusively
959 for scoping. When a new scope is opened, a new frame is pushed and
960 the child-count of the parent frame is incremented. This child-count
961 is used to distinguish between the first of a set of parallel scopes,
962 in which declared variables must not be in scope, and subsequent
963 branches, whether they must already be conditionally scoped.
965 To push a new frame *before* any code in the frame is parsed, we need a
966 grammar reduction. This is most easily achieved with a grammar
967 element which derives the empty string, and creates the new scope when
968 it is recognized. This can be placed, for example, between a keyword
969 like "if" and the code following it.
973 struct scope *parent;
979 struct scope *scope_stack;
982 static void scope_pop(struct parse_context *c)
984 struct scope *s = c->scope_stack;
986 c->scope_stack = s->parent;
991 static void scope_push(struct parse_context *c)
993 struct scope *s = calloc(1, sizeof(*s));
995 c->scope_stack->child_count += 1;
996 s->parent = c->scope_stack;
1004 OpenScope -> ${ scope_push(config2context(config)); }$
1006 Each variable records a scope depth and is in one of four states:
1008 - "in scope". This is the case between the declaration of the
1009 variable and the end of the containing block, and also between
1010 the usage with affirms a merge and the end of that block.
1012 The scope depth is not greater than the current parse context scope
1013 nest depth. When the block of that depth closes, the state will
1014 change. To achieve this, all "in scope" variables are linked
1015 together as a stack in nesting order.
1017 - "pending". The "in scope" block has closed, but other parallel
1018 scopes are still being processed. So far, every parallel block at
1019 the same level that has closed has declared the name.
1021 The scope depth is the depth of the last parallel block that
1022 enclosed the declaration, and that has closed.
1024 - "conditionally in scope". The "in scope" block and all parallel
1025 scopes have closed, and no further mention of the name has been
1026 seen. This state includes a secondary nest depth which records the
1027 outermost scope seen since the variable became conditionally in
1028 scope. If a use of the name is found, the variable becomes "in
1029 scope" and that secondary depth becomes the recorded scope depth.
1030 If the name is declared as a new variable, the old variable becomes
1031 "out of scope" and the recorded scope depth stays unchanged.
1033 - "out of scope". The variable is neither in scope nor conditionally
1034 in scope. It is permanently out of scope now and can be removed from
1035 the "in scope" stack.
1037 ###### variable fields
1038 int depth, min_depth;
1039 enum { OutScope, PendingScope, CondScope, InScope } scope;
1040 struct variable *in_scope;
1042 ###### parse context
1044 struct variable *in_scope;
1046 All variables with the same name are linked together using the
1047 'previous' link. Those variable that have
1048 been affirmatively merged all have a 'merged' pointer that points to
1049 one primary variable - the most recently declared instance. When
1050 merging variables, we need to also adjust the 'merged' pointer on any
1051 other variables that had previously been merged with the one that will
1052 no longer be primary.
1054 A variable that is no longer the most recent instance of a name may
1055 still have "pending" scope, if it might still be merged with most
1056 recent instance. These variables don't really belong in the
1057 "in_scope" list, but are not immediately removed when a new instance
1058 is found. Instead, they are detected and ignored when considering the
1059 list of in_scope names.
1061 ###### variable fields
1062 struct variable *merged;
1064 ###### ast functions
1066 static void variable_merge(struct variable *primary, struct variable *secondary)
1070 if (primary->merged)
1072 primary = primary->merged;
1074 for (v = primary->previous; v; v=v->previous)
1075 if (v == secondary || v == secondary->merged ||
1076 v->merged == secondary ||
1077 (v->merged && v->merged == secondary->merged)) {
1078 v->scope = OutScope;
1079 v->merged = primary;
1083 ###### free context vars
1085 while (context.varlist) {
1086 struct binding *b = context.varlist;
1087 struct variable *v = b->var;
1088 context.varlist = b->next;
1091 struct variable *t = v;
1095 if (t->min_depth == 0)
1096 // This is a global constant
1097 free_exec(t->where_decl);
1102 #### Manipulating Bindings
1104 When a name is conditionally visible, a new declaration discards the
1105 old binding - the condition lapses. Conversely a usage of the name
1106 affirms the visibility and extends it to the end of the containing
1107 block - i.e. the block that contains both the original declaration and
1108 the latest usage. This is determined from `min_depth`. When a
1109 conditionally visible variable gets affirmed like this, it is also
1110 merged with other conditionally visible variables with the same name.
1112 When we parse a variable declaration we either report an error if the
1113 name is currently bound, or create a new variable at the current nest
1114 depth if the name is unbound or bound to a conditionally scoped or
1115 pending-scope variable. If the previous variable was conditionally
1116 scoped, it and its homonyms becomes out-of-scope.
1118 When we parse a variable reference (including non-declarative
1119 assignment) we report an error if the name is not bound or is bound to
1120 a pending-scope variable; update the scope if the name is bound to a
1121 conditionally scoped variable; or just proceed normally if the named
1122 variable is in scope.
1124 When we exit a scope, any variables bound at this level are either
1125 marked out of scope or pending-scoped, depending on whether the scope
1126 was sequential or parallel. Here a "parallel" scope means the "then"
1127 or "else" part of a conditional, or any "case" or "else" branch of a
1128 switch. Other scopes are "sequential".
1130 When exiting a parallel scope we check if there are any variables that
1131 were previously pending and are still visible. If there are, then
1132 there weren't redeclared in the most recent scope, so they cannot be
1133 merged and must become out-of-scope. If it is not the first of
1134 parallel scopes (based on `child_count`), we check that there was a
1135 previous binding that is still pending-scope. If there isn't, the new
1136 variable must now be out-of-scope.
1138 When exiting a sequential scope that immediately enclosed parallel
1139 scopes, we need to resolve any pending-scope variables. If there was
1140 no `else` clause, and we cannot determine that the `switch` was exhaustive,
1141 we need to mark all pending-scope variable as out-of-scope. Otherwise
1142 all pending-scope variables become conditionally scoped.
1145 enum closetype { CloseSequential, CloseParallel, CloseElse };
1147 ###### ast functions
1149 static struct variable *var_decl(struct parse_context *c, struct text s)
1151 struct binding *b = find_binding(c, s);
1152 struct variable *v = b->var;
1154 switch (v ? v->scope : OutScope) {
1156 /* Caller will report the error */
1160 v && v->scope == CondScope;
1162 v->scope = OutScope;
1166 v = calloc(1, sizeof(*v));
1167 v->previous = b->var;
1170 v->min_depth = v->depth = c->scope_depth;
1172 v->in_scope = c->in_scope;
1174 v->val = val_prepare(NULL);
1178 static struct variable *var_ref(struct parse_context *c, struct text s)
1180 struct binding *b = find_binding(c, s);
1181 struct variable *v = b->var;
1182 struct variable *v2;
1184 switch (v ? v->scope : OutScope) {
1187 /* Caller will report the error */
1190 /* All CondScope variables of this name need to be merged
1191 * and become InScope
1193 v->depth = v->min_depth;
1195 for (v2 = v->previous;
1196 v2 && v2->scope == CondScope;
1198 variable_merge(v, v2);
1206 static void var_block_close(struct parse_context *c, enum closetype ct)
1208 /* Close off all variables that are in_scope */
1209 struct variable *v, **vp, *v2;
1212 for (vp = &c->in_scope;
1213 v = *vp, v && v->depth > c->scope_depth && v->min_depth > c->scope_depth;
1215 if (v->name->var == v) switch (ct) {
1217 case CloseParallel: /* handle PendingScope */
1221 if (c->scope_stack->child_count == 1)
1222 v->scope = PendingScope;
1223 else if (v->previous &&
1224 v->previous->scope == PendingScope)
1225 v->scope = PendingScope;
1226 else if (v->val.type == Tlabel)
1227 v->scope = PendingScope;
1228 else if (v->name->var == v)
1229 v->scope = OutScope;
1230 if (ct == CloseElse) {
1231 /* All Pending variables with this name
1232 * are now Conditional */
1234 v2 && v2->scope == PendingScope;
1236 v2->scope = CondScope;
1241 v2 && v2->scope == PendingScope;
1243 if (v2->val.type != Tlabel)
1244 v2->scope = OutScope;
1246 case OutScope: break;
1249 case CloseSequential:
1250 if (v->val.type == Tlabel)
1251 v->scope = PendingScope;
1254 v->scope = OutScope;
1257 /* There was no 'else', so we can only become
1258 * conditional if we know the cases were exhaustive,
1259 * and that doesn't mean anything yet.
1260 * So only labels become conditional..
1263 v2 && v2->scope == PendingScope;
1265 if (v2->val.type == Tlabel) {
1266 v2->scope = CondScope;
1267 v2->min_depth = c->scope_depth;
1269 v2->scope = OutScope;
1272 case OutScope: break;
1276 if (v->scope == OutScope || v->name->var != v)
1285 Executables can be lots of different things. In many cases an
1286 executable is just an operation combined with one or two other
1287 executables. This allows for expressions and lists etc. Other times
1288 an executable is something quite specific like a constant or variable
1289 name. So we define a `struct exec` to be a general executable with a
1290 type, and a `struct binode` which is a subclass of `exec`, forms a
1291 node in a binary tree, and holds an operation. There will be other
1292 subclasses, and to access these we need to be able to `cast` the
1293 `exec` into the various other types.
1296 #define cast(structname, pointer) ({ \
1297 const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \
1298 if (__mptr && *__mptr != X##structname) abort(); \
1299 (struct structname *)( (char *)__mptr);})
1301 #define new(structname) ({ \
1302 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1303 __ptr->type = X##structname; \
1304 __ptr->line = -1; __ptr->column = -1; \
1307 #define new_pos(structname, token) ({ \
1308 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1309 __ptr->type = X##structname; \
1310 __ptr->line = token.line; __ptr->column = token.col; \
1319 enum exec_types type;
1327 struct exec *left, *right;
1330 ###### ast functions
1332 static int __fput_loc(struct exec *loc, FILE *f)
1336 if (loc->line >= 0) {
1337 fprintf(f, "%d:%d: ", loc->line, loc->column);
1340 if (loc->type == Xbinode)
1341 return __fput_loc(cast(binode,loc)->left, f) ||
1342 __fput_loc(cast(binode,loc)->right, f);
1345 static void fput_loc(struct exec *loc, FILE *f)
1347 if (!__fput_loc(loc, f))
1348 fprintf(f, "??:??: "); // NOTEST
1351 Each different type of `exec` node needs a number of functions
1352 defined, a bit like methods. We must be able to be able to free it,
1353 print it, analyse it and execute it. Once we have specific `exec`
1354 types we will need to parse them too. Let's take this a bit more
1359 The parser generator requires a `free_foo` function for each struct
1360 that stores attributes and they will often be `exec`s and subtypes
1361 there-of. So we need `free_exec` which can handle all the subtypes,
1362 and we need `free_binode`.
1364 ###### ast functions
1366 static void free_binode(struct binode *b)
1371 free_exec(b->right);
1375 ###### core functions
1376 static void free_exec(struct exec *e)
1385 ###### forward decls
1387 static void free_exec(struct exec *e);
1389 ###### free exec cases
1390 case Xbinode: free_binode(cast(binode, e)); break;
1394 Printing an `exec` requires that we know the current indent level for
1395 printing line-oriented components. As will become clear later, we
1396 also want to know what sort of bracketing to use.
1398 ###### ast functions
1400 static void do_indent(int i, char *str)
1407 ###### core functions
1408 static void print_binode(struct binode *b, int indent, int bracket)
1412 ## print binode cases
1416 static void print_exec(struct exec *e, int indent, int bracket)
1422 print_binode(cast(binode, e), indent, bracket); break;
1427 ###### forward decls
1429 static void print_exec(struct exec *e, int indent, int bracket);
1433 As discussed, analysis involves propagating type requirements around
1434 the program and looking for errors.
1436 So `propagate_types` is passed an expected type (being a `struct type`
1437 pointer together with some `val_rules` flags) that the `exec` is
1438 expected to return, and returns the type that it does return, either
1439 of which can be `NULL` signifying "unknown". An `ok` flag is passed
1440 by reference. It is set to `0` when an error is found, and `2` when
1441 any change is made. If it remains unchanged at `1`, then no more
1442 propagation is needed.
1446 enum val_rules {Rnolabel = 1<<0, Rboolok = 1<<1, Rnoconstant = 2<<1};
1450 if (rules & Rnolabel)
1451 fputs(" (labels not permitted)", stderr);
1454 ###### core functions
1456 static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1457 struct type *type, int rules)
1464 switch (prog->type) {
1467 struct binode *b = cast(binode, prog);
1469 ## propagate binode cases
1473 ## propagate exec cases
1480 Interpreting an `exec` doesn't require anything but the `exec`. State
1481 is stored in variables and each variable will be directly linked from
1482 within the `exec` tree. The exception to this is the whole `program`
1483 which needs to look at command line arguments. The `program` will be
1484 interpreted separately.
1486 Each `exec` can return a value, which may be `Tnone` but must be
1487 non-NULL; Some `exec`s will return the location of a value, which can
1488 be updates. To support this, each exec case must store either a value
1489 in `val` or the pointer to a value in `lval`. If `lval` is set, but a
1490 simple value is required, `inter_exec()` will dereference `lval` to
1493 ###### core functions
1496 struct value val, *lval;
1499 static struct lrval _interp_exec(struct exec *e);
1501 static struct value interp_exec(struct exec *e)
1503 struct lrval ret = _interp_exec(e);
1506 return dup_value(*ret.lval);
1511 static struct value *linterp_exec(struct exec *e)
1513 struct lrval ret = _interp_exec(e);
1518 static struct lrval _interp_exec(struct exec *e)
1521 struct value rv, *lrv = NULL;
1532 struct binode *b = cast(binode, e);
1533 struct value left, right, *lleft;
1534 left.type = right.type = Tnone;
1536 ## interp binode cases
1538 free_value(left); free_value(right);
1541 ## interp exec cases
1550 Now that we have the shape of the interpreter in place we can add some
1551 complex types and connected them in to the data structures and the
1552 different phases of parse, analyse, print, interpret.
1554 Thus far we have arrays and structs.
1558 Arrays can be declared by giving a size and a type, as `[size]type' so
1559 `freq:[26]number` declares `freq` to be an array of 26 numbers. The
1560 size can be an arbitrary expression which is evaluated when the name
1563 Arrays cannot be assigned. When pointers are introduced we will also
1564 introduce array slices which can refer to part or all of an array -
1565 the assignment syntax will create a slice. For now, an array can only
1566 ever be referenced by the name it is declared with. It is likely that
1567 a "`copy`" primitive will eventually be define which can be used to
1568 make a copy of an array with controllable depth.
1570 ###### type union fields
1574 struct variable *vsize;
1575 struct type *member;
1578 ###### value union fields
1580 struct value *elmnts;
1583 ###### value functions
1585 static struct value array_prepare(struct type *type)
1590 ret.array.elmnts = NULL;
1594 static struct value array_init(struct type *type)
1600 if (type->array.vsize) {
1603 mpz_tdiv_q(q, mpq_numref(type->array.vsize->val.num),
1604 mpq_denref(type->array.vsize->val.num));
1605 type->array.size = mpz_get_si(q);
1608 ret.array.elmnts = calloc(type->array.size,
1609 sizeof(ret.array.elmnts[0]));
1610 for (i = 0; ret.array.elmnts && i < type->array.size; i++)
1611 ret.array.elmnts[i] = val_init(type->array.member);
1615 static void array_free(struct value val)
1619 if (val.array.elmnts)
1620 for (i = 0; i < val.type->array.size; i++)
1621 free_value(val.array.elmnts[i]);
1622 free(val.array.elmnts);
1625 static int array_compat(struct type *require, struct type *have)
1627 if (have->compat != require->compat)
1629 /* Both are arrays, so we can look at details */
1630 if (!type_compat(require->array.member, have->array.member, 0))
1632 if (require->array.vsize == NULL && have->array.vsize == NULL)
1633 return require->array.size == have->array.size;
1635 return require->array.vsize == have->array.vsize;
1638 static void array_print_type(struct type *type, FILE *f)
1641 if (type->array.vsize) {
1642 struct binding *b = type->array.vsize->name;
1643 fprintf(f, "%.*s]", b->name.len, b->name.txt);
1645 fprintf(f, "%d]", type->array.size);
1646 type_print(type->array.member, f);
1649 static struct type array_prototype = {
1650 .prepare = array_prepare,
1652 .print_type = array_print_type,
1653 .compat = array_compat,
1659 | [ NUMBER ] Type ${
1660 $0 = calloc(1, sizeof(struct type));
1661 *($0) = array_prototype;
1662 $0->array.member = $<4;
1663 $0->array.vsize = NULL;
1665 struct parse_context *c = config2context(config);
1668 if (number_parse(num, tail, $2.txt) == 0)
1669 tok_err(c, "error: unrecognised number", &$2);
1671 tok_err(c, "error: unsupported number suffix", &$2);
1673 $0->array.size = mpz_get_ui(mpq_numref(num));
1674 if (mpz_cmp_ui(mpq_denref(num), 1) != 0) {
1675 tok_err(c, "error: array size must be an integer",
1677 } else if (mpz_cmp_ui(mpq_numref(num), 1UL << 30) >= 0)
1678 tok_err(c, "error: array size is too large",
1682 $0->next= c->anon_typelist;
1683 c->anon_typelist = $0;
1687 | [ IDENTIFIER ] Type ${ {
1688 struct parse_context *c = config2context(config);
1689 struct variable *v = var_ref(c, $2.txt);
1692 tok_err(config2context(config), "error: name undeclared", &$2);
1693 else if (!v->constant)
1694 tok_err(config2context(config), "error: array size must be a constant", &$2);
1696 $0 = calloc(1, sizeof(struct type));
1697 *($0) = array_prototype;
1698 $0->array.member = $<4;
1700 $0->array.vsize = v;
1701 $0->next= c->anon_typelist;
1702 c->anon_typelist = $0;
1705 ###### parse context
1707 struct type *anon_typelist;
1709 ###### free context types
1711 while (context.anon_typelist) {
1712 struct type *t = context.anon_typelist;
1714 context.anon_typelist = t->next;
1721 ###### variable grammar
1723 | Variable [ Expression ] ${ {
1724 struct binode *b = new(binode);
1731 ###### print binode cases
1733 print_exec(b->left, -1, 0);
1735 print_exec(b->right, -1, 0);
1739 ###### propagate binode cases
1741 /* left must be an array, right must be a number,
1742 * result is the member type of the array
1744 propagate_types(b->right, c, ok, Tnum, 0);
1745 t = propagate_types(b->left, c, ok, NULL, rules & Rnoconstant);
1746 if (!t || t->compat != array_compat) {
1747 type_err(c, "error: %1 cannot be indexed", prog, t, 0, NULL);
1751 if (!type_compat(type, t->array.member, rules)) {
1752 type_err(c, "error: have %1 but need %2", prog,
1753 t->array.member, rules, type);
1756 return t->array.member;
1760 ###### interp binode cases
1765 lleft = linterp_exec(b->left);
1766 right = interp_exec(b->right);
1768 mpz_tdiv_q(q, mpq_numref(right.num), mpq_denref(right.num));
1772 if (i >= 0 && i < lleft->type->array.size)
1773 lrv = &lleft->array.elmnts[i];
1775 rv = val_init(lleft->type->array.member);
1781 A `struct` is a data-type that contains one or more other data-types.
1782 It differs from an array in that each member can be of a different
1783 type, and they are accessed by name rather than by number. Thus you
1784 cannot choose an element by calculation, you need to know what you
1787 The language makes no promises about how a given structure will be
1788 stored in memory - it is free to rearrange fields to suit whatever
1789 criteria seems important.
1791 Structs are declared separately from program code - they cannot be
1792 declared in-line in a variable declaration like arrays can. A struct
1793 is given a name and this name is used to identify the type - the name
1794 is not prefixed by the word `struct` as it would be in C.
1796 Structs are only treated as the same if they have the same name.
1797 Simply having the same fields in the same order is not enough. This
1798 might change once we can create structure initializes from a list of
1801 Each component datum is identified much like a variable is declared,
1802 with a name, one or two colons, and a type. The type cannot be omitted
1803 as there is no opportunity to deduce the type from usage. An initial
1804 value can be given following an equals sign, so
1806 ##### Example: a struct type
1812 would declare a type called "complex" which has two number fields,
1813 each initialised to zero.
1815 Struct will need to be declared separately from the code that uses
1816 them, so we will need to be able to print out the declaration of a
1817 struct when reprinting the whole program. So a `print_type_decl` type
1818 function will be needed.
1820 ###### type union fields
1831 ###### value union fields
1833 struct value *fields;
1836 ###### type functions
1837 void (*print_type_decl)(struct type *type, FILE *f);
1839 ###### value functions
1841 static struct value structure_prepare(struct type *type)
1846 ret.structure.fields = NULL;
1850 static struct value structure_init(struct type *type)
1856 ret.structure.fields = calloc(type->structure.nfields,
1857 sizeof(ret.structure.fields[0]));
1858 for (i = 0; ret.structure.fields && i < type->structure.nfields; i++)
1859 ret.structure.fields[i] = val_init(type->structure.fields[i].type);
1863 static void structure_free(struct value val)
1867 if (val.structure.fields)
1868 for (i = 0; i < val.type->structure.nfields; i++)
1869 free_value(val.structure.fields[i]);
1870 free(val.structure.fields);
1873 static void structure_free_type(struct type *t)
1876 for (i = 0; i < t->structure.nfields; i++)
1877 free_value(t->structure.fields[i].init);
1878 free(t->structure.fields);
1881 static struct type structure_prototype = {
1882 .prepare = structure_prepare,
1883 .init = structure_init,
1884 .free = structure_free,
1885 .free_type = structure_free_type,
1886 .print_type_decl = structure_print_type,
1900 ###### free exec cases
1902 free_exec(cast(fieldref, e)->left);
1906 ###### variable grammar
1908 | Variable . IDENTIFIER ${ {
1909 struct fieldref *fr = new_pos(fieldref, $2);
1916 ###### print exec cases
1920 struct fieldref *f = cast(fieldref, e);
1921 print_exec(f->left, -1, 0);
1922 printf(".%.*s", f->name.len, f->name.txt);
1926 ###### ast functions
1927 static int find_struct_index(struct type *type, struct text field)
1930 for (i = 0; i < type->structure.nfields; i++)
1931 if (text_cmp(type->structure.fields[i].name, field) == 0)
1936 ###### propagate exec cases
1940 struct fieldref *f = cast(fieldref, prog);
1941 struct type *st = propagate_types(f->left, c, ok, NULL, 0);
1944 type_err(c, "error: unknown type for field access", f->left,
1946 else if (st->prepare != structure_prepare)
1947 type_err(c, "error: field reference attempted on %1, not a struct",
1948 f->left, st, 0, NULL);
1949 else if (f->index == -2) {
1950 f->index = find_struct_index(st, f->name);
1952 type_err(c, "error: cannot find requested field in %1",
1953 f->left, st, 0, NULL);
1957 if (f->index >= 0) {
1958 struct type *ft = st->structure.fields[f->index].type;
1959 if (!type_compat(type, ft, rules)) {
1960 type_err(c, "error: have %1 but need %2", prog,
1969 ###### interp exec cases
1972 struct fieldref *f = cast(fieldref, e);
1973 struct value *lleft = linterp_exec(f->left);
1974 lrv = &lleft->structure.fields[f->index];
1980 struct fieldlist *prev;
1984 ###### ast functions
1985 static void free_fieldlist(struct fieldlist *f)
1989 free_fieldlist(f->prev);
1990 free_value(f->f.init);
1994 ###### top level grammar
1995 DeclareStruct -> struct IDENTIFIER FieldBlock ${ {
1997 add_type(config2context(config), $2.txt, &structure_prototype);
1999 struct fieldlist *f;
2001 for (f = $3; f; f=f->prev)
2004 t->structure.nfields = cnt;
2005 t->structure.fields = calloc(cnt, sizeof(struct field));
2009 t->structure.fields[cnt] = f->f;
2010 f->f.init = val_prepare(Tnone);
2016 FieldBlock -> Open SimpleFieldList Close ${ $0 = $<2; }$
2017 | Open Newlines SimpleFieldList Close ${ $0 = $<3; }$
2018 | : FieldList ${ $0 = $<2; }$
2020 FieldList -> Field NEWLINE ${ $0 = $<1; }$
2021 | FieldList NEWLINE ${ $0 = $<1; }$
2022 | FieldList Field NEWLINE ${
2027 SimpleFieldList -> Field ; ${ $0 = $<1; }$
2028 | SimpleFieldList Field ; ${
2033 Field -> IDENTIFIER : Type = Expression ${ {
2036 $0 = calloc(1, sizeof(struct fieldlist));
2037 $0->f.name = $1.txt;
2039 $0->f.init = val_prepare($0->f.type);
2042 propagate_types($<5, config2context(config), &ok, $3, 0);
2045 config2context(config)->parse_error = 1;
2047 $0->f.init = interp_exec($5);
2049 | IDENTIFIER : Type ${
2050 $0 = calloc(1, sizeof(struct fieldlist));
2051 $0->f.name = $1.txt;
2053 $0->f.init = val_init($3);
2056 ###### forward decls
2057 static void structure_print_type(struct type *t, FILE *f);
2059 ###### value functions
2060 static void structure_print_type(struct type *t, FILE *f)
2064 fprintf(f, "struct %.*s:\n", t->name.len, t->name.txt);
2066 for (i = 0; i < t->structure.nfields; i++) {
2067 struct field *fl = t->structure.fields + i;
2068 fprintf(f, " %.*s : ", fl->name.len, fl->name.txt);
2069 type_print(fl->type, f);
2070 if (fl->init.type->print) {
2072 if (fl->init.type == Tstr)
2074 print_value(fl->init);
2075 if (fl->init.type == Tstr)
2082 ###### print type decls
2087 while (target != 0) {
2089 for (t = context.typelist; t ; t=t->next)
2090 if (t->print_type_decl) {
2099 t->print_type_decl(t, stdout);
2105 ## Executables: the elements of code
2107 Each code element needs to be parsed, printed, analysed,
2108 interpreted, and freed. There are several, so let's just start with
2109 the easy ones and work our way up.
2113 We have already met values as separate objects. When manifest
2114 constants appear in the program text, that must result in an executable
2115 which has a constant value. So the `val` structure embeds a value in
2131 $0 = new_pos(val, $1);
2132 $0->val.type = Tbool;
2136 $0 = new_pos(val, $1);
2137 $0->val.type = Tbool;
2141 $0 = new_pos(val, $1);
2142 $0->val.type = Tnum;
2145 if (number_parse($0->val.num, tail, $1.txt) == 0)
2146 mpq_init($0->val.num);
2148 tok_err(config2context(config), "error: unsupported number suffix",
2153 $0 = new_pos(val, $1);
2154 $0->val.type = Tstr;
2157 string_parse(&$1, '\\', &$0->val.str, tail);
2159 tok_err(config2context(config), "error: unsupported string suffix",
2164 $0 = new_pos(val, $1);
2165 $0->val.type = Tstr;
2168 string_parse(&$1, '\\', &$0->val.str, tail);
2170 tok_err(config2context(config), "error: unsupported string suffix",
2175 ###### print exec cases
2178 struct val *v = cast(val, e);
2179 if (v->val.type == Tstr)
2181 print_value(v->val);
2182 if (v->val.type == Tstr)
2187 ###### propagate exec cases
2190 struct val *val = cast(val, prog);
2191 if (!type_compat(type, val->val.type, rules)) {
2192 type_err(c, "error: expected %1%r found %2",
2193 prog, type, rules, val->val.type);
2196 return val->val.type;
2199 ###### interp exec cases
2201 rv = dup_value(cast(val, e)->val);
2204 ###### ast functions
2205 static void free_val(struct val *v)
2213 ###### free exec cases
2214 case Xval: free_val(cast(val, e)); break;
2216 ###### ast functions
2217 // Move all nodes from 'b' to 'rv', reversing the order.
2218 // In 'b' 'left' is a list, and 'right' is the last node.
2219 // In 'rv', left' is the first node and 'right' is a list.
2220 static struct binode *reorder_bilist(struct binode *b)
2222 struct binode *rv = NULL;
2225 struct exec *t = b->right;
2229 b = cast(binode, b->left);
2239 Just as we used a `val` to wrap a value into an `exec`, we similarly
2240 need a `var` to wrap a `variable` into an exec. While each `val`
2241 contained a copy of the value, each `var` hold a link to the variable
2242 because it really is the same variable no matter where it appears.
2243 When a variable is used, we need to remember to follow the `->merged`
2244 link to find the primary instance.
2252 struct variable *var;
2258 VariableDecl -> IDENTIFIER : ${ {
2259 struct variable *v = var_decl(config2context(config), $1.txt);
2260 $0 = new_pos(var, $1);
2265 v = var_ref(config2context(config), $1.txt);
2267 type_err(config2context(config), "error: variable '%v' redeclared",
2269 type_err(config2context(config), "info: this is where '%v' was first declared",
2270 v->where_decl, NULL, 0, NULL);
2273 | IDENTIFIER :: ${ {
2274 struct variable *v = var_decl(config2context(config), $1.txt);
2275 $0 = new_pos(var, $1);
2281 v = var_ref(config2context(config), $1.txt);
2283 type_err(config2context(config), "error: variable '%v' redeclared",
2285 type_err(config2context(config), "info: this is where '%v' was first declared",
2286 v->where_decl, NULL, 0, NULL);
2289 | IDENTIFIER : Type ${ {
2290 struct variable *v = var_decl(config2context(config), $1.txt);
2291 $0 = new_pos(var, $1);
2296 v->val = val_prepare($<3);
2298 v = var_ref(config2context(config), $1.txt);
2300 type_err(config2context(config), "error: variable '%v' redeclared",
2302 type_err(config2context(config), "info: this is where '%v' was first declared",
2303 v->where_decl, NULL, 0, NULL);
2306 | IDENTIFIER :: Type ${ {
2307 struct variable *v = var_decl(config2context(config), $1.txt);
2308 $0 = new_pos(var, $1);
2313 v->val = val_prepare($<3);
2316 v = var_ref(config2context(config), $1.txt);
2318 type_err(config2context(config), "error: variable '%v' redeclared",
2320 type_err(config2context(config), "info: this is where '%v' was first declared",
2321 v->where_decl, NULL, 0, NULL);
2326 Variable -> IDENTIFIER ${ {
2327 struct variable *v = var_ref(config2context(config), $1.txt);
2328 $0 = new_pos(var, $1);
2330 /* This might be a label - allocate a var just in case */
2331 v = var_decl(config2context(config), $1.txt);
2333 v->val = val_prepare(Tlabel);
2334 v->val.label = &v->val;
2338 cast(var, $0)->var = v;
2343 Type -> IDENTIFIER ${
2344 $0 = find_type(config2context(config), $1.txt);
2346 tok_err(config2context(config),
2347 "error: undefined type", &$1);
2354 ###### print exec cases
2357 struct var *v = cast(var, e);
2359 struct binding *b = v->var->name;
2360 printf("%.*s", b->name.len, b->name.txt);
2367 if (loc->type == Xvar) {
2368 struct var *v = cast(var, loc);
2370 struct binding *b = v->var->name;
2371 fprintf(stderr, "%.*s", b->name.len, b->name.txt);
2373 fputs("???", stderr); // NOTEST
2375 fputs("NOTVAR", stderr); // NOTEST
2378 ###### propagate exec cases
2382 struct var *var = cast(var, prog);
2383 struct variable *v = var->var;
2385 type_err(c, "%d:BUG: no variable!!", prog, NULL, 0, NULL); // NOTEST
2387 return Tnone; // NOTEST
2391 if (v->constant && (rules & Rnoconstant)) {
2392 type_err(c, "error: Cannot assign to a constant: %v",
2393 prog, NULL, 0, NULL);
2394 type_err(c, "info: name was defined as a constant here",
2395 v->where_decl, NULL, 0, NULL);
2399 if (v->val.type == NULL) {
2400 if (type && *ok != 0) {
2401 v->val = val_prepare(type);
2402 v->where_set = prog;
2407 if (!type_compat(type, v->val.type, rules)) {
2408 type_err(c, "error: expected %1%r but variable '%v' is %2", prog,
2409 type, rules, v->val.type);
2410 type_err(c, "info: this is where '%v' was set to %1", v->where_set,
2411 v->val.type, rules, NULL);
2419 ###### interp exec cases
2422 struct var *var = cast(var, e);
2423 struct variable *v = var->var;
2431 ###### ast functions
2433 static void free_var(struct var *v)
2438 ###### free exec cases
2439 case Xvar: free_var(cast(var, e)); break;
2441 ### Expressions: Conditional
2443 Our first user of the `binode` will be conditional expressions, which
2444 is a bit odd as they actually have three components. That will be
2445 handled by having 2 binodes for each expression. The conditional
2446 expression is the lowest precedence operatior, so it gets to define
2447 what an "Expression" is. The next level up is "BoolExpr", which
2450 Conditional expressions are of the form "value `if` condition `else`
2451 other_value". They associate to the right, so everything to the right
2452 of `else` is part of an else value, while only the BoolExpr to the
2453 left of `if` is the if values. Between `if` and `else` there is no
2454 room for ambiguity, so a full conditional expression is allowed in there.
2462 Expression -> BoolExpr if Expression else Expression ${ {
2463 struct binode *b1 = new(binode);
2464 struct binode *b2 = new(binode);
2473 | BoolExpr ${ $0 = $<1; }$
2475 ###### print binode cases
2478 b2 = cast(binode, b->right);
2479 print_exec(b2->left, -1, 0);
2481 print_exec(b->left, -1, 0);
2483 print_exec(b2->right, -1, 0);
2486 ###### propagate binode cases
2489 /* cond must be Tbool, others must match */
2490 struct binode *b2 = cast(binode, b->right);
2493 propagate_types(b->left, c, ok, Tbool, 0);
2494 t = propagate_types(b2->left, c, ok, type, Rnolabel);
2495 t2 = propagate_types(b2->right, c, ok, type ?: t, Rnolabel);
2499 ###### interp binode cases
2502 struct binode *b2 = cast(binode, b->right);
2503 left = interp_exec(b->left);
2505 rv = interp_exec(b2->left);
2507 rv = interp_exec(b2->right);
2511 ### Expressions: Boolean
2513 The next class of expressions to use the `binode` will be Boolean
2514 expressions. As I haven't implemented precedence in the parser
2515 generator yet, we need different names for each precedence level used
2516 by expressions. The outer most or lowest level precedence after
2517 conditional expressions are Boolean operators which form an `BoolExpr`
2518 out of `BTerm`s and `BFact`s. As well as `or` `and`, and `not` we
2519 have `and then` and `or else` which only evaluate the second operand
2520 if the result would make a difference.
2532 BoolExpr -> BoolExpr or BTerm ${ {
2533 struct binode *b = new(binode);
2539 | BoolExpr or else BTerm ${ {
2540 struct binode *b = new(binode);
2546 | BTerm ${ $0 = $<1; }$
2548 BTerm -> BTerm and BFact ${ {
2549 struct binode *b = new(binode);
2555 | BTerm and then BFact ${ {
2556 struct binode *b = new(binode);
2562 | BFact ${ $0 = $<1; }$
2564 BFact -> not BFact ${ {
2565 struct binode *b = new(binode);
2572 ###### print binode cases
2574 print_exec(b->left, -1, 0);
2576 print_exec(b->right, -1, 0);
2579 print_exec(b->left, -1, 0);
2580 printf(" and then ");
2581 print_exec(b->right, -1, 0);
2584 print_exec(b->left, -1, 0);
2586 print_exec(b->right, -1, 0);
2589 print_exec(b->left, -1, 0);
2590 printf(" or else ");
2591 print_exec(b->right, -1, 0);
2595 print_exec(b->right, -1, 0);
2598 ###### propagate binode cases
2604 /* both must be Tbool, result is Tbool */
2605 propagate_types(b->left, c, ok, Tbool, 0);
2606 propagate_types(b->right, c, ok, Tbool, 0);
2607 if (type && type != Tbool) {
2608 type_err(c, "error: %1 operation found where %2 expected", prog,
2614 ###### interp binode cases
2616 rv = interp_exec(b->left);
2617 right = interp_exec(b->right);
2618 rv.bool = rv.bool && right.bool;
2621 rv = interp_exec(b->left);
2623 rv = interp_exec(b->right);
2626 rv = interp_exec(b->left);
2627 right = interp_exec(b->right);
2628 rv.bool = rv.bool || right.bool;
2631 rv = interp_exec(b->left);
2633 rv = interp_exec(b->right);
2636 rv = interp_exec(b->right);
2640 ### Expressions: Comparison
2642 Of slightly higher precedence that Boolean expressions are
2644 A comparison takes arguments of any comparable type, but the two types must be
2647 To simplify the parsing we introduce an `eop` which can record an
2648 expression operator.
2655 ###### ast functions
2656 static void free_eop(struct eop *e)
2671 | Expr CMPop Expr ${ {
2672 struct binode *b = new(binode);
2678 | Expr ${ $0 = $<1; }$
2683 CMPop -> < ${ $0.op = Less; }$
2684 | > ${ $0.op = Gtr; }$
2685 | <= ${ $0.op = LessEq; }$
2686 | >= ${ $0.op = GtrEq; }$
2687 | == ${ $0.op = Eql; }$
2688 | != ${ $0.op = NEql; }$
2690 ###### print binode cases
2698 print_exec(b->left, -1, 0);
2700 case Less: printf(" < "); break;
2701 case LessEq: printf(" <= "); break;
2702 case Gtr: printf(" > "); break;
2703 case GtrEq: printf(" >= "); break;
2704 case Eql: printf(" == "); break;
2705 case NEql: printf(" != "); break;
2706 default: abort(); // NOTEST
2708 print_exec(b->right, -1, 0);
2711 ###### propagate binode cases
2718 /* Both must match but not be labels, result is Tbool */
2719 t = propagate_types(b->left, c, ok, NULL, Rnolabel);
2721 propagate_types(b->right, c, ok, t, 0);
2723 t = propagate_types(b->right, c, ok, NULL, Rnolabel);
2725 t = propagate_types(b->left, c, ok, t, 0);
2727 if (!type_compat(type, Tbool, 0)) {
2728 type_err(c, "error: Comparison returns %1 but %2 expected", prog,
2729 Tbool, rules, type);
2734 ###### interp binode cases
2743 left = interp_exec(b->left);
2744 right = interp_exec(b->right);
2745 cmp = value_cmp(left, right);
2748 case Less: rv.bool = cmp < 0; break;
2749 case LessEq: rv.bool = cmp <= 0; break;
2750 case Gtr: rv.bool = cmp > 0; break;
2751 case GtrEq: rv.bool = cmp >= 0; break;
2752 case Eql: rv.bool = cmp == 0; break;
2753 case NEql: rv.bool = cmp != 0; break;
2754 default: rv.bool = 0; break; // NOTEST
2759 ### Expressions: The rest
2761 The remaining expressions with the highest precedence are arithmetic
2762 and string concatenation. They are `Expr`, `Term`, and `Factor`.
2763 The `Factor` is where the `Value` and `Variable` that we already have
2766 `+` and `-` are both infix and prefix operations (where they are
2767 absolute value and negation). These have different operator names.
2769 We also have a 'Bracket' operator which records where parentheses were
2770 found. This makes it easy to reproduce these when printing. Once
2771 precedence is handled better I might be able to discard this.
2783 Expr -> Expr Eop Term ${ {
2784 struct binode *b = new(binode);
2790 | Term ${ $0 = $<1; }$
2792 Term -> Term Top Factor ${ {
2793 struct binode *b = new(binode);
2799 | Factor ${ $0 = $<1; }$
2801 Factor -> ( Expression ) ${ {
2802 struct binode *b = new_pos(binode, $1);
2808 struct binode *b = new(binode);
2813 | Value ${ $0 = $<1; }$
2814 | Variable ${ $0 = $<1; }$
2817 Eop -> + ${ $0.op = Plus; }$
2818 | - ${ $0.op = Minus; }$
2820 Uop -> + ${ $0.op = Absolute; }$
2821 | - ${ $0.op = Negate; }$
2823 Top -> * ${ $0.op = Times; }$
2824 | / ${ $0.op = Divide; }$
2825 | % ${ $0.op = Rem; }$
2826 | ++ ${ $0.op = Concat; }$
2828 ###### print binode cases
2835 print_exec(b->left, indent, 0);
2837 case Plus: fputs(" + ", stdout); break;
2838 case Minus: fputs(" - ", stdout); break;
2839 case Times: fputs(" * ", stdout); break;
2840 case Divide: fputs(" / ", stdout); break;
2841 case Rem: fputs(" % ", stdout); break;
2842 case Concat: fputs(" ++ ", stdout); break;
2843 default: abort(); // NOTEST
2845 print_exec(b->right, indent, 0);
2849 print_exec(b->right, indent, 0);
2853 print_exec(b->right, indent, 0);
2857 print_exec(b->right, indent, 0);
2861 ###### propagate binode cases
2867 /* both must be numbers, result is Tnum */
2870 /* as propagate_types ignores a NULL,
2871 * unary ops fit here too */
2872 propagate_types(b->left, c, ok, Tnum, 0);
2873 propagate_types(b->right, c, ok, Tnum, 0);
2874 if (!type_compat(type, Tnum, 0)) {
2875 type_err(c, "error: Arithmetic returns %1 but %2 expected", prog,
2882 /* both must be Tstr, result is Tstr */
2883 propagate_types(b->left, c, ok, Tstr, 0);
2884 propagate_types(b->right, c, ok, Tstr, 0);
2885 if (!type_compat(type, Tstr, 0)) {
2886 type_err(c, "error: Concat returns %1 but %2 expected", prog,
2893 return propagate_types(b->right, c, ok, type, 0);
2895 ###### interp binode cases
2898 rv = interp_exec(b->left);
2899 right = interp_exec(b->right);
2900 mpq_add(rv.num, rv.num, right.num);
2903 rv = interp_exec(b->left);
2904 right = interp_exec(b->right);
2905 mpq_sub(rv.num, rv.num, right.num);
2908 rv = interp_exec(b->left);
2909 right = interp_exec(b->right);
2910 mpq_mul(rv.num, rv.num, right.num);
2913 rv = interp_exec(b->left);
2914 right = interp_exec(b->right);
2915 mpq_div(rv.num, rv.num, right.num);
2920 left = interp_exec(b->left);
2921 right = interp_exec(b->right);
2922 mpz_init(l); mpz_init(r); mpz_init(rem);
2923 mpz_tdiv_q(l, mpq_numref(left.num), mpq_denref(left.num));
2924 mpz_tdiv_q(r, mpq_numref(right.num), mpq_denref(right.num));
2925 mpz_tdiv_r(rem, l, r);
2926 rv = val_init(Tnum);
2927 mpq_set_z(rv.num, rem);
2928 mpz_clear(r); mpz_clear(l); mpz_clear(rem);
2932 rv = interp_exec(b->right);
2933 mpq_neg(rv.num, rv.num);
2936 rv = interp_exec(b->right);
2937 mpq_abs(rv.num, rv.num);
2940 rv = interp_exec(b->right);
2943 left = interp_exec(b->left);
2944 right = interp_exec(b->right);
2946 rv.str = text_join(left.str, right.str);
2949 ###### value functions
2951 static struct text text_join(struct text a, struct text b)
2954 rv.len = a.len + b.len;
2955 rv.txt = malloc(rv.len);
2956 memcpy(rv.txt, a.txt, a.len);
2957 memcpy(rv.txt+a.len, b.txt, b.len);
2961 ### Blocks, Statements, and Statement lists.
2963 Now that we have expressions out of the way we need to turn to
2964 statements. There are simple statements and more complex statements.
2965 Simple statements do not contain (syntactic) newlines, complex statements do.
2967 Statements often come in sequences and we have corresponding simple
2968 statement lists and complex statement lists.
2969 The former comprise only simple statements separated by semicolons.
2970 The later comprise complex statements and simple statement lists. They are
2971 separated by newlines. Thus the semicolon is only used to separate
2972 simple statements on the one line. This may be overly restrictive,
2973 but I'm not sure I ever want a complex statement to share a line with
2976 Note that a simple statement list can still use multiple lines if
2977 subsequent lines are indented, so
2979 ###### Example: wrapped simple statement list
2984 is a single simple statement list. This might allow room for
2985 confusion, so I'm not set on it yet.
2987 A simple statement list needs no extra syntax. A complex statement
2988 list has two syntactic forms. It can be enclosed in braces (much like
2989 C blocks), or it can be introduced by a colon and continue until an
2990 unindented newline (much like Python blocks). With this extra syntax
2991 it is referred to as a block.
2993 Note that a block does not have to include any newlines if it only
2994 contains simple statements. So both of:
2996 if condition: a=b; d=f
2998 if condition { a=b; print f }
3002 In either case the list is constructed from a `binode` list with
3003 `Block` as the operator. When parsing the list it is most convenient
3004 to append to the end, so a list is a list and a statement. When using
3005 the list it is more convenient to consider a list to be a statement
3006 and a list. So we need a function to re-order a list.
3007 `reorder_bilist` serves this purpose.
3009 The only stand-alone statement we introduce at this stage is `pass`
3010 which does nothing and is represented as a `NULL` pointer in a `Block`
3011 list. Other stand-alone statements will follow once the infrastructure
3031 Block -> Open Statementlist Close ${ $0 = $<2; }$
3032 | Open Newlines Statementlist Close ${ $0 = $<3; }$
3033 | Open SimpleStatements } ${ $0 = reorder_bilist($<2); }$
3034 | Open Newlines SimpleStatements } ${ $0 = reorder_bilist($<3); }$
3035 | : Statementlist ${ $0 = $<2; }$
3036 | : SimpleStatements ${ $0 = reorder_bilist($<2); }$
3038 Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<1); }$
3040 ComplexStatements -> ComplexStatements ComplexStatement ${
3046 | ComplexStatements NEWLINE ${ $0 = $<1; }$
3047 | ComplexStatement ${
3055 ComplexStatement -> SimpleStatements NEWLINE ${
3056 $0 = reorder_bilist($<1);
3058 ## ComplexStatement Grammar
3061 SimpleStatements -> SimpleStatements ; SimpleStatement ${
3067 | SimpleStatement ${
3073 | SimpleStatements ; ${ $0 = $<1; }$
3075 SimpleStatement -> pass ${ $0 = NULL; }$
3076 ## SimpleStatement Grammar
3078 ###### print binode cases
3082 if (b->left == NULL)
3085 print_exec(b->left, indent, 0);
3088 print_exec(b->right, indent, 0);
3091 // block, one per line
3092 if (b->left == NULL)
3093 do_indent(indent, "pass\n");
3095 print_exec(b->left, indent, bracket);
3097 print_exec(b->right, indent, bracket);
3101 ###### propagate binode cases
3104 /* If any statement returns something other than Tnone
3105 * or Tbool then all such must return same type.
3106 * As each statement may be Tnone or something else,
3107 * we must always pass NULL (unknown) down, otherwise an incorrect
3108 * error might occur. We never return Tnone unless it is
3113 for (e = b; e; e = cast(binode, e->right)) {
3114 t = propagate_types(e->left, c, ok, NULL, rules);
3115 if ((rules & Rboolok) && t == Tbool)
3117 if (t && t != Tnone && t != Tbool) {
3120 else if (t != type) {
3121 type_err(c, "error: expected %1%r, found %2",
3122 e->left, type, rules, t);
3130 ###### interp binode cases
3132 while (rv.type == Tnone &&
3135 rv = interp_exec(b->left);
3136 b = cast(binode, b->right);
3140 ### The Print statement
3142 `print` is a simple statement that takes a comma-separated list of
3143 expressions and prints the values separated by spaces and terminated
3144 by a newline. No control of formatting is possible.
3146 `print` faces the same list-ordering issue as blocks, and uses the
3152 ###### SimpleStatement Grammar
3154 | print ExpressionList ${
3155 $0 = reorder_bilist($<2);
3157 | print ExpressionList , ${
3162 $0 = reorder_bilist($0);
3173 ExpressionList -> ExpressionList , Expression ${
3186 ###### print binode cases
3189 do_indent(indent, "print");
3193 print_exec(b->left, -1, 0);
3197 b = cast(binode, b->right);
3203 ###### propagate binode cases
3206 /* don't care but all must be consistent */
3207 propagate_types(b->left, c, ok, NULL, Rnolabel);
3208 propagate_types(b->right, c, ok, NULL, Rnolabel);
3211 ###### interp binode cases
3217 for ( ; b; b = cast(binode, b->right))
3221 left = interp_exec(b->left);
3234 ###### Assignment statement
3236 An assignment will assign a value to a variable, providing it hasn't
3237 be declared as a constant. The analysis phase ensures that the type
3238 will be correct so the interpreter just needs to perform the
3239 calculation. There is a form of assignment which declares a new
3240 variable as well as assigning a value. If a name is assigned before
3241 it is declared, and error will be raised as the name is created as
3242 `Tlabel` and it is illegal to assign to such names.
3248 ###### SimpleStatement Grammar
3249 | Variable = Expression ${
3255 | VariableDecl = Expression ${
3263 if ($1->var->where_set == NULL) {
3264 type_err(config2context(config),
3265 "Variable declared with no type or value: %v",
3275 ###### print binode cases
3278 do_indent(indent, "");
3279 print_exec(b->left, indent, 0);
3281 print_exec(b->right, indent, 0);
3288 struct variable *v = cast(var, b->left)->var;
3289 do_indent(indent, "");
3290 print_exec(b->left, indent, 0);
3291 if (cast(var, b->left)->var->constant) {
3292 if (v->where_decl == v->where_set) {
3294 type_print(v->val.type, stdout);
3299 if (v->where_decl == v->where_set) {
3301 type_print(v->val.type, stdout);
3308 print_exec(b->right, indent, 0);
3315 ###### propagate binode cases
3319 /* Both must match and not be labels,
3320 * Type must support 'dup',
3321 * For Assign, left must not be constant.
3324 t = propagate_types(b->left, c, ok, NULL,
3325 Rnolabel | (b->op == Assign ? Rnoconstant : 0));
3330 if (propagate_types(b->right, c, ok, t, 0) != t)
3331 if (b->left->type == Xvar)
3332 type_err(c, "info: variable '%v' was set as %1 here.",
3333 cast(var, b->left)->var->where_set, t, rules, NULL);
3335 t = propagate_types(b->right, c, ok, NULL, Rnolabel);
3337 propagate_types(b->left, c, ok, t,
3338 (b->op == Assign ? Rnoconstant : 0));
3340 if (t && t->dup == NULL) {
3341 type_err(c, "error: cannot assign value of type %1", b, t, 0, NULL);
3348 ###### interp binode cases
3351 lleft = linterp_exec(b->left);
3352 right = interp_exec(b->right);
3357 free_value(right); // NOTEST
3363 struct variable *v = cast(var, b->left)->var;
3367 right = interp_exec(b->right);
3369 right = val_init(v->val.type);
3376 ### The `use` statement
3378 The `use` statement is the last "simple" statement. It is needed when
3379 the condition in a conditional statement is a block. `use` works much
3380 like `return` in C, but only completes the `condition`, not the whole
3386 ###### SimpleStatement Grammar
3388 $0 = new_pos(binode, $1);
3393 ###### print binode cases
3396 do_indent(indent, "use ");
3397 print_exec(b->right, -1, 0);
3402 ###### propagate binode cases
3405 /* result matches value */
3406 return propagate_types(b->right, c, ok, type, 0);
3408 ###### interp binode cases
3411 rv = interp_exec(b->right);
3414 ### The Conditional Statement
3416 This is the biggy and currently the only complex statement. This
3417 subsumes `if`, `while`, `do/while`, `switch`, and some parts of `for`.
3418 It is comprised of a number of parts, all of which are optional though
3419 set combinations apply. Each part is (usually) a key word (`then` is
3420 sometimes optional) followed by either an expression or a code block,
3421 except the `casepart` which is a "key word and an expression" followed
3422 by a code block. The code-block option is valid for all parts and,
3423 where an expression is also allowed, the code block can use the `use`
3424 statement to report a value. If the code block does not report a value
3425 the effect is similar to reporting `True`.
3427 The `else` and `case` parts, as well as `then` when combined with
3428 `if`, can contain a `use` statement which will apply to some
3429 containing conditional statement. `for` parts, `do` parts and `then`
3430 parts used with `for` can never contain a `use`, except in some
3431 subordinate conditional statement.
3433 If there is a `forpart`, it is executed first, only once.
3434 If there is a `dopart`, then it is executed repeatedly providing
3435 always that the `condpart` or `cond`, if present, does not return a non-True
3436 value. `condpart` can fail to return any value if it simply executes
3437 to completion. This is treated the same as returning `True`.
3439 If there is a `thenpart` it will be executed whenever the `condpart`
3440 or `cond` returns True (or does not return any value), but this will happen
3441 *after* `dopart` (when present).
3443 If `elsepart` is present it will be executed at most once when the
3444 condition returns `False` or some value that isn't `True` and isn't
3445 matched by any `casepart`. If there are any `casepart`s, they will be
3446 executed when the condition returns a matching value.
3448 The particular sorts of values allowed in case parts has not yet been
3449 determined in the language design, so nothing is prohibited.
3451 The various blocks in this complex statement potentially provide scope
3452 for variables as described earlier. Each such block must include the
3453 "OpenScope" nonterminal before parsing the block, and must call
3454 `var_block_close()` when closing the block.
3456 The code following "`if`", "`switch`" and "`for`" does not get its own
3457 scope, but is in a scope covering the whole statement, so names
3458 declared there cannot be redeclared elsewhere. Similarly the
3459 condition following "`while`" is in a scope the covers the body
3460 ("`do`" part) of the loop, and which does not allow conditional scope
3461 extension. Code following "`then`" (both looping and non-looping),
3462 "`else`" and "`case`" each get their own local scope.
3464 The type requirements on the code block in a `whilepart` are quite
3465 unusal. It is allowed to return a value of some identifiable type, in
3466 which case the loop aborts and an appropriate `casepart` is run, or it
3467 can return a Boolean, in which case the loop either continues to the
3468 `dopart` (on `True`) or aborts and runs the `elsepart` (on `False`).
3469 This is different both from the `ifpart` code block which is expected to
3470 return a Boolean, or the `switchpart` code block which is expected to
3471 return the same type as the casepart values. The correct analysis of
3472 the type of the `whilepart` code block is the reason for the
3473 `Rboolok` flag which is passed to `propagate_types()`.
3475 The `cond_statement` cannot fit into a `binode` so a new `exec` is
3484 struct exec *action;
3485 struct casepart *next;
3487 struct cond_statement {
3489 struct exec *forpart, *condpart, *dopart, *thenpart, *elsepart;
3490 struct casepart *casepart;
3493 ###### ast functions
3495 static void free_casepart(struct casepart *cp)
3499 free_exec(cp->value);
3500 free_exec(cp->action);
3507 static void free_cond_statement(struct cond_statement *s)
3511 free_exec(s->forpart);
3512 free_exec(s->condpart);
3513 free_exec(s->dopart);
3514 free_exec(s->thenpart);
3515 free_exec(s->elsepart);
3516 free_casepart(s->casepart);
3520 ###### free exec cases
3521 case Xcond_statement: free_cond_statement(cast(cond_statement, e)); break;
3523 ###### ComplexStatement Grammar
3524 | CondStatement ${ $0 = $<1; }$
3529 // both ForThen and Whilepart open scopes, and CondSuffix only
3530 // closes one - so in the first branch here we have another to close.
3531 CondStatement -> ForThen WhilePart CondSuffix ${
3533 $0->forpart = $1.forpart; $1.forpart = NULL;
3534 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
3535 $0->condpart = $2.condpart; $2.condpart = NULL;
3536 $0->dopart = $2.dopart; $2.dopart = NULL;
3537 var_block_close(config2context(config), CloseSequential);
3539 | WhilePart CondSuffix ${
3541 $0->condpart = $1.condpart; $1.condpart = NULL;
3542 $0->dopart = $1.dopart; $1.dopart = NULL;
3544 | SwitchPart CondSuffix ${
3548 | IfPart IfSuffix ${
3550 $0->condpart = $1.condpart; $1.condpart = NULL;
3551 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
3552 // This is where we close an "if" statement
3553 var_block_close(config2context(config), CloseSequential);
3556 CondSuffix -> IfSuffix ${
3558 // This is where we close scope of the whole
3559 // "for" or "while" statement
3560 var_block_close(config2context(config), CloseSequential);
3562 | CasePart CondSuffix ${
3564 $1->next = $0->casepart;
3569 CasePart -> Newlines case Expression OpenScope Block ${
3570 $0 = calloc(1,sizeof(struct casepart));
3573 var_block_close(config2context(config), CloseParallel);
3575 | case Expression OpenScope Block ${
3576 $0 = calloc(1,sizeof(struct casepart));
3579 var_block_close(config2context(config), CloseParallel);
3583 IfSuffix -> Newlines ${ $0 = new(cond_statement); }$
3584 | Newlines else OpenScope Block ${
3585 $0 = new(cond_statement);
3587 var_block_close(config2context(config), CloseElse);
3589 | else OpenScope Block ${
3590 $0 = new(cond_statement);
3592 var_block_close(config2context(config), CloseElse);
3594 | Newlines else OpenScope CondStatement ${
3595 $0 = new(cond_statement);
3597 var_block_close(config2context(config), CloseElse);
3599 | else OpenScope CondStatement ${
3600 $0 = new(cond_statement);
3602 var_block_close(config2context(config), CloseElse);
3606 // These scopes are closed in CondSuffix
3607 ForPart -> for OpenScope SimpleStatements ${
3608 $0 = reorder_bilist($<3);
3610 | for OpenScope Block ${
3614 ThenPart -> then OpenScope SimpleStatements ${
3615 $0 = reorder_bilist($<3);
3616 var_block_close(config2context(config), CloseSequential);
3618 | then OpenScope Block ${
3620 var_block_close(config2context(config), CloseSequential);
3623 ThenPartNL -> ThenPart OptNL ${
3627 // This scope is closed in CondSuffix
3628 WhileHead -> while OpenScope Block ${
3633 ForThen -> ForPart OptNL ThenPartNL ${
3641 // This scope is closed in CondSuffix
3642 WhilePart -> while OpenScope Expression Block ${
3643 $0.type = Xcond_statement;
3647 | WhileHead OptNL do Block ${
3648 $0.type = Xcond_statement;
3653 IfPart -> if OpenScope Expression OpenScope Block ${
3654 $0.type = Xcond_statement;
3657 var_block_close(config2context(config), CloseParallel);
3659 | if OpenScope Block OptNL then OpenScope Block ${
3660 $0.type = Xcond_statement;
3663 var_block_close(config2context(config), CloseParallel);
3667 // This scope is closed in CondSuffix
3668 SwitchPart -> switch OpenScope Expression ${
3671 | switch OpenScope Block ${
3675 ###### print exec cases
3677 case Xcond_statement:
3679 struct cond_statement *cs = cast(cond_statement, e);
3680 struct casepart *cp;
3682 do_indent(indent, "for");
3683 if (bracket) printf(" {\n"); else printf(":\n");
3684 print_exec(cs->forpart, indent+1, bracket);
3687 do_indent(indent, "} then {\n");
3689 do_indent(indent, "then:\n");
3690 print_exec(cs->thenpart, indent+1, bracket);
3692 if (bracket) do_indent(indent, "}\n");
3696 if (cs->condpart && cs->condpart->type == Xbinode &&
3697 cast(binode, cs->condpart)->op == Block) {
3699 do_indent(indent, "while {\n");
3701 do_indent(indent, "while:\n");
3702 print_exec(cs->condpart, indent+1, bracket);
3704 do_indent(indent, "} do {\n");
3706 do_indent(indent, "do:\n");
3707 print_exec(cs->dopart, indent+1, bracket);
3709 do_indent(indent, "}\n");
3711 do_indent(indent, "while ");
3712 print_exec(cs->condpart, 0, bracket);
3717 print_exec(cs->dopart, indent+1, bracket);
3719 do_indent(indent, "}\n");
3724 do_indent(indent, "switch");
3726 do_indent(indent, "if");
3727 if (cs->condpart && cs->condpart->type == Xbinode &&
3728 cast(binode, cs->condpart)->op == Block) {
3733 print_exec(cs->condpart, indent+1, bracket);
3735 do_indent(indent, "}\n");
3737 do_indent(indent, "then:\n");
3738 print_exec(cs->thenpart, indent+1, bracket);
3742 print_exec(cs->condpart, 0, bracket);
3748 print_exec(cs->thenpart, indent+1, bracket);
3750 do_indent(indent, "}\n");
3755 for (cp = cs->casepart; cp; cp = cp->next) {
3756 do_indent(indent, "case ");
3757 print_exec(cp->value, -1, 0);
3762 print_exec(cp->action, indent+1, bracket);
3764 do_indent(indent, "}\n");
3767 do_indent(indent, "else");
3772 print_exec(cs->elsepart, indent+1, bracket);
3774 do_indent(indent, "}\n");
3779 ###### propagate exec cases
3780 case Xcond_statement:
3782 // forpart and dopart must return Tnone
3783 // thenpart must return Tnone if there is a dopart,
3784 // otherwise it is like elsepart.
3786 // be bool if there is no casepart
3787 // match casepart->values if there is a switchpart
3788 // either be bool or match casepart->value if there
3790 // elsepart and casepart->action must match the return type
3791 // expected of this statement.
3792 struct cond_statement *cs = cast(cond_statement, prog);
3793 struct casepart *cp;
3795 t = propagate_types(cs->forpart, c, ok, Tnone, 0);
3796 if (!type_compat(Tnone, t, 0))
3798 t = propagate_types(cs->dopart, c, ok, Tnone, 0);
3799 if (!type_compat(Tnone, t, 0))
3802 t = propagate_types(cs->thenpart, c, ok, Tnone, 0);
3803 if (!type_compat(Tnone, t, 0))
3806 if (cs->casepart == NULL)
3807 propagate_types(cs->condpart, c, ok, Tbool, 0);
3809 /* Condpart must match case values, with bool permitted */
3811 for (cp = cs->casepart;
3812 cp && !t; cp = cp->next)
3813 t = propagate_types(cp->value, c, ok, NULL, 0);
3814 if (!t && cs->condpart)
3815 t = propagate_types(cs->condpart, c, ok, NULL, Rboolok);
3816 // Now we have a type (I hope) push it down
3818 for (cp = cs->casepart; cp; cp = cp->next)
3819 propagate_types(cp->value, c, ok, t, 0);
3820 propagate_types(cs->condpart, c, ok, t, Rboolok);
3823 // (if)then, else, and case parts must return expected type.
3824 if (!cs->dopart && !type)
3825 type = propagate_types(cs->thenpart, c, ok, NULL, rules);
3827 type = propagate_types(cs->elsepart, c, ok, NULL, rules);
3828 for (cp = cs->casepart;
3831 type = propagate_types(cp->action, c, ok, NULL, rules);
3834 propagate_types(cs->thenpart, c, ok, type, rules);
3835 propagate_types(cs->elsepart, c, ok, type, rules);
3836 for (cp = cs->casepart; cp ; cp = cp->next)
3837 propagate_types(cp->action, c, ok, type, rules);
3843 ###### interp exec cases
3844 case Xcond_statement:
3846 struct value v, cnd;
3847 struct casepart *cp;
3848 struct cond_statement *c = cast(cond_statement, e);
3851 interp_exec(c->forpart);
3854 cnd = interp_exec(c->condpart);
3857 if (!(cnd.type == Tnone ||
3858 (cnd.type == Tbool && cnd.bool != 0)))
3860 // cnd is Tnone or Tbool, doesn't need to be freed
3862 interp_exec(c->dopart);
3865 rv = interp_exec(c->thenpart);
3866 if (rv.type != Tnone || !c->dopart)
3870 } while (c->dopart);
3872 for (cp = c->casepart; cp; cp = cp->next) {
3873 v = interp_exec(cp->value);
3874 if (value_cmp(v, cnd) == 0) {
3877 rv = interp_exec(cp->action);
3884 rv = interp_exec(c->elsepart);
3891 ### Top level structure
3893 All the language elements so far can be used in various places. Now
3894 it is time to clarify what those places are.
3896 At the top level of a file there will be a number of declarations.
3897 Many of the things that can be declared haven't been described yet,
3898 such as functions, procedures, imports, and probably more.
3899 For now there are two sorts of things that can appear at the top
3900 level. They are predefined constants, `struct` types, and the main
3901 program. While the syntax will allow the main program to appear
3902 multiple times, that will trigger an error if it is actually attempted.
3904 The various declarations do not return anything. They store the
3905 various declarations in the parse context.
3907 ###### Parser: grammar
3910 Ocean -> DeclarationList
3912 DeclarationList -> Declaration
3913 | DeclarationList Declaration
3915 Declaration -> DeclareConstant
3920 ## top level grammar
3922 ### The `const` section
3924 As well as being defined in with the code that uses them, constants
3925 can be declared at the top level. These have full-file scope, so they
3926 are always `InScope`. The value of a top level constant can be given
3927 as an expression, and this is evaluated immediately rather than in the
3928 later interpretation stage. Once we add functions to the language, we
3929 will need rules concern which, if any, can be used to define a top
3932 Constants are defined in a section that starts with the reserved word
3933 `const` and then has a block with a list of assignment statements.
3934 For syntactic consistency, these must use the double-colon syntax to
3935 make it clear that they are constants. Type can also be given: if
3936 not, the type will be determined during analysis, as with other
3939 As the types constants are inserted at the head of a list, printing
3940 them in the same order that they were read is not straight forward.
3941 We take a quadratic approach here and count the number of constants
3942 (variables of depth 0), then count down from there, each time
3943 searching through for the Nth constant for decreasing N.
3945 ###### top level grammar
3947 DeclareConstant -> const Open ConstList Close
3948 | const Open Newlines ConstList Close
3949 | const Open SimpleConstList }
3950 | const Open Newlines SimpleConstList }
3952 | const SimpleConstList
3954 ConstList -> ComplexConsts
3955 ComplexConsts -> ComplexConst ComplexConsts
3957 ComplexConst -> SimpleConstList NEWLINE
3958 SimpleConstList -> Const ; SimpleConstList
3960 | Const ; SimpleConstList ;
3963 CType -> Type ${ $0 = $<1; }$
3966 Const -> IDENTIFIER :: CType = Expression ${ {
3970 v = var_decl(config2context(config), $1.txt);
3972 struct var *var = new_pos(var, $1);
3973 v->where_decl = var;
3978 v = var_ref(config2context(config), $1.txt);
3979 tok_err(config2context(config), "error: name already declared", &$1);
3980 type_err(config2context(config), "info: this is where '%v' was first declared",
3981 v->where_decl, NULL, 0, NULL);
3985 propagate_types($5, config2context(config), &ok, $3, 0);
3988 config2context(config)->parse_error = 1;
3990 v->val = interp_exec($5);
3994 ###### print const decls
3999 while (target != 0) {
4001 for (v = context.in_scope; v; v=v->in_scope)
4002 if (v->depth == 0) {
4013 printf(" %.*s :: ", v->name->name.len, v->name->name.txt);
4014 type_print(v->val.type, stdout);
4016 if (v->val.type == Tstr)
4018 print_value(v->val);
4019 if (v->val.type == Tstr)
4027 ### Finally the whole program.
4029 Somewhat reminiscent of Pascal a (current) Ocean program starts with
4030 the keyword "program" and a list of variable names which are assigned
4031 values from command line arguments. Following this is a `block` which
4032 is the code to execute. Unlike Pascal, constants and other
4033 declarations come *before* the program.
4035 As this is the top level, several things are handled a bit
4037 The whole program is not interpreted by `interp_exec` as that isn't
4038 passed the argument list which the program requires. Similarly type
4039 analysis is a bit more interesting at this level.
4044 ###### top level grammar
4046 DeclareProgram -> Program ${ {
4047 struct parse_context *c = config2context(config);
4049 type_err(c, "Program defined a second time",
4056 Program -> program OpenScope Varlist Block OptNL ${
4059 $0->left = reorder_bilist($<3);
4061 var_block_close(config2context(config), CloseSequential);
4062 if (config2context(config)->scope_stack) abort();
4065 tok_err(config2context(config),
4066 "error: unhandled parse error", &$1);
4069 Varlist -> Varlist ArgDecl ${
4078 ArgDecl -> IDENTIFIER ${ {
4079 struct variable *v = var_decl(config2context(config), $1.txt);
4086 ###### print binode cases
4088 do_indent(indent, "program");
4089 for (b2 = cast(binode, b->left); b2; b2 = cast(binode, b2->right)) {
4091 print_exec(b2->left, 0, 0);
4097 print_exec(b->right, indent+1, bracket);
4099 do_indent(indent, "}\n");
4102 ###### propagate binode cases
4103 case Program: abort(); // NOTEST
4105 ###### core functions
4107 static int analyse_prog(struct exec *prog, struct parse_context *c)
4109 struct binode *b = cast(binode, prog);
4116 propagate_types(b->right, c, &ok, Tnone, 0);
4121 for (b = cast(binode, b->left); b; b = cast(binode, b->right)) {
4122 struct var *v = cast(var, b->left);
4123 if (!v->var->val.type) {
4124 v->var->where_set = b;
4125 v->var->val = val_prepare(Tstr);
4128 b = cast(binode, prog);
4131 propagate_types(b->right, c, &ok, Tnone, 0);
4136 /* Make sure everything is still consistent */
4137 propagate_types(b->right, c, &ok, Tnone, 0);
4141 static void interp_prog(struct exec *prog, char **argv)
4143 struct binode *p = cast(binode, prog);
4149 al = cast(binode, p->left);
4151 struct var *v = cast(var, al->left);
4152 struct value *vl = &v->var->val;
4154 if (argv[0] == NULL) {
4155 printf("Not enough args\n");
4158 al = cast(binode, al->right);
4160 *vl = parse_value(vl->type, argv[0]);
4161 if (vl->type == NULL)
4165 v = interp_exec(p->right);
4169 ###### interp binode cases
4170 case Program: abort(); // NOTEST
4172 ## And now to test it out.
4174 Having a language requires having a "hello world" program. I'll
4175 provide a little more than that: a program that prints "Hello world"
4176 finds the GCD of two numbers, prints the first few elements of
4177 Fibonacci, performs a binary search for a number, and a few other
4178 things which will likely grow as the languages grows.
4180 ###### File: oceani.mk
4183 @echo "===== DEMO ====="
4184 ./oceani --section "demo: hello" oceani.mdc 55 33
4190 four ::= 2 + 2 ; five ::= 10/2
4191 const pie ::= "I like Pie";
4192 cake ::= "The cake is"
4201 print "Hello World, what lovely oceans you have!"
4202 print "Are there", five, "?"
4203 print pi, pie, "but", cake
4205 /* When a variable is defined in both branches of an 'if',
4206 * and used afterwards, the variables are merged.
4212 print "Is", A, "bigger than", B,"? ", bigger
4213 /* If a variable is not used after the 'if', no
4214 * merge happens, so types can be different
4217 double:string = "yes"
4218 print A, "is more than twice", B, "?", double
4221 print "double", B, "is", double
4226 if a > 0 and then b > 0:
4232 print "GCD of", A, "and", B,"is", a
4234 print a, "is not positive, cannot calculate GCD"
4236 print b, "is not positive, cannot calculate GCD"
4241 print "Fibonacci:", f1,f2,
4242 then togo = togo - 1
4250 /* Binary search... */
4255 mid := (lo + hi) / 2
4267 print "Yay, I found", target
4269 print "Closest I found was", mid
4274 // "middle square" PRNG. Not particularly good, but one my
4275 // Dad taught me - the first one I ever heard of.
4276 for i:=1; then i = i + 1; while i < size:
4277 n := list[i-1] * list[i-1]
4278 list[i] = (n / 100) % 10000
4280 print "Before sort:",
4281 for i:=0; then i = i + 1; while i < size:
4285 for i := 1; then i=i+1; while i < size:
4286 for j:=i-1; then j=j-1; while j >= 0:
4287 if list[j] > list[j+1]:
4291 print " After sort:",
4292 for i:=0; then i = i + 1; while i < size:
4298 bob.alive = (bob.name == "Hello")
4299 print "bob", "is" if bob.alive else "isn't", "alive"