1 # Ocean Interpreter - Jamison Creek version
3 Ocean is intended to be a compiled language, so this interpreter is
4 not targeted at being the final product. It is, rather, an intermediate
5 stage and fills that role in two distinct ways.
7 Firstly, it exists as a platform to experiment with the early language
8 design. An interpreter is easy to write and easy to get working, so
9 the barrier for entry is lower if I aim to start with an interpreter.
11 Secondly, the plan for the Ocean compiler is to write it in the
12 [Ocean language](http://ocean-lang.org). To achieve this we naturally
13 need some sort of boot-strap process and this interpreter - written in
14 portable C - will fill that role. It will be used to bootstrap the
17 Two features that are not needed to fill either of these roles are
18 performance and completeness. The interpreter only needs to be fast
19 enough to run small test programs and occasionally to run the compiler
20 on itself. It only needs to be complete enough to test aspects of the
21 design which are developed before the compiler is working, and to run
22 the compiler on itself. Any features not used by the compiler when
23 compiling itself are superfluous. They may be included anyway, but
26 Nonetheless, the interpreter should end up being reasonably complete,
27 and any performance bottlenecks which appear and are easily fixed, will
32 This third version of the interpreter exists to test out some initial
33 ideas relating to types. Particularly it adds arrays (indexed from
34 zero) and simple structures. Basic control flow and variable scoping
35 are already fairly well established, as are basic numerical and
38 Some operators that have only recently been added, and so have not
39 generated all that much experience yet are "and then" and "or else" as
40 short-circuit Boolean operators, and the "if ... else" trinary
41 operator which can select between two expressions based on a third
42 (which appears syntactically in the middle).
44 Elements that are present purely to make a usable language, and
45 without any expectation that they will remain, are the "program'
46 clause, which provides a list of variables to received command-line
47 arguments, and the "print" statement which performs simple output.
49 The current scalar types are "number", "Boolean", and "string".
50 Boolean will likely stay in its current form, the other two might, but
51 could just as easily be changed.
55 Versions of the interpreter which obviously do not support a complete
56 language will be named after creeks and streams. This one is Jamison
59 Once we have something reasonably resembling a complete language, the
60 names of rivers will be used.
61 Early versions of the compiler will be named after seas. Major
62 releases of the compiler will be named after oceans. Hopefully I will
63 be finished once I get to the Pacific Ocean release.
67 As well as parsing and executing a program, the interpreter can print
68 out the program from the parsed internal structure. This is useful
69 for validating the parsing.
70 So the main requirements of the interpreter are:
72 - Parse the program, possibly with tracing,
73 - Analyse the parsed program to ensure consistency,
75 - Execute the program, if no parsing or consistency errors were found.
77 This is all performed by a single C program extracted with
80 There will be two formats for printing the program: a default and one
81 that uses bracketing. So a `--bracket` command line option is needed
82 for that. Normally the first code section found is used, however an
83 alternate section can be requested so that a file (such as this one)
84 can contain multiple programs This is effected with the `--section`
87 This code must be compiled with `-fplan9-extensions` so that anonymous
88 structures can be used.
90 ###### File: oceani.mk
92 myCFLAGS := -Wall -g -fplan9-extensions
93 CFLAGS := $(filter-out $(myCFLAGS),$(CFLAGS)) $(myCFLAGS)
94 myLDLIBS:= libparser.o libscanner.o libmdcode.o -licuuc
95 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
97 all :: $(LDLIBS) oceani
98 oceani.c oceani.h : oceani.mdc parsergen
99 ./parsergen -o oceani --LALR --tag Parser oceani.mdc
100 oceani.mk: oceani.mdc md2c
103 oceani: oceani.o $(LDLIBS)
104 $(CC) $(CFLAGS) -o oceani oceani.o $(LDLIBS)
106 ###### Parser: header
109 struct parse_context {
110 struct token_config config;
119 #define container_of(ptr, type, member) ({ \
120 const typeof( ((type *)0)->member ) *__mptr = (ptr); \
121 (type *)( (char *)__mptr - offsetof(type,member) );})
123 #define config2context(_conf) container_of(_conf, struct parse_context, \
132 #include <sys/mman.h>
151 static char Usage[] = "Usage: oceani --trace --print --noexec --brackets"
152 "--section=SectionName prog.ocn\n";
153 static const struct option long_options[] = {
154 {"trace", 0, NULL, 't'},
155 {"print", 0, NULL, 'p'},
156 {"noexec", 0, NULL, 'n'},
157 {"brackets", 0, NULL, 'b'},
158 {"section", 1, NULL, 's'},
161 const char *options = "tpnbs";
162 int main(int argc, char *argv[])
167 struct section *s, *ss;
168 char *section = NULL;
169 struct parse_context context = {
171 .ignored = (1 << TK_line_comment)
172 | (1 << TK_block_comment)
174 .number_chars = ".,_+- ",
179 int doprint=0, dotrace=0, doexec=1, brackets=0;
181 while ((opt = getopt_long(argc, argv, options, long_options, NULL))
184 case 't': dotrace=1; break;
185 case 'p': doprint=1; break;
186 case 'n': doexec=0; break;
187 case 'b': brackets=1; break;
188 case 's': section = optarg; break;
189 default: fprintf(stderr, Usage);
193 if (optind >= argc) {
194 fprintf(stderr, "oceani: no input file given\n");
197 fd = open(argv[optind], O_RDONLY);
199 fprintf(stderr, "oceani: cannot open %s\n", argv[optind]);
202 context.file_name = argv[optind];
203 len = lseek(fd, 0, 2);
204 file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
205 s = code_extract(file, file+len, NULL);
207 fprintf(stderr, "oceani: could not find any code in %s\n",
212 ## context initialization
215 for (ss = s; ss; ss = ss->next) {
216 struct text sec = ss->section;
217 if (sec.len == strlen(section) &&
218 strncmp(sec.txt, section, sec.len) == 0)
222 fprintf(stderr, "oceani: cannot find section %s\n",
228 parse_oceani(ss->code, &context.config, dotrace ? stderr : NULL);
231 fprintf(stderr, "oceani: no program found.\n");
232 context.parse_error = 1;
234 if (context.prog && doprint) {
237 print_exec(context.prog, 0, brackets);
239 if (context.prog && doexec && !context.parse_error) {
240 if (!analyse_prog(context.prog, &context)) {
241 fprintf(stderr, "oceani: type error in program - not running.\n");
244 interp_prog(context.prog, argv+optind+1);
246 free_exec(context.prog);
249 struct section *t = s->next;
255 ## free context types
256 exit(context.parse_error ? 1 : 0);
261 The four requirements of parse, analyse, print, interpret apply to
262 each language element individually so that is how most of the code
265 Three of the four are fairly self explanatory. The one that requires
266 a little explanation is the analysis step.
268 The current language design does not require the types of variables to
269 be declared, but they must still have a single type. Different
270 operations impose different requirements on the variables, for example
271 addition requires both arguments to be numeric, and assignment
272 requires the variable on the left to have the same type as the
273 expression on the right.
275 Analysis involves propagating these type requirements around and
276 consequently setting the type of each variable. If any requirements
277 are violated (e.g. a string is compared with a number) or if a
278 variable needs to have two different types, then an error is raised
279 and the program will not run.
281 If the same variable is declared in both branchs of an 'if/else', or
282 in all cases of a 'switch' then the multiple instances may be merged
283 into just one variable if the variable is references after the
284 conditional statement. When this happens, the types must naturally be
285 consistent across all the branches. When the variable is not used
286 outside the if, the variables in the different branches are distinct
287 and can be of different types.
289 Determining the types of all variables early is important for
290 processing command line arguments. These can be assigned to any of
291 several types of variable, but we must first know the correct type so
292 any required conversion can happen. If a variable is associated with
293 a command line argument but no type can be interpreted (e.g. the
294 variable is only ever used in a `print` statement), then the type is
297 Undeclared names may only appear in "use" statements and "case" expressions.
298 These names are given a type of "label" and a unique value.
299 This allows them to fill the role of a name in an enumerated type, which
300 is useful for testing the `switch` statement.
302 As we will see, the condition part of a `while` statement can return
303 either a Boolean or some other type. This requires that the expected
304 type that gets passed around comprises a type and a flag to indicate
305 that `Tbool` is also permitted.
307 As there are, as yet, no distinct types that are compatible, there
308 isn't much subtlety in the analysis. When we have distinct number
309 types, this will become more interesting.
313 When analysis discovers an inconsistency it needs to report an error;
314 just refusing to run the code ensures that the error doesn't cascade,
315 but by itself it isn't very useful. A clear understanding of the sort
316 of error message that are useful will help guide the process of
319 At a simplistic level, the only sort of error that type analysis can
320 report is that the type of some construct doesn't match a contextual
321 requirement. For example, in `4 + "hello"` the addition provides a
322 contextual requirement for numbers, but `"hello"` is not a number. In
323 this particular example no further information is needed as the types
324 are obvious from local information. When a variable is involved that
325 isn't the case. It may be helpful to explain why the variable has a
326 particular type, by indicating the location where the type was set,
327 whether by declaration or usage.
329 Using a recursive-descent analysis we can easily detect a problem at
330 multiple locations. In "`hello:= "there"; 4 + hello`" the addition
331 will detect that one argument is not a number and the usage of `hello`
332 will detect that a number was wanted, but not provided. In this
333 (early) version of the language, we will generate error reports at
334 multiple locations, so the use of `hello` will report an error and
335 explain were the value was set, and the addition will report an error
336 and say why numbers are needed. To be able to report locations for
337 errors, each language element will need to record a file location
338 (line and column) and each variable will need to record the language
339 element where its type was set. For now we will assume that each line
340 of an error message indicates one location in the file, and up to 2
341 types. So we provide a `printf`-like function which takes a format, a
342 language (a `struct exec` which has not yet been introduced), and 2
343 types. "`%1`" reports the first type, "`%2`" reports the second. We
344 will need a function to print the location, once we know how that is
345 stored. As will be explained later, there are sometimes extra rules for
346 type matching and they might affect error messages, we need to pass those
349 As well as type errors, we sometimes need to report problems with
350 tokens, which might be unexpected or might name a type that has not
351 been defined. For these we have `tok_err()` which reports an error
352 with a given token. Each of the error functions sets the flag in the
353 context so indicate that parsing failed.
357 static void fput_loc(struct exec *loc, FILE *f);
359 ###### core functions
361 static void type_err(struct parse_context *c,
362 char *fmt, struct exec *loc,
363 struct type *t1, int rules, struct type *t2)
365 fprintf(stderr, "%s:", c->file_name);
366 fput_loc(loc, stderr);
367 for (; *fmt ; fmt++) {
374 case '%': fputc(*fmt, stderr); break; // NOTEST
375 default: fputc('?', stderr); break; // NOTEST
377 type_print(t1, stderr);
380 type_print(t2, stderr);
389 static void tok_err(struct parse_context *c, char *fmt, struct token *t)
391 fprintf(stderr, "%s:%d:%d: %s: %.*s\n", c->file_name, t->line, t->col, fmt,
392 t->txt.len, t->txt.txt);
396 ## Entities: declared and predeclared.
398 There are various "things" that the language and/or the interpreter
399 needs to know about to parse and execute a program. These include
400 types, variables, values, and executable code. These are all lumped
401 together under the term "entities" (calling them "objects" would be
402 confusing) and introduced here. These will introduced and described
403 here. The following section will present the different specific code
404 elements which comprise or manipulate these various entities.
408 Values come in a wide range of types, with more likely to be added.
409 Each type needs to be able to parse and print its own values (for
410 convenience at least) as well as to compare two values, at least for
411 equality and possibly for order. For now, values might need to be
412 duplicated and freed, though eventually such manipulations will be
413 better integrated into the language.
415 Rather than requiring every numeric type to support all numeric
416 operations (add, multiple, etc), we allow types to be able to present
417 as one of a few standard types: integer, float, and fraction. The
418 existence of these conversion functions eventaully enable types to
419 determine if they are compatible with other types, though such types
420 have not yet been implemented.
422 Named type are stored in a simple linked list. Objects of each type are "values"
423 which are often passed around by value.
430 ## value union fields
437 struct value (*init)(struct type *type);
438 struct value (*prepare)(struct type *type);
439 struct value (*parse)(struct type *type, char *str);
440 void (*print)(struct value val);
441 void (*print_type)(struct type *type, FILE *f);
442 int (*cmp_order)(struct value v1, struct value v2);
443 int (*cmp_eq)(struct value v1, struct value v2);
444 struct value (*dup)(struct value val);
445 void (*free)(struct value val);
446 void (*free_type)(struct type *t);
447 int (*compat)(struct type *this, struct type *other);
448 long long (*to_int)(struct value *v);
449 double (*to_float)(struct value *v);
450 int (*to_mpq)(mpq_t *q, struct value *v);
459 struct type *typelist;
463 static struct type *find_type(struct parse_context *c, struct text s)
465 struct type *l = c->typelist;
468 text_cmp(l->name, s) != 0)
473 static struct type *add_type(struct parse_context *c, struct text s,
478 n = calloc(1, sizeof(*n));
481 n->next = c->typelist;
486 static void free_type(struct type *t)
488 /* The type is always a reference to something in the
489 * context, so we don't need to free anything.
493 static void free_value(struct value v)
499 static int type_compat(struct type *require, struct type *have, int rules)
501 if ((rules & Rboolok) && have == Tbool)
503 if ((rules & Rnolabel) && have == Tlabel)
505 if (!require || !have)
509 return require->compat(require, have);
511 return require == have;
514 static void type_print(struct type *type, FILE *f)
517 fputs("*unknown*type*", f);
518 else if (type->name.len)
519 fprintf(f, "%.*s", type->name.len, type->name.txt);
520 else if (type->print_type)
521 type->print_type(type, f);
523 fputs("*invalid*type*", f); // NOTEST
526 static struct value val_prepare(struct type *type)
531 return type->prepare(type);
536 static struct value val_init(struct type *type)
541 return type->init(type);
546 static struct value dup_value(struct value v)
549 return v.type->dup(v);
553 static int value_cmp(struct value left, struct value right)
555 if (left.type && left.type->cmp_order)
556 return left.type->cmp_order(left, right);
557 if (left.type && left.type->cmp_eq)
558 return left.type->cmp_eq(left, right);
562 static void print_value(struct value v)
564 if (v.type && v.type->print)
567 printf("*Unknown*"); // NOTEST
570 static struct value parse_value(struct type *type, char *arg)
574 if (type && type->parse)
575 return type->parse(type, arg);
576 rv.type = NULL; // NOTEST
582 static void free_value(struct value v);
583 static int type_compat(struct type *require, struct type *have, int rules);
584 static void type_print(struct type *type, FILE *f);
585 static struct value val_init(struct type *type);
586 static struct value dup_value(struct value v);
587 static int value_cmp(struct value left, struct value right);
588 static void print_value(struct value v);
589 static struct value parse_value(struct type *type, char *arg);
591 ###### free context types
593 while (context.typelist) {
594 struct type *t = context.typelist;
596 context.typelist = t->next;
604 Values of the base types can be numbers, which we represent as
605 multi-precision fractions, strings, Booleans and labels. When
606 analysing the program we also need to allow for places where no value
607 is meaningful (type `Tnone`) and where we don't know what type to
608 expect yet (type is `NULL`).
610 Values are never shared, they are always copied when used, and freed
611 when no longer needed.
613 When propagating type information around the program, we need to
614 determine if two types are compatible, where type `NULL` is compatible
615 with anything. There are two special cases with type compatibility,
616 both related to the Conditional Statement which will be described
617 later. In some cases a Boolean can be accepted as well as some other
618 primary type, and in others any type is acceptable except a label (`Vlabel`).
619 A separate function encoding these cases will simplify some code later.
621 When assigning command line arguments to variables, we need to be able
622 to parse each type from a string.
624 The distinction beteen "prepare" and "init" needs to be explained.
625 "init" sets up an initial value, such as "zero" or the empty string.
626 "prepare" simply prepares the data structure so that if "free" gets
627 called on it, it won't do something silly. Normally a value will be
628 stored after "prepare" but before "free", but this might not happen if
637 myLDLIBS := libnumber.o libstring.o -lgmp
638 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
640 ###### type union fields
641 enum vtype {Vnone, Vstr, Vnum, Vbool, Vlabel} vtype;
643 ###### value union fields
650 static void _free_value(struct value v)
652 switch (v.type->vtype) {
654 case Vstr: free(v.str.txt); break;
655 case Vnum: mpq_clear(v.num); break;
661 ###### value functions
663 static struct value _val_prepare(struct type *type)
668 switch(type->vtype) {
672 memset(&rv.num, 0, sizeof(rv.num));
688 static struct value _val_init(struct type *type)
693 switch(type->vtype) {
694 case Vnone: // NOTEST
697 mpq_init(rv.num); break;
699 rv.str.txt = malloc(1);
705 case Vlabel: // NOTEST
706 rv.label = NULL; // NOTEST
712 static struct value _dup_value(struct value v)
716 switch (rv.type->vtype) {
717 case Vnone: // NOTEST
727 mpq_set(rv.num, v.num);
730 rv.str.len = v.str.len;
731 rv.str.txt = malloc(rv.str.len);
732 memcpy(rv.str.txt, v.str.txt, v.str.len);
738 static int _value_cmp(struct value left, struct value right)
741 if (left.type != right.type)
742 return left.type - right.type; // NOTEST
743 switch (left.type->vtype) {
744 case Vlabel: cmp = left.label == right.label ? 0 : 1; break;
745 case Vnum: cmp = mpq_cmp(left.num, right.num); break;
746 case Vstr: cmp = text_cmp(left.str, right.str); break;
747 case Vbool: cmp = left.bool - right.bool; break;
748 case Vnone: cmp = 0; // NOTEST
753 static void _print_value(struct value v)
755 switch (v.type->vtype) {
756 case Vnone: // NOTEST
757 printf("*no-value*"); break; // NOTEST
758 case Vlabel: // NOTEST
759 printf("*label-%p*", v.label); break; // NOTEST
761 printf("%.*s", v.str.len, v.str.txt); break;
763 printf("%s", v.bool ? "True":"False"); break;
768 mpf_set_q(fl, v.num);
769 gmp_printf("%Fg", fl);
776 static struct value _parse_value(struct type *type, char *arg)
784 switch(type->vtype) {
785 case Vlabel: // NOTEST
786 case Vnone: // NOTEST
787 val.type = NULL; // NOTEST
790 val.str.len = strlen(arg);
791 val.str.txt = malloc(val.str.len);
792 memcpy(val.str.txt, arg, val.str.len);
799 tx.txt = arg; tx.len = strlen(tx.txt);
800 if (number_parse(val.num, tail, tx) == 0)
803 mpq_neg(val.num, val.num);
805 printf("Unsupported suffix: %s\n", arg);
810 if (strcasecmp(arg, "true") == 0 ||
811 strcmp(arg, "1") == 0)
813 else if (strcasecmp(arg, "false") == 0 ||
814 strcmp(arg, "0") == 0)
817 printf("Bad bool: %s\n", arg);
825 static void _free_value(struct value v);
827 static struct type base_prototype = {
829 .prepare = _val_prepare,
830 .parse = _parse_value,
831 .print = _print_value,
832 .cmp_order = _value_cmp,
833 .cmp_eq = _value_cmp,
838 static struct type *Tbool, *Tstr, *Tnum, *Tnone, *Tlabel;
841 static struct type *add_base_type(struct parse_context *c, char *n, enum vtype vt)
843 struct text txt = { n, strlen(n) };
846 t = add_type(c, txt, &base_prototype);
851 ###### context initialization
853 Tbool = add_base_type(&context, "Boolean", Vbool);
854 Tstr = add_base_type(&context, "string", Vstr);
855 Tnum = add_base_type(&context, "number", Vnum);
856 Tnone = add_base_type(&context, "none", Vnone);
857 Tlabel = add_base_type(&context, "label", Vlabel);
861 Variables are scoped named values. We store the names in a linked
862 list of "bindings" sorted lexically, and use sequential search and
869 struct binding *next; // in lexical order
873 This linked list is stored in the parse context so that "reduce"
874 functions can find or add variables, and so the analysis phase can
875 ensure that every variable gets a type.
879 struct binding *varlist; // In lexical order
883 static struct binding *find_binding(struct parse_context *c, struct text s)
885 struct binding **l = &c->varlist;
890 (cmp = text_cmp((*l)->name, s)) < 0)
894 n = calloc(1, sizeof(*n));
901 Each name can be linked to multiple variables defined in different
902 scopes. Each scope starts where the name is declared and continues
903 until the end of the containing code block. Scopes of a given name
904 cannot nest, so a declaration while a name is in-scope is an error.
906 ###### binding fields
907 struct variable *var;
911 struct variable *previous;
913 struct binding *name;
914 struct exec *where_decl;// where name was declared
915 struct exec *where_set; // where type was set
919 While the naming seems strange, we include local constants in the
920 definition of variables. A name declared `var := value` can
921 subsequently be changed, but a name declared `var ::= value` cannot -
924 ###### variable fields
927 Scopes in parallel branches can be partially merged. More
928 specifically, if a given name is declared in both branches of an
929 if/else then its scope is a candidate for merging. Similarly if
930 every branch of an exhaustive switch (e.g. has an "else" clause)
931 declares a given name, then the scopes from the branches are
932 candidates for merging.
934 Note that names declared inside a loop (which is only parallel to
935 itself) are never visible after the loop. Similarly names defined in
936 scopes which are not parallel, such as those started by `for` and
937 `switch`, are never visible after the scope. Only variables defined in
938 both `then` and `else` (including the implicit then after an `if`, and
939 excluding `then` used with `for`) and in all `case`s and `else` of a
940 `switch` or `while` can be visible beyond the `if`/`switch`/`while`.
942 Labels, which are a bit like variables, follow different rules.
943 Labels are not explicitly declared, but if an undeclared name appears
944 in a context where a label is legal, that effectively declares the
945 name as a label. The declaration remains in force (or in scope) at
946 least to the end of the immediately containing block and conditionally
947 in any larger containing block which does not declare the name in some
948 other way. Importantly, the conditional scope extension happens even
949 if the label is only used in one parallel branch of a conditional --
950 when used in one branch it is treated as having been declared in all
953 Merge candidates are tentatively visible beyond the end of the
954 branching statement which creates them. If the name is used, the
955 merge is affirmed and they become a single variable visible at the
956 outer layer. If not - if it is redeclared first - the merge lapses.
958 To track scopes we have an extra stack, implemented as a linked list,
959 which roughly parallels the parse stack and which is used exclusively
960 for scoping. When a new scope is opened, a new frame is pushed and
961 the child-count of the parent frame is incremented. This child-count
962 is used to distinguish between the first of a set of parallel scopes,
963 in which declared variables must not be in scope, and subsequent
964 branches, whether they must already be conditionally scoped.
966 To push a new frame *before* any code in the frame is parsed, we need a
967 grammar reduction. This is most easily achieved with a grammar
968 element which derives the empty string, and creates the new scope when
969 it is recognized. This can be placed, for example, between a keyword
970 like "if" and the code following it.
974 struct scope *parent;
980 struct scope *scope_stack;
983 static void scope_pop(struct parse_context *c)
985 struct scope *s = c->scope_stack;
987 c->scope_stack = s->parent;
992 static void scope_push(struct parse_context *c)
994 struct scope *s = calloc(1, sizeof(*s));
996 c->scope_stack->child_count += 1;
997 s->parent = c->scope_stack;
1005 OpenScope -> ${ scope_push(config2context(config)); }$
1007 Each variable records a scope depth and is in one of four states:
1009 - "in scope". This is the case between the declaration of the
1010 variable and the end of the containing block, and also between
1011 the usage with affirms a merge and the end of that block.
1013 The scope depth is not greater than the current parse context scope
1014 nest depth. When the block of that depth closes, the state will
1015 change. To achieve this, all "in scope" variables are linked
1016 together as a stack in nesting order.
1018 - "pending". The "in scope" block has closed, but other parallel
1019 scopes are still being processed. So far, every parallel block at
1020 the same level that has closed has declared the name.
1022 The scope depth is the depth of the last parallel block that
1023 enclosed the declaration, and that has closed.
1025 - "conditionally in scope". The "in scope" block and all parallel
1026 scopes have closed, and no further mention of the name has been
1027 seen. This state includes a secondary nest depth which records the
1028 outermost scope seen since the variable became conditionally in
1029 scope. If a use of the name is found, the variable becomes "in
1030 scope" and that secondary depth becomes the recorded scope depth.
1031 If the name is declared as a new variable, the old variable becomes
1032 "out of scope" and the recorded scope depth stays unchanged.
1034 - "out of scope". The variable is neither in scope nor conditionally
1035 in scope. It is permanently out of scope now and can be removed from
1036 the "in scope" stack.
1038 ###### variable fields
1039 int depth, min_depth;
1040 enum { OutScope, PendingScope, CondScope, InScope } scope;
1041 struct variable *in_scope;
1043 ###### parse context
1045 struct variable *in_scope;
1047 All variables with the same name are linked together using the
1048 'previous' link. Those variable that have
1049 been affirmatively merged all have a 'merged' pointer that points to
1050 one primary variable - the most recently declared instance. When
1051 merging variables, we need to also adjust the 'merged' pointer on any
1052 other variables that had previously been merged with the one that will
1053 no longer be primary.
1055 A variable that is no longer the most recent instance of a name may
1056 still have "pending" scope, if it might still be merged with most
1057 recent instance. These variables don't really belong in the
1058 "in_scope" list, but are not immediately removed when a new instance
1059 is found. Instead, they are detected and ignored when considering the
1060 list of in_scope names.
1062 ###### variable fields
1063 struct variable *merged;
1065 ###### ast functions
1067 static void variable_merge(struct variable *primary, struct variable *secondary)
1071 if (primary->merged)
1073 primary = primary->merged;
1075 for (v = primary->previous; v; v=v->previous)
1076 if (v == secondary || v == secondary->merged ||
1077 v->merged == secondary ||
1078 (v->merged && v->merged == secondary->merged)) {
1079 v->scope = OutScope;
1080 v->merged = primary;
1084 ###### free context vars
1086 while (context.varlist) {
1087 struct binding *b = context.varlist;
1088 struct variable *v = b->var;
1089 context.varlist = b->next;
1092 struct variable *t = v;
1096 if (t->min_depth == 0)
1097 // This is a global constant
1098 free_exec(t->where_decl);
1103 #### Manipulating Bindings
1105 When a name is conditionally visible, a new declaration discards the
1106 old binding - the condition lapses. Conversely a usage of the name
1107 affirms the visibility and extends it to the end of the containing
1108 block - i.e. the block that contains both the original declaration and
1109 the latest usage. This is determined from `min_depth`. When a
1110 conditionally visible variable gets affirmed like this, it is also
1111 merged with other conditionally visible variables with the same name.
1113 When we parse a variable declaration we either report an error if the
1114 name is currently bound, or create a new variable at the current nest
1115 depth if the name is unbound or bound to a conditionally scoped or
1116 pending-scope variable. If the previous variable was conditionally
1117 scoped, it and its homonyms becomes out-of-scope.
1119 When we parse a variable reference (including non-declarative
1120 assignment) we report an error if the name is not bound or is bound to
1121 a pending-scope variable; update the scope if the name is bound to a
1122 conditionally scoped variable; or just proceed normally if the named
1123 variable is in scope.
1125 When we exit a scope, any variables bound at this level are either
1126 marked out of scope or pending-scoped, depending on whether the scope
1127 was sequential or parallel. Here a "parallel" scope means the "then"
1128 or "else" part of a conditional, or any "case" or "else" branch of a
1129 switch. Other scopes are "sequential".
1131 When exiting a parallel scope we check if there are any variables that
1132 were previously pending and are still visible. If there are, then
1133 there weren't redeclared in the most recent scope, so they cannot be
1134 merged and must become out-of-scope. If it is not the first of
1135 parallel scopes (based on `child_count`), we check that there was a
1136 previous binding that is still pending-scope. If there isn't, the new
1137 variable must now be out-of-scope.
1139 When exiting a sequential scope that immediately enclosed parallel
1140 scopes, we need to resolve any pending-scope variables. If there was
1141 no `else` clause, and we cannot determine that the `switch` was exhaustive,
1142 we need to mark all pending-scope variable as out-of-scope. Otherwise
1143 all pending-scope variables become conditionally scoped.
1146 enum closetype { CloseSequential, CloseParallel, CloseElse };
1148 ###### ast functions
1150 static struct variable *var_decl(struct parse_context *c, struct text s)
1152 struct binding *b = find_binding(c, s);
1153 struct variable *v = b->var;
1155 switch (v ? v->scope : OutScope) {
1157 /* Caller will report the error */
1161 v && v->scope == CondScope;
1163 v->scope = OutScope;
1167 v = calloc(1, sizeof(*v));
1168 v->previous = b->var;
1171 v->min_depth = v->depth = c->scope_depth;
1173 v->in_scope = c->in_scope;
1175 v->val = val_prepare(NULL);
1179 static struct variable *var_ref(struct parse_context *c, struct text s)
1181 struct binding *b = find_binding(c, s);
1182 struct variable *v = b->var;
1183 struct variable *v2;
1185 switch (v ? v->scope : OutScope) {
1188 /* Caller will report the error */
1191 /* All CondScope variables of this name need to be merged
1192 * and become InScope
1194 v->depth = v->min_depth;
1196 for (v2 = v->previous;
1197 v2 && v2->scope == CondScope;
1199 variable_merge(v, v2);
1207 static void var_block_close(struct parse_context *c, enum closetype ct)
1209 /* Close off all variables that are in_scope */
1210 struct variable *v, **vp, *v2;
1213 for (vp = &c->in_scope;
1214 v = *vp, v && v->depth > c->scope_depth && v->min_depth > c->scope_depth;
1216 if (v->name->var == v) switch (ct) {
1218 case CloseParallel: /* handle PendingScope */
1222 if (c->scope_stack->child_count == 1)
1223 v->scope = PendingScope;
1224 else if (v->previous &&
1225 v->previous->scope == PendingScope)
1226 v->scope = PendingScope;
1227 else if (v->val.type == Tlabel)
1228 v->scope = PendingScope;
1229 else if (v->name->var == v)
1230 v->scope = OutScope;
1231 if (ct == CloseElse) {
1232 /* All Pending variables with this name
1233 * are now Conditional */
1235 v2 && v2->scope == PendingScope;
1237 v2->scope = CondScope;
1242 v2 && v2->scope == PendingScope;
1244 if (v2->val.type != Tlabel)
1245 v2->scope = OutScope;
1247 case OutScope: break;
1250 case CloseSequential:
1251 if (v->val.type == Tlabel)
1252 v->scope = PendingScope;
1255 v->scope = OutScope;
1258 /* There was no 'else', so we can only become
1259 * conditional if we know the cases were exhaustive,
1260 * and that doesn't mean anything yet.
1261 * So only labels become conditional..
1264 v2 && v2->scope == PendingScope;
1266 if (v2->val.type == Tlabel) {
1267 v2->scope = CondScope;
1268 v2->min_depth = c->scope_depth;
1270 v2->scope = OutScope;
1273 case OutScope: break;
1277 if (v->scope == OutScope || v->name->var != v)
1286 Executables can be lots of different things. In many cases an
1287 executable is just an operation combined with one or two other
1288 executables. This allows for expressions and lists etc. Other times
1289 an executable is something quite specific like a constant or variable
1290 name. So we define a `struct exec` to be a general executable with a
1291 type, and a `struct binode` which is a subclass of `exec`, forms a
1292 node in a binary tree, and holds an operation. There will be other
1293 subclasses, and to access these we need to be able to `cast` the
1294 `exec` into the various other types.
1297 #define cast(structname, pointer) ({ \
1298 const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \
1299 if (__mptr && *__mptr != X##structname) abort(); \
1300 (struct structname *)( (char *)__mptr);})
1302 #define new(structname) ({ \
1303 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1304 __ptr->type = X##structname; \
1305 __ptr->line = -1; __ptr->column = -1; \
1308 #define new_pos(structname, token) ({ \
1309 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1310 __ptr->type = X##structname; \
1311 __ptr->line = token.line; __ptr->column = token.col; \
1320 enum exec_types type;
1328 struct exec *left, *right;
1331 ###### ast functions
1333 static int __fput_loc(struct exec *loc, FILE *f)
1337 if (loc->line >= 0) {
1338 fprintf(f, "%d:%d: ", loc->line, loc->column);
1341 if (loc->type == Xbinode)
1342 return __fput_loc(cast(binode,loc)->left, f) ||
1343 __fput_loc(cast(binode,loc)->right, f);
1346 static void fput_loc(struct exec *loc, FILE *f)
1348 if (!__fput_loc(loc, f))
1349 fprintf(f, "??:??: "); // NOTEST
1352 Each different type of `exec` node needs a number of functions
1353 defined, a bit like methods. We must be able to be able to free it,
1354 print it, analyse it and execute it. Once we have specific `exec`
1355 types we will need to parse them too. Let's take this a bit more
1360 The parser generator requires a `free_foo` function for each struct
1361 that stores attributes and they will often be `exec`s and subtypes
1362 there-of. So we need `free_exec` which can handle all the subtypes,
1363 and we need `free_binode`.
1365 ###### ast functions
1367 static void free_binode(struct binode *b)
1372 free_exec(b->right);
1376 ###### core functions
1377 static void free_exec(struct exec *e)
1386 ###### forward decls
1388 static void free_exec(struct exec *e);
1390 ###### free exec cases
1391 case Xbinode: free_binode(cast(binode, e)); break;
1395 Printing an `exec` requires that we know the current indent level for
1396 printing line-oriented components. As will become clear later, we
1397 also want to know what sort of bracketing to use.
1399 ###### ast functions
1401 static void do_indent(int i, char *str)
1408 ###### core functions
1409 static void print_binode(struct binode *b, int indent, int bracket)
1413 ## print binode cases
1417 static void print_exec(struct exec *e, int indent, int bracket)
1423 print_binode(cast(binode, e), indent, bracket); break;
1428 ###### forward decls
1430 static void print_exec(struct exec *e, int indent, int bracket);
1434 As discussed, analysis involves propagating type requirements around
1435 the program and looking for errors.
1437 So `propagate_types` is passed an expected type (being a `struct type`
1438 pointer together with some `val_rules` flags) that the `exec` is
1439 expected to return, and returns the type that it does return, either
1440 of which can be `NULL` signifying "unknown". An `ok` flag is passed
1441 by reference. It is set to `0` when an error is found, and `2` when
1442 any change is made. If it remains unchanged at `1`, then no more
1443 propagation is needed.
1447 enum val_rules {Rnolabel = 1<<0, Rboolok = 1<<1, Rnoconstant = 2<<1};
1451 if (rules & Rnolabel)
1452 fputs(" (labels not permitted)", stderr);
1455 ###### core functions
1457 static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1458 struct type *type, int rules)
1465 switch (prog->type) {
1468 struct binode *b = cast(binode, prog);
1470 ## propagate binode cases
1474 ## propagate exec cases
1481 Interpreting an `exec` doesn't require anything but the `exec`. State
1482 is stored in variables and each variable will be directly linked from
1483 within the `exec` tree. The exception to this is the whole `program`
1484 which needs to look at command line arguments. The `program` will be
1485 interpreted separately.
1487 Each `exec` can return a value, which may be `Tnone` but must be
1488 non-NULL; Some `exec`s will return the location of a value, which can
1489 be updates. To support this, each exec case must store either a value
1490 in `val` or the pointer to a value in `lval`. If `lval` is set, but a
1491 simple value is required, `inter_exec()` will dereference `lval` to
1494 ###### core functions
1497 struct value val, *lval;
1500 static struct lrval _interp_exec(struct exec *e);
1502 static struct value interp_exec(struct exec *e)
1504 struct lrval ret = _interp_exec(e);
1507 return dup_value(*ret.lval);
1512 static struct value *linterp_exec(struct exec *e)
1514 struct lrval ret = _interp_exec(e);
1519 static struct lrval _interp_exec(struct exec *e)
1522 struct value rv, *lrv = NULL;
1533 struct binode *b = cast(binode, e);
1534 struct value left, right, *lleft;
1535 left.type = right.type = Tnone;
1537 ## interp binode cases
1539 free_value(left); free_value(right);
1542 ## interp exec cases
1551 Now that we have the shape of the interpreter in place we can add some
1552 complex types and connected them in to the data structures and the
1553 different phases of parse, analyse, print, interpret.
1555 Thus far we have arrays and structs.
1559 Arrays can be declared by giving a size and a type, as `[size]type' so
1560 `freq:[26]number` declares `freq` to be an array of 26 numbers. The
1561 size can be an arbitrary expression which is evaluated when the name
1564 Arrays cannot be assigned. When pointers are introduced we will also
1565 introduce array slices which can refer to part or all of an array -
1566 the assignment syntax will create a slice. For now, an array can only
1567 ever be referenced by the name it is declared with. It is likely that
1568 a "`copy`" primitive will eventually be define which can be used to
1569 make a copy of an array with controllable depth.
1571 ###### type union fields
1575 struct variable *vsize;
1576 struct type *member;
1579 ###### value union fields
1581 struct value *elmnts;
1584 ###### value functions
1586 static struct value array_prepare(struct type *type)
1591 ret.array.elmnts = NULL;
1595 static struct value array_init(struct type *type)
1601 if (type->array.vsize) {
1604 mpz_tdiv_q(q, mpq_numref(type->array.vsize->val.num),
1605 mpq_denref(type->array.vsize->val.num));
1606 type->array.size = mpz_get_si(q);
1609 ret.array.elmnts = calloc(type->array.size,
1610 sizeof(ret.array.elmnts[0]));
1611 for (i = 0; ret.array.elmnts && i < type->array.size; i++)
1612 ret.array.elmnts[i] = val_init(type->array.member);
1616 static void array_free(struct value val)
1620 if (val.array.elmnts)
1621 for (i = 0; i < val.type->array.size; i++)
1622 free_value(val.array.elmnts[i]);
1623 free(val.array.elmnts);
1626 static int array_compat(struct type *require, struct type *have)
1628 if (have->compat != require->compat)
1630 /* Both are arrays, so we can look at details */
1631 if (!type_compat(require->array.member, have->array.member, 0))
1633 if (require->array.vsize == NULL && have->array.vsize == NULL)
1634 return require->array.size == have->array.size;
1636 return require->array.vsize == have->array.vsize;
1639 static void array_print_type(struct type *type, FILE *f)
1642 if (type->array.vsize) {
1643 struct binding *b = type->array.vsize->name;
1644 fprintf(f, "%.*s]", b->name.len, b->name.txt);
1646 fprintf(f, "%d]", type->array.size);
1647 type_print(type->array.member, f);
1650 static struct type array_prototype = {
1651 .prepare = array_prepare,
1653 .print_type = array_print_type,
1654 .compat = array_compat,
1660 | [ NUMBER ] Type ${
1661 $0 = calloc(1, sizeof(struct type));
1662 *($0) = array_prototype;
1663 $0->array.member = $<4;
1664 $0->array.vsize = NULL;
1666 struct parse_context *c = config2context(config);
1669 if (number_parse(num, tail, $2.txt) == 0)
1670 tok_err(c, "error: unrecognised number", &$2);
1672 tok_err(c, "error: unsupported number suffix", &$2);
1674 $0->array.size = mpz_get_ui(mpq_numref(num));
1675 if (mpz_cmp_ui(mpq_denref(num), 1) != 0) {
1676 tok_err(c, "error: array size must be an integer",
1678 } else if (mpz_cmp_ui(mpq_numref(num), 1UL << 30) >= 0)
1679 tok_err(c, "error: array size is too large",
1683 $0->next= c->anon_typelist;
1684 c->anon_typelist = $0;
1688 | [ IDENTIFIER ] Type ${ {
1689 struct parse_context *c = config2context(config);
1690 struct variable *v = var_ref(c, $2.txt);
1693 tok_err(config2context(config), "error: name undeclared", &$2);
1694 else if (!v->constant)
1695 tok_err(config2context(config), "error: array size must be a constant", &$2);
1697 $0 = calloc(1, sizeof(struct type));
1698 *($0) = array_prototype;
1699 $0->array.member = $<4;
1701 $0->array.vsize = v;
1702 $0->next= c->anon_typelist;
1703 c->anon_typelist = $0;
1706 ###### parse context
1708 struct type *anon_typelist;
1710 ###### free context types
1712 while (context.anon_typelist) {
1713 struct type *t = context.anon_typelist;
1715 context.anon_typelist = t->next;
1722 ###### variable grammar
1724 | Variable [ Expression ] ${ {
1725 struct binode *b = new(binode);
1732 ###### print binode cases
1734 print_exec(b->left, -1, 0);
1736 print_exec(b->right, -1, 0);
1740 ###### propagate binode cases
1742 /* left must be an array, right must be a number,
1743 * result is the member type of the array
1745 propagate_types(b->right, c, ok, Tnum, 0);
1746 t = propagate_types(b->left, c, ok, NULL, rules & Rnoconstant);
1747 if (!t || t->compat != array_compat) {
1748 type_err(c, "error: %1 cannot be indexed", prog, t, 0, NULL);
1752 if (!type_compat(type, t->array.member, rules)) {
1753 type_err(c, "error: have %1 but need %2", prog,
1754 t->array.member, rules, type);
1757 return t->array.member;
1761 ###### interp binode cases
1766 lleft = linterp_exec(b->left);
1767 right = interp_exec(b->right);
1769 mpz_tdiv_q(q, mpq_numref(right.num), mpq_denref(right.num));
1773 if (i >= 0 && i < lleft->type->array.size)
1774 lrv = &lleft->array.elmnts[i];
1776 rv = val_init(lleft->type->array.member);
1782 A `struct` is a data-type that contains one or more other data-types.
1783 It differs from an array in that each member can be of a different
1784 type, and they are accessed by name rather than by number. Thus you
1785 cannot choose an element by calculation, you need to know what you
1788 The language makes no promises about how a given structure will be
1789 stored in memory - it is free to rearrange fields to suit whatever
1790 criteria seems important.
1792 Structs are declared separately from program code - they cannot be
1793 declared in-line in a variable declaration like arrays can. A struct
1794 is given a name and this name is used to identify the type - the name
1795 is not prefixed by the word `struct` as it would be in C.
1797 Structs are only treated as the same if they have the same name.
1798 Simply having the same fields in the same order is not enough. This
1799 might change once we can create structure initializes from a list of
1802 Each component datum is identified much like a variable is declared,
1803 with a name, one or two colons, and a type. The type cannot be omitted
1804 as there is no opportunity to deduce the type from usage. An initial
1805 value can be given following an equals sign, so
1807 ##### Example: a struct type
1813 would declare a type called "complex" which has two number fields,
1814 each initialised to zero.
1816 Struct will need to be declared separately from the code that uses
1817 them, so we will need to be able to print out the declaration of a
1818 struct when reprinting the whole program. So a `print_type_decl` type
1819 function will be needed.
1821 ###### type union fields
1832 ###### value union fields
1834 struct value *fields;
1837 ###### type functions
1838 void (*print_type_decl)(struct type *type, FILE *f);
1840 ###### value functions
1842 static struct value structure_prepare(struct type *type)
1847 ret.structure.fields = NULL;
1851 static struct value structure_init(struct type *type)
1857 ret.structure.fields = calloc(type->structure.nfields,
1858 sizeof(ret.structure.fields[0]));
1859 for (i = 0; ret.structure.fields && i < type->structure.nfields; i++)
1860 ret.structure.fields[i] = val_init(type->structure.fields[i].type);
1864 static void structure_free(struct value val)
1868 if (val.structure.fields)
1869 for (i = 0; i < val.type->structure.nfields; i++)
1870 free_value(val.structure.fields[i]);
1871 free(val.structure.fields);
1874 static void structure_free_type(struct type *t)
1877 for (i = 0; i < t->structure.nfields; i++)
1878 free_value(t->structure.fields[i].init);
1879 free(t->structure.fields);
1882 static struct type structure_prototype = {
1883 .prepare = structure_prepare,
1884 .init = structure_init,
1885 .free = structure_free,
1886 .free_type = structure_free_type,
1887 .print_type_decl = structure_print_type,
1901 ###### free exec cases
1903 free_exec(cast(fieldref, e)->left);
1907 ###### variable grammar
1909 | Variable . IDENTIFIER ${ {
1910 struct fieldref *fr = new_pos(fieldref, $2);
1917 ###### print exec cases
1921 struct fieldref *f = cast(fieldref, e);
1922 print_exec(f->left, -1, 0);
1923 printf(".%.*s", f->name.len, f->name.txt);
1927 ###### ast functions
1928 static int find_struct_index(struct type *type, struct text field)
1931 for (i = 0; i < type->structure.nfields; i++)
1932 if (text_cmp(type->structure.fields[i].name, field) == 0)
1937 ###### propagate exec cases
1941 struct fieldref *f = cast(fieldref, prog);
1942 struct type *st = propagate_types(f->left, c, ok, NULL, 0);
1945 type_err(c, "error: unknown type for field access", f->left,
1947 else if (st->prepare != structure_prepare)
1948 type_err(c, "error: field reference attempted on %1, not a struct",
1949 f->left, st, 0, NULL);
1950 else if (f->index == -2) {
1951 f->index = find_struct_index(st, f->name);
1953 type_err(c, "error: cannot find requested field in %1",
1954 f->left, st, 0, NULL);
1958 if (f->index >= 0) {
1959 struct type *ft = st->structure.fields[f->index].type;
1960 if (!type_compat(type, ft, rules)) {
1961 type_err(c, "error: have %1 but need %2", prog,
1970 ###### interp exec cases
1973 struct fieldref *f = cast(fieldref, e);
1974 struct value *lleft = linterp_exec(f->left);
1975 lrv = &lleft->structure.fields[f->index];
1981 struct fieldlist *prev;
1985 ###### ast functions
1986 static void free_fieldlist(struct fieldlist *f)
1990 free_fieldlist(f->prev);
1991 free_value(f->f.init);
1995 ###### top level grammar
1996 DeclareStruct -> struct IDENTIFIER FieldBlock ${ {
1998 add_type(config2context(config), $2.txt, &structure_prototype);
2000 struct fieldlist *f;
2002 for (f = $3; f; f=f->prev)
2005 t->structure.nfields = cnt;
2006 t->structure.fields = calloc(cnt, sizeof(struct field));
2010 t->structure.fields[cnt] = f->f;
2011 f->f.init = val_prepare(Tnone);
2017 FieldBlock -> Open SimpleFieldList Close ${ $0 = $<2; }$
2018 | Open Newlines SimpleFieldList Close ${ $0 = $<3; }$
2019 | : FieldList ${ $0 = $<2; }$
2021 FieldList -> Field NEWLINE ${ $0 = $<1; }$
2022 | FieldList NEWLINE ${ $0 = $<1; }$
2023 | FieldList Field NEWLINE ${
2028 SimpleFieldList -> Field ; ${ $0 = $<1; }$
2029 | SimpleFieldList Field ; ${
2034 Field -> IDENTIFIER : Type = Expression ${ {
2037 $0 = calloc(1, sizeof(struct fieldlist));
2038 $0->f.name = $1.txt;
2040 $0->f.init = val_prepare($0->f.type);
2043 propagate_types($<5, config2context(config), &ok, $3, 0);
2046 config2context(config)->parse_error = 1;
2048 $0->f.init = interp_exec($5);
2050 | IDENTIFIER : Type ${
2051 $0 = calloc(1, sizeof(struct fieldlist));
2052 $0->f.name = $1.txt;
2054 $0->f.init = val_init($3);
2057 ###### forward decls
2058 static void structure_print_type(struct type *t, FILE *f);
2060 ###### value functions
2061 static void structure_print_type(struct type *t, FILE *f)
2065 fprintf(f, "struct %.*s:\n", t->name.len, t->name.txt);
2067 for (i = 0; i < t->structure.nfields; i++) {
2068 struct field *fl = t->structure.fields + i;
2069 fprintf(f, " %.*s : ", fl->name.len, fl->name.txt);
2070 type_print(fl->type, f);
2071 if (fl->init.type->print) {
2073 if (fl->init.type == Tstr)
2075 print_value(fl->init);
2076 if (fl->init.type == Tstr)
2083 ###### print type decls
2088 while (target != 0) {
2090 for (t = context.typelist; t ; t=t->next)
2091 if (t->print_type_decl) {
2100 t->print_type_decl(t, stdout);
2106 ## Executables: the elements of code
2108 Each code element needs to be parsed, printed, analysed,
2109 interpreted, and freed. There are several, so let's just start with
2110 the easy ones and work our way up.
2114 We have already met values as separate objects. When manifest
2115 constants appear in the program text, that must result in an executable
2116 which has a constant value. So the `val` structure embeds a value in
2132 $0 = new_pos(val, $1);
2133 $0->val.type = Tbool;
2137 $0 = new_pos(val, $1);
2138 $0->val.type = Tbool;
2142 $0 = new_pos(val, $1);
2143 $0->val.type = Tnum;
2146 if (number_parse($0->val.num, tail, $1.txt) == 0)
2147 mpq_init($0->val.num);
2149 tok_err(config2context(config), "error: unsupported number suffix",
2154 $0 = new_pos(val, $1);
2155 $0->val.type = Tstr;
2158 string_parse(&$1, '\\', &$0->val.str, tail);
2160 tok_err(config2context(config), "error: unsupported string suffix",
2165 $0 = new_pos(val, $1);
2166 $0->val.type = Tstr;
2169 string_parse(&$1, '\\', &$0->val.str, tail);
2171 tok_err(config2context(config), "error: unsupported string suffix",
2176 ###### print exec cases
2179 struct val *v = cast(val, e);
2180 if (v->val.type == Tstr)
2182 print_value(v->val);
2183 if (v->val.type == Tstr)
2188 ###### propagate exec cases
2191 struct val *val = cast(val, prog);
2192 if (!type_compat(type, val->val.type, rules)) {
2193 type_err(c, "error: expected %1%r found %2",
2194 prog, type, rules, val->val.type);
2197 return val->val.type;
2200 ###### interp exec cases
2202 rv = dup_value(cast(val, e)->val);
2205 ###### ast functions
2206 static void free_val(struct val *v)
2214 ###### free exec cases
2215 case Xval: free_val(cast(val, e)); break;
2217 ###### ast functions
2218 // Move all nodes from 'b' to 'rv', reversing the order.
2219 // In 'b' 'left' is a list, and 'right' is the last node.
2220 // In 'rv', left' is the first node and 'right' is a list.
2221 static struct binode *reorder_bilist(struct binode *b)
2223 struct binode *rv = NULL;
2226 struct exec *t = b->right;
2230 b = cast(binode, b->left);
2240 Just as we used a `val` to wrap a value into an `exec`, we similarly
2241 need a `var` to wrap a `variable` into an exec. While each `val`
2242 contained a copy of the value, each `var` hold a link to the variable
2243 because it really is the same variable no matter where it appears.
2244 When a variable is used, we need to remember to follow the `->merged`
2245 link to find the primary instance.
2253 struct variable *var;
2259 VariableDecl -> IDENTIFIER : ${ {
2260 struct variable *v = var_decl(config2context(config), $1.txt);
2261 $0 = new_pos(var, $1);
2266 v = var_ref(config2context(config), $1.txt);
2268 type_err(config2context(config), "error: variable '%v' redeclared",
2270 type_err(config2context(config), "info: this is where '%v' was first declared",
2271 v->where_decl, NULL, 0, NULL);
2274 | IDENTIFIER :: ${ {
2275 struct variable *v = var_decl(config2context(config), $1.txt);
2276 $0 = new_pos(var, $1);
2282 v = var_ref(config2context(config), $1.txt);
2284 type_err(config2context(config), "error: variable '%v' redeclared",
2286 type_err(config2context(config), "info: this is where '%v' was first declared",
2287 v->where_decl, NULL, 0, NULL);
2290 | IDENTIFIER : Type ${ {
2291 struct variable *v = var_decl(config2context(config), $1.txt);
2292 $0 = new_pos(var, $1);
2297 v->val = val_prepare($<3);
2299 v = var_ref(config2context(config), $1.txt);
2301 type_err(config2context(config), "error: variable '%v' redeclared",
2303 type_err(config2context(config), "info: this is where '%v' was first declared",
2304 v->where_decl, NULL, 0, NULL);
2307 | IDENTIFIER :: Type ${ {
2308 struct variable *v = var_decl(config2context(config), $1.txt);
2309 $0 = new_pos(var, $1);
2314 v->val = val_prepare($<3);
2317 v = var_ref(config2context(config), $1.txt);
2319 type_err(config2context(config), "error: variable '%v' redeclared",
2321 type_err(config2context(config), "info: this is where '%v' was first declared",
2322 v->where_decl, NULL, 0, NULL);
2327 Variable -> IDENTIFIER ${ {
2328 struct variable *v = var_ref(config2context(config), $1.txt);
2329 $0 = new_pos(var, $1);
2331 /* This might be a label - allocate a var just in case */
2332 v = var_decl(config2context(config), $1.txt);
2334 v->val = val_prepare(Tlabel);
2335 v->val.label = &v->val;
2339 cast(var, $0)->var = v;
2344 Type -> IDENTIFIER ${
2345 $0 = find_type(config2context(config), $1.txt);
2347 tok_err(config2context(config),
2348 "error: undefined type", &$1);
2355 ###### print exec cases
2358 struct var *v = cast(var, e);
2360 struct binding *b = v->var->name;
2361 printf("%.*s", b->name.len, b->name.txt);
2368 if (loc->type == Xvar) {
2369 struct var *v = cast(var, loc);
2371 struct binding *b = v->var->name;
2372 fprintf(stderr, "%.*s", b->name.len, b->name.txt);
2374 fputs("???", stderr); // NOTEST
2376 fputs("NOTVAR", stderr); // NOTEST
2379 ###### propagate exec cases
2383 struct var *var = cast(var, prog);
2384 struct variable *v = var->var;
2386 type_err(c, "%d:BUG: no variable!!", prog, NULL, 0, NULL); // NOTEST
2388 return Tnone; // NOTEST
2392 if (v->constant && (rules & Rnoconstant)) {
2393 type_err(c, "error: Cannot assign to a constant: %v",
2394 prog, NULL, 0, NULL);
2395 type_err(c, "info: name was defined as a constant here",
2396 v->where_decl, NULL, 0, NULL);
2400 if (v->val.type == NULL) {
2401 if (type && *ok != 0) {
2402 v->val = val_prepare(type);
2403 v->where_set = prog;
2408 if (!type_compat(type, v->val.type, rules)) {
2409 type_err(c, "error: expected %1%r but variable '%v' is %2", prog,
2410 type, rules, v->val.type);
2411 type_err(c, "info: this is where '%v' was set to %1", v->where_set,
2412 v->val.type, rules, NULL);
2420 ###### interp exec cases
2423 struct var *var = cast(var, e);
2424 struct variable *v = var->var;
2432 ###### ast functions
2434 static void free_var(struct var *v)
2439 ###### free exec cases
2440 case Xvar: free_var(cast(var, e)); break;
2442 ### Expressions: Conditional
2444 Our first user of the `binode` will be conditional expressions, which
2445 is a bit odd as they actually have three components. That will be
2446 handled by having 2 binodes for each expression. The conditional
2447 expression is the lowest precedence operatior, so it gets to define
2448 what an "Expression" is. The next level up is "BoolExpr", which
2451 Conditional expressions are of the form "value `if` condition `else`
2452 other_value". They associate to the right, so everything to the right
2453 of `else` is part of an else value, while only the BoolExpr to the
2454 left of `if` is the if values. Between `if` and `else` there is no
2455 room for ambiguity, so a full conditional expression is allowed in there.
2463 Expression -> BoolExpr if Expression else Expression ${ {
2464 struct binode *b1 = new(binode);
2465 struct binode *b2 = new(binode);
2474 | BoolExpr ${ $0 = $<1; }$
2476 ###### print binode cases
2479 b2 = cast(binode, b->right);
2480 print_exec(b2->left, -1, 0);
2482 print_exec(b->left, -1, 0);
2484 print_exec(b2->right, -1, 0);
2487 ###### propagate binode cases
2490 /* cond must be Tbool, others must match */
2491 struct binode *b2 = cast(binode, b->right);
2494 propagate_types(b->left, c, ok, Tbool, 0);
2495 t = propagate_types(b2->left, c, ok, type, Rnolabel);
2496 t2 = propagate_types(b2->right, c, ok, type ?: t, Rnolabel);
2500 ###### interp binode cases
2503 struct binode *b2 = cast(binode, b->right);
2504 left = interp_exec(b->left);
2506 rv = interp_exec(b2->left);
2508 rv = interp_exec(b2->right);
2512 ### Expressions: Boolean
2514 The next class of expressions to use the `binode` will be Boolean
2515 expressions. As I haven't implemented precedence in the parser
2516 generator yet, we need different names for each precedence level used
2517 by expressions. The outer most or lowest level precedence after
2518 conditional expressions are Boolean operators which form an `BoolExpr`
2519 out of `BTerm`s and `BFact`s. As well as `or` `and`, and `not` we
2520 have `and then` and `or else` which only evaluate the second operand
2521 if the result would make a difference.
2533 BoolExpr -> BoolExpr or BTerm ${ {
2534 struct binode *b = new(binode);
2540 | BoolExpr or else BTerm ${ {
2541 struct binode *b = new(binode);
2547 | BTerm ${ $0 = $<1; }$
2549 BTerm -> BTerm and BFact ${ {
2550 struct binode *b = new(binode);
2556 | BTerm and then BFact ${ {
2557 struct binode *b = new(binode);
2563 | BFact ${ $0 = $<1; }$
2565 BFact -> not BFact ${ {
2566 struct binode *b = new(binode);
2573 ###### print binode cases
2575 print_exec(b->left, -1, 0);
2577 print_exec(b->right, -1, 0);
2580 print_exec(b->left, -1, 0);
2581 printf(" and then ");
2582 print_exec(b->right, -1, 0);
2585 print_exec(b->left, -1, 0);
2587 print_exec(b->right, -1, 0);
2590 print_exec(b->left, -1, 0);
2591 printf(" or else ");
2592 print_exec(b->right, -1, 0);
2596 print_exec(b->right, -1, 0);
2599 ###### propagate binode cases
2605 /* both must be Tbool, result is Tbool */
2606 propagate_types(b->left, c, ok, Tbool, 0);
2607 propagate_types(b->right, c, ok, Tbool, 0);
2608 if (type && type != Tbool) {
2609 type_err(c, "error: %1 operation found where %2 expected", prog,
2615 ###### interp binode cases
2617 rv = interp_exec(b->left);
2618 right = interp_exec(b->right);
2619 rv.bool = rv.bool && right.bool;
2622 rv = interp_exec(b->left);
2624 rv = interp_exec(b->right);
2627 rv = interp_exec(b->left);
2628 right = interp_exec(b->right);
2629 rv.bool = rv.bool || right.bool;
2632 rv = interp_exec(b->left);
2634 rv = interp_exec(b->right);
2637 rv = interp_exec(b->right);
2641 ### Expressions: Comparison
2643 Of slightly higher precedence that Boolean expressions are
2645 A comparison takes arguments of any comparable type, but the two types must be
2648 To simplify the parsing we introduce an `eop` which can record an
2649 expression operator.
2656 ###### ast functions
2657 static void free_eop(struct eop *e)
2672 | Expr CMPop Expr ${ {
2673 struct binode *b = new(binode);
2679 | Expr ${ $0 = $<1; }$
2684 CMPop -> < ${ $0.op = Less; }$
2685 | > ${ $0.op = Gtr; }$
2686 | <= ${ $0.op = LessEq; }$
2687 | >= ${ $0.op = GtrEq; }$
2688 | == ${ $0.op = Eql; }$
2689 | != ${ $0.op = NEql; }$
2691 ###### print binode cases
2699 print_exec(b->left, -1, 0);
2701 case Less: printf(" < "); break;
2702 case LessEq: printf(" <= "); break;
2703 case Gtr: printf(" > "); break;
2704 case GtrEq: printf(" >= "); break;
2705 case Eql: printf(" == "); break;
2706 case NEql: printf(" != "); break;
2707 default: abort(); // NOTEST
2709 print_exec(b->right, -1, 0);
2712 ###### propagate binode cases
2719 /* Both must match but not be labels, result is Tbool */
2720 t = propagate_types(b->left, c, ok, NULL, Rnolabel);
2722 propagate_types(b->right, c, ok, t, 0);
2724 t = propagate_types(b->right, c, ok, NULL, Rnolabel);
2726 t = propagate_types(b->left, c, ok, t, 0);
2728 if (!type_compat(type, Tbool, 0)) {
2729 type_err(c, "error: Comparison returns %1 but %2 expected", prog,
2730 Tbool, rules, type);
2735 ###### interp binode cases
2744 left = interp_exec(b->left);
2745 right = interp_exec(b->right);
2746 cmp = value_cmp(left, right);
2749 case Less: rv.bool = cmp < 0; break;
2750 case LessEq: rv.bool = cmp <= 0; break;
2751 case Gtr: rv.bool = cmp > 0; break;
2752 case GtrEq: rv.bool = cmp >= 0; break;
2753 case Eql: rv.bool = cmp == 0; break;
2754 case NEql: rv.bool = cmp != 0; break;
2755 default: rv.bool = 0; break; // NOTEST
2760 ### Expressions: The rest
2762 The remaining expressions with the highest precedence are arithmetic
2763 and string concatenation. They are `Expr`, `Term`, and `Factor`.
2764 The `Factor` is where the `Value` and `Variable` that we already have
2767 `+` and `-` are both infix and prefix operations (where they are
2768 absolute value and negation). These have different operator names.
2770 We also have a 'Bracket' operator which records where parentheses were
2771 found. This makes it easy to reproduce these when printing. Once
2772 precedence is handled better I might be able to discard this.
2784 Expr -> Expr Eop Term ${ {
2785 struct binode *b = new(binode);
2791 | Term ${ $0 = $<1; }$
2793 Term -> Term Top Factor ${ {
2794 struct binode *b = new(binode);
2800 | Factor ${ $0 = $<1; }$
2802 Factor -> ( Expression ) ${ {
2803 struct binode *b = new_pos(binode, $1);
2809 struct binode *b = new(binode);
2814 | Value ${ $0 = $<1; }$
2815 | Variable ${ $0 = $<1; }$
2818 Eop -> + ${ $0.op = Plus; }$
2819 | - ${ $0.op = Minus; }$
2821 Uop -> + ${ $0.op = Absolute; }$
2822 | - ${ $0.op = Negate; }$
2824 Top -> * ${ $0.op = Times; }$
2825 | / ${ $0.op = Divide; }$
2826 | % ${ $0.op = Rem; }$
2827 | ++ ${ $0.op = Concat; }$
2829 ###### print binode cases
2836 print_exec(b->left, indent, 0);
2838 case Plus: fputs(" + ", stdout); break;
2839 case Minus: fputs(" - ", stdout); break;
2840 case Times: fputs(" * ", stdout); break;
2841 case Divide: fputs(" / ", stdout); break;
2842 case Rem: fputs(" % ", stdout); break;
2843 case Concat: fputs(" ++ ", stdout); break;
2844 default: abort(); // NOTEST
2846 print_exec(b->right, indent, 0);
2850 print_exec(b->right, indent, 0);
2854 print_exec(b->right, indent, 0);
2858 print_exec(b->right, indent, 0);
2862 ###### propagate binode cases
2868 /* both must be numbers, result is Tnum */
2871 /* as propagate_types ignores a NULL,
2872 * unary ops fit here too */
2873 propagate_types(b->left, c, ok, Tnum, 0);
2874 propagate_types(b->right, c, ok, Tnum, 0);
2875 if (!type_compat(type, Tnum, 0)) {
2876 type_err(c, "error: Arithmetic returns %1 but %2 expected", prog,
2883 /* both must be Tstr, result is Tstr */
2884 propagate_types(b->left, c, ok, Tstr, 0);
2885 propagate_types(b->right, c, ok, Tstr, 0);
2886 if (!type_compat(type, Tstr, 0)) {
2887 type_err(c, "error: Concat returns %1 but %2 expected", prog,
2894 return propagate_types(b->right, c, ok, type, 0);
2896 ###### interp binode cases
2899 rv = interp_exec(b->left);
2900 right = interp_exec(b->right);
2901 mpq_add(rv.num, rv.num, right.num);
2904 rv = interp_exec(b->left);
2905 right = interp_exec(b->right);
2906 mpq_sub(rv.num, rv.num, right.num);
2909 rv = interp_exec(b->left);
2910 right = interp_exec(b->right);
2911 mpq_mul(rv.num, rv.num, right.num);
2914 rv = interp_exec(b->left);
2915 right = interp_exec(b->right);
2916 mpq_div(rv.num, rv.num, right.num);
2921 left = interp_exec(b->left);
2922 right = interp_exec(b->right);
2923 mpz_init(l); mpz_init(r); mpz_init(rem);
2924 mpz_tdiv_q(l, mpq_numref(left.num), mpq_denref(left.num));
2925 mpz_tdiv_q(r, mpq_numref(right.num), mpq_denref(right.num));
2926 mpz_tdiv_r(rem, l, r);
2927 rv = val_init(Tnum);
2928 mpq_set_z(rv.num, rem);
2929 mpz_clear(r); mpz_clear(l); mpz_clear(rem);
2933 rv = interp_exec(b->right);
2934 mpq_neg(rv.num, rv.num);
2937 rv = interp_exec(b->right);
2938 mpq_abs(rv.num, rv.num);
2941 rv = interp_exec(b->right);
2944 left = interp_exec(b->left);
2945 right = interp_exec(b->right);
2947 rv.str = text_join(left.str, right.str);
2950 ###### value functions
2952 static struct text text_join(struct text a, struct text b)
2955 rv.len = a.len + b.len;
2956 rv.txt = malloc(rv.len);
2957 memcpy(rv.txt, a.txt, a.len);
2958 memcpy(rv.txt+a.len, b.txt, b.len);
2962 ### Blocks, Statements, and Statement lists.
2964 Now that we have expressions out of the way we need to turn to
2965 statements. There are simple statements and more complex statements.
2966 Simple statements do not contain (syntactic) newlines, complex statements do.
2968 Statements often come in sequences and we have corresponding simple
2969 statement lists and complex statement lists.
2970 The former comprise only simple statements separated by semicolons.
2971 The later comprise complex statements and simple statement lists. They are
2972 separated by newlines. Thus the semicolon is only used to separate
2973 simple statements on the one line. This may be overly restrictive,
2974 but I'm not sure I ever want a complex statement to share a line with
2977 Note that a simple statement list can still use multiple lines if
2978 subsequent lines are indented, so
2980 ###### Example: wrapped simple statement list
2985 is a single simple statement list. This might allow room for
2986 confusion, so I'm not set on it yet.
2988 A simple statement list needs no extra syntax. A complex statement
2989 list has two syntactic forms. It can be enclosed in braces (much like
2990 C blocks), or it can be introduced by a colon and continue until an
2991 unindented newline (much like Python blocks). With this extra syntax
2992 it is referred to as a block.
2994 Note that a block does not have to include any newlines if it only
2995 contains simple statements. So both of:
2997 if condition: a=b; d=f
2999 if condition { a=b; print f }
3003 In either case the list is constructed from a `binode` list with
3004 `Block` as the operator. When parsing the list it is most convenient
3005 to append to the end, so a list is a list and a statement. When using
3006 the list it is more convenient to consider a list to be a statement
3007 and a list. So we need a function to re-order a list.
3008 `reorder_bilist` serves this purpose.
3010 The only stand-alone statement we introduce at this stage is `pass`
3011 which does nothing and is represented as a `NULL` pointer in a `Block`
3012 list. Other stand-alone statements will follow once the infrastructure
3032 Block -> Open Statementlist Close ${ $0 = $<2; }$
3033 | Open Newlines Statementlist Close ${ $0 = $<3; }$
3034 | Open SimpleStatements } ${ $0 = reorder_bilist($<2); }$
3035 | Open Newlines SimpleStatements } ${ $0 = reorder_bilist($<3); }$
3036 | : Statementlist ${ $0 = $<2; }$
3037 | : SimpleStatements ${ $0 = reorder_bilist($<2); }$
3039 Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<1); }$
3041 ComplexStatements -> ComplexStatements ComplexStatement ${
3047 | ComplexStatements NEWLINE ${ $0 = $<1; }$
3048 | ComplexStatement ${
3056 ComplexStatement -> SimpleStatements NEWLINE ${
3057 $0 = reorder_bilist($<1);
3059 ## ComplexStatement Grammar
3062 SimpleStatements -> SimpleStatements ; SimpleStatement ${
3068 | SimpleStatement ${
3074 | SimpleStatements ; ${ $0 = $<1; }$
3076 SimpleStatement -> pass ${ $0 = NULL; }$
3077 ## SimpleStatement Grammar
3079 ###### print binode cases
3083 if (b->left == NULL)
3086 print_exec(b->left, indent, 0);
3089 print_exec(b->right, indent, 0);
3092 // block, one per line
3093 if (b->left == NULL)
3094 do_indent(indent, "pass\n");
3096 print_exec(b->left, indent, bracket);
3098 print_exec(b->right, indent, bracket);
3102 ###### propagate binode cases
3105 /* If any statement returns something other than Tnone
3106 * or Tbool then all such must return same type.
3107 * As each statement may be Tnone or something else,
3108 * we must always pass NULL (unknown) down, otherwise an incorrect
3109 * error might occur. We never return Tnone unless it is
3114 for (e = b; e; e = cast(binode, e->right)) {
3115 t = propagate_types(e->left, c, ok, NULL, rules);
3116 if ((rules & Rboolok) && t == Tbool)
3118 if (t && t != Tnone && t != Tbool) {
3121 else if (t != type) {
3122 type_err(c, "error: expected %1%r, found %2",
3123 e->left, type, rules, t);
3131 ###### interp binode cases
3133 while (rv.type == Tnone &&
3136 rv = interp_exec(b->left);
3137 b = cast(binode, b->right);
3141 ### The Print statement
3143 `print` is a simple statement that takes a comma-separated list of
3144 expressions and prints the values separated by spaces and terminated
3145 by a newline. No control of formatting is possible.
3147 `print` faces the same list-ordering issue as blocks, and uses the
3153 ###### SimpleStatement Grammar
3155 | print ExpressionList ${
3156 $0 = reorder_bilist($<2);
3158 | print ExpressionList , ${
3163 $0 = reorder_bilist($0);
3174 ExpressionList -> ExpressionList , Expression ${
3187 ###### print binode cases
3190 do_indent(indent, "print");
3194 print_exec(b->left, -1, 0);
3198 b = cast(binode, b->right);
3204 ###### propagate binode cases
3207 /* don't care but all must be consistent */
3208 propagate_types(b->left, c, ok, NULL, Rnolabel);
3209 propagate_types(b->right, c, ok, NULL, Rnolabel);
3212 ###### interp binode cases
3218 for ( ; b; b = cast(binode, b->right))
3222 left = interp_exec(b->left);
3235 ###### Assignment statement
3237 An assignment will assign a value to a variable, providing it hasn't
3238 be declared as a constant. The analysis phase ensures that the type
3239 will be correct so the interpreter just needs to perform the
3240 calculation. There is a form of assignment which declares a new
3241 variable as well as assigning a value. If a name is assigned before
3242 it is declared, and error will be raised as the name is created as
3243 `Tlabel` and it is illegal to assign to such names.
3249 ###### SimpleStatement Grammar
3250 | Variable = Expression ${
3256 | VariableDecl = Expression ${
3264 if ($1->var->where_set == NULL) {
3265 type_err(config2context(config),
3266 "Variable declared with no type or value: %v",
3276 ###### print binode cases
3279 do_indent(indent, "");
3280 print_exec(b->left, indent, 0);
3282 print_exec(b->right, indent, 0);
3289 struct variable *v = cast(var, b->left)->var;
3290 do_indent(indent, "");
3291 print_exec(b->left, indent, 0);
3292 if (cast(var, b->left)->var->constant) {
3293 if (v->where_decl == v->where_set) {
3295 type_print(v->val.type, stdout);
3300 if (v->where_decl == v->where_set) {
3302 type_print(v->val.type, stdout);
3309 print_exec(b->right, indent, 0);
3316 ###### propagate binode cases
3320 /* Both must match and not be labels,
3321 * Type must support 'dup',
3322 * For Assign, left must not be constant.
3325 t = propagate_types(b->left, c, ok, NULL,
3326 Rnolabel | (b->op == Assign ? Rnoconstant : 0));
3331 if (propagate_types(b->right, c, ok, t, 0) != t)
3332 if (b->left->type == Xvar)
3333 type_err(c, "info: variable '%v' was set as %1 here.",
3334 cast(var, b->left)->var->where_set, t, rules, NULL);
3336 t = propagate_types(b->right, c, ok, NULL, Rnolabel);
3338 propagate_types(b->left, c, ok, t,
3339 (b->op == Assign ? Rnoconstant : 0));
3341 if (t && t->dup == NULL) {
3342 type_err(c, "error: cannot assign value of type %1", b, t, 0, NULL);
3349 ###### interp binode cases
3352 lleft = linterp_exec(b->left);
3353 right = interp_exec(b->right);
3358 free_value(right); // NOTEST
3364 struct variable *v = cast(var, b->left)->var;
3368 right = interp_exec(b->right);
3370 right = val_init(v->val.type);
3377 ### The `use` statement
3379 The `use` statement is the last "simple" statement. It is needed when
3380 the condition in a conditional statement is a block. `use` works much
3381 like `return` in C, but only completes the `condition`, not the whole
3387 ###### SimpleStatement Grammar
3389 $0 = new_pos(binode, $1);
3394 ###### print binode cases
3397 do_indent(indent, "use ");
3398 print_exec(b->right, -1, 0);
3403 ###### propagate binode cases
3406 /* result matches value */
3407 return propagate_types(b->right, c, ok, type, 0);
3409 ###### interp binode cases
3412 rv = interp_exec(b->right);
3415 ### The Conditional Statement
3417 This is the biggy and currently the only complex statement. This
3418 subsumes `if`, `while`, `do/while`, `switch`, and some parts of `for`.
3419 It is comprised of a number of parts, all of which are optional though
3420 set combinations apply. Each part is (usually) a key word (`then` is
3421 sometimes optional) followed by either an expression or a code block,
3422 except the `casepart` which is a "key word and an expression" followed
3423 by a code block. The code-block option is valid for all parts and,
3424 where an expression is also allowed, the code block can use the `use`
3425 statement to report a value. If the code block does not report a value
3426 the effect is similar to reporting `True`.
3428 The `else` and `case` parts, as well as `then` when combined with
3429 `if`, can contain a `use` statement which will apply to some
3430 containing conditional statement. `for` parts, `do` parts and `then`
3431 parts used with `for` can never contain a `use`, except in some
3432 subordinate conditional statement.
3434 If there is a `forpart`, it is executed first, only once.
3435 If there is a `dopart`, then it is executed repeatedly providing
3436 always that the `condpart` or `cond`, if present, does not return a non-True
3437 value. `condpart` can fail to return any value if it simply executes
3438 to completion. This is treated the same as returning `True`.
3440 If there is a `thenpart` it will be executed whenever the `condpart`
3441 or `cond` returns True (or does not return any value), but this will happen
3442 *after* `dopart` (when present).
3444 If `elsepart` is present it will be executed at most once when the
3445 condition returns `False` or some value that isn't `True` and isn't
3446 matched by any `casepart`. If there are any `casepart`s, they will be
3447 executed when the condition returns a matching value.
3449 The particular sorts of values allowed in case parts has not yet been
3450 determined in the language design, so nothing is prohibited.
3452 The various blocks in this complex statement potentially provide scope
3453 for variables as described earlier. Each such block must include the
3454 "OpenScope" nonterminal before parsing the block, and must call
3455 `var_block_close()` when closing the block.
3457 The code following "`if`", "`switch`" and "`for`" does not get its own
3458 scope, but is in a scope covering the whole statement, so names
3459 declared there cannot be redeclared elsewhere. Similarly the
3460 condition following "`while`" is in a scope the covers the body
3461 ("`do`" part) of the loop, and which does not allow conditional scope
3462 extension. Code following "`then`" (both looping and non-looping),
3463 "`else`" and "`case`" each get their own local scope.
3465 The type requirements on the code block in a `whilepart` are quite
3466 unusal. It is allowed to return a value of some identifiable type, in
3467 which case the loop aborts and an appropriate `casepart` is run, or it
3468 can return a Boolean, in which case the loop either continues to the
3469 `dopart` (on `True`) or aborts and runs the `elsepart` (on `False`).
3470 This is different both from the `ifpart` code block which is expected to
3471 return a Boolean, or the `switchpart` code block which is expected to
3472 return the same type as the casepart values. The correct analysis of
3473 the type of the `whilepart` code block is the reason for the
3474 `Rboolok` flag which is passed to `propagate_types()`.
3476 The `cond_statement` cannot fit into a `binode` so a new `exec` is
3485 struct exec *action;
3486 struct casepart *next;
3488 struct cond_statement {
3490 struct exec *forpart, *condpart, *dopart, *thenpart, *elsepart;
3491 struct casepart *casepart;
3494 ###### ast functions
3496 static void free_casepart(struct casepart *cp)
3500 free_exec(cp->value);
3501 free_exec(cp->action);
3508 static void free_cond_statement(struct cond_statement *s)
3512 free_exec(s->forpart);
3513 free_exec(s->condpart);
3514 free_exec(s->dopart);
3515 free_exec(s->thenpart);
3516 free_exec(s->elsepart);
3517 free_casepart(s->casepart);
3521 ###### free exec cases
3522 case Xcond_statement: free_cond_statement(cast(cond_statement, e)); break;
3524 ###### ComplexStatement Grammar
3525 | CondStatement ${ $0 = $<1; }$
3530 // both ForThen and Whilepart open scopes, and CondSuffix only
3531 // closes one - so in the first branch here we have another to close.
3532 CondStatement -> ForThen WhilePart CondSuffix ${
3534 $0->forpart = $1.forpart; $1.forpart = NULL;
3535 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
3536 $0->condpart = $2.condpart; $2.condpart = NULL;
3537 $0->dopart = $2.dopart; $2.dopart = NULL;
3538 var_block_close(config2context(config), CloseSequential);
3540 | WhilePart CondSuffix ${
3542 $0->condpart = $1.condpart; $1.condpart = NULL;
3543 $0->dopart = $1.dopart; $1.dopart = NULL;
3545 | SwitchPart CondSuffix ${
3549 | IfPart IfSuffix ${
3551 $0->condpart = $1.condpart; $1.condpart = NULL;
3552 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
3553 // This is where we close an "if" statement
3554 var_block_close(config2context(config), CloseSequential);
3557 CondSuffix -> IfSuffix ${
3559 // This is where we close scope of the whole
3560 // "for" or "while" statement
3561 var_block_close(config2context(config), CloseSequential);
3563 | CasePart CondSuffix ${
3565 $1->next = $0->casepart;
3570 CasePart -> Newlines case Expression OpenScope Block ${
3571 $0 = calloc(1,sizeof(struct casepart));
3574 var_block_close(config2context(config), CloseParallel);
3576 | case Expression OpenScope Block ${
3577 $0 = calloc(1,sizeof(struct casepart));
3580 var_block_close(config2context(config), CloseParallel);
3584 IfSuffix -> Newlines ${ $0 = new(cond_statement); }$
3585 | Newlines else OpenScope Block ${
3586 $0 = new(cond_statement);
3588 var_block_close(config2context(config), CloseElse);
3590 | else OpenScope Block ${
3591 $0 = new(cond_statement);
3593 var_block_close(config2context(config), CloseElse);
3595 | Newlines else OpenScope CondStatement ${
3596 $0 = new(cond_statement);
3598 var_block_close(config2context(config), CloseElse);
3600 | else OpenScope CondStatement ${
3601 $0 = new(cond_statement);
3603 var_block_close(config2context(config), CloseElse);
3607 // These scopes are closed in CondSuffix
3608 ForPart -> for OpenScope SimpleStatements ${
3609 $0 = reorder_bilist($<3);
3611 | for OpenScope Block ${
3615 ThenPart -> then OpenScope SimpleStatements ${
3616 $0 = reorder_bilist($<3);
3617 var_block_close(config2context(config), CloseSequential);
3619 | then OpenScope Block ${
3621 var_block_close(config2context(config), CloseSequential);
3624 ThenPartNL -> ThenPart OptNL ${
3628 // This scope is closed in CondSuffix
3629 WhileHead -> while OpenScope Block ${
3634 ForThen -> ForPart OptNL ThenPartNL ${
3642 // This scope is closed in CondSuffix
3643 WhilePart -> while OpenScope Expression Block ${
3644 $0.type = Xcond_statement;
3648 | WhileHead OptNL do Block ${
3649 $0.type = Xcond_statement;
3654 IfPart -> if OpenScope Expression OpenScope Block ${
3655 $0.type = Xcond_statement;
3658 var_block_close(config2context(config), CloseParallel);
3660 | if OpenScope Block OptNL then OpenScope Block ${
3661 $0.type = Xcond_statement;
3664 var_block_close(config2context(config), CloseParallel);
3668 // This scope is closed in CondSuffix
3669 SwitchPart -> switch OpenScope Expression ${
3672 | switch OpenScope Block ${
3676 ###### print exec cases
3678 case Xcond_statement:
3680 struct cond_statement *cs = cast(cond_statement, e);
3681 struct casepart *cp;
3683 do_indent(indent, "for");
3684 if (bracket) printf(" {\n"); else printf(":\n");
3685 print_exec(cs->forpart, indent+1, bracket);
3688 do_indent(indent, "} then {\n");
3690 do_indent(indent, "then:\n");
3691 print_exec(cs->thenpart, indent+1, bracket);
3693 if (bracket) do_indent(indent, "}\n");
3697 if (cs->condpart && cs->condpart->type == Xbinode &&
3698 cast(binode, cs->condpart)->op == Block) {
3700 do_indent(indent, "while {\n");
3702 do_indent(indent, "while:\n");
3703 print_exec(cs->condpart, indent+1, bracket);
3705 do_indent(indent, "} do {\n");
3707 do_indent(indent, "do:\n");
3708 print_exec(cs->dopart, indent+1, bracket);
3710 do_indent(indent, "}\n");
3712 do_indent(indent, "while ");
3713 print_exec(cs->condpart, 0, bracket);
3718 print_exec(cs->dopart, indent+1, bracket);
3720 do_indent(indent, "}\n");
3725 do_indent(indent, "switch");
3727 do_indent(indent, "if");
3728 if (cs->condpart && cs->condpart->type == Xbinode &&
3729 cast(binode, cs->condpart)->op == Block) {
3734 print_exec(cs->condpart, indent+1, bracket);
3736 do_indent(indent, "}\n");
3738 do_indent(indent, "then:\n");
3739 print_exec(cs->thenpart, indent+1, bracket);
3743 print_exec(cs->condpart, 0, bracket);
3749 print_exec(cs->thenpart, indent+1, bracket);
3751 do_indent(indent, "}\n");
3756 for (cp = cs->casepart; cp; cp = cp->next) {
3757 do_indent(indent, "case ");
3758 print_exec(cp->value, -1, 0);
3763 print_exec(cp->action, indent+1, bracket);
3765 do_indent(indent, "}\n");
3768 do_indent(indent, "else");
3773 print_exec(cs->elsepart, indent+1, bracket);
3775 do_indent(indent, "}\n");
3780 ###### propagate exec cases
3781 case Xcond_statement:
3783 // forpart and dopart must return Tnone
3784 // thenpart must return Tnone if there is a dopart,
3785 // otherwise it is like elsepart.
3787 // be bool if there is no casepart
3788 // match casepart->values if there is a switchpart
3789 // either be bool or match casepart->value if there
3791 // elsepart and casepart->action must match the return type
3792 // expected of this statement.
3793 struct cond_statement *cs = cast(cond_statement, prog);
3794 struct casepart *cp;
3796 t = propagate_types(cs->forpart, c, ok, Tnone, 0);
3797 if (!type_compat(Tnone, t, 0))
3799 t = propagate_types(cs->dopart, c, ok, Tnone, 0);
3800 if (!type_compat(Tnone, t, 0))
3803 t = propagate_types(cs->thenpart, c, ok, Tnone, 0);
3804 if (!type_compat(Tnone, t, 0))
3807 if (cs->casepart == NULL)
3808 propagate_types(cs->condpart, c, ok, Tbool, 0);
3810 /* Condpart must match case values, with bool permitted */
3812 for (cp = cs->casepart;
3813 cp && !t; cp = cp->next)
3814 t = propagate_types(cp->value, c, ok, NULL, 0);
3815 if (!t && cs->condpart)
3816 t = propagate_types(cs->condpart, c, ok, NULL, Rboolok);
3817 // Now we have a type (I hope) push it down
3819 for (cp = cs->casepart; cp; cp = cp->next)
3820 propagate_types(cp->value, c, ok, t, 0);
3821 propagate_types(cs->condpart, c, ok, t, Rboolok);
3824 // (if)then, else, and case parts must return expected type.
3825 if (!cs->dopart && !type)
3826 type = propagate_types(cs->thenpart, c, ok, NULL, rules);
3828 type = propagate_types(cs->elsepart, c, ok, NULL, rules);
3829 for (cp = cs->casepart;
3832 type = propagate_types(cp->action, c, ok, NULL, rules);
3835 propagate_types(cs->thenpart, c, ok, type, rules);
3836 propagate_types(cs->elsepart, c, ok, type, rules);
3837 for (cp = cs->casepart; cp ; cp = cp->next)
3838 propagate_types(cp->action, c, ok, type, rules);
3844 ###### interp exec cases
3845 case Xcond_statement:
3847 struct value v, cnd;
3848 struct casepart *cp;
3849 struct cond_statement *c = cast(cond_statement, e);
3852 interp_exec(c->forpart);
3855 cnd = interp_exec(c->condpart);
3858 if (!(cnd.type == Tnone ||
3859 (cnd.type == Tbool && cnd.bool != 0)))
3861 // cnd is Tnone or Tbool, doesn't need to be freed
3863 interp_exec(c->dopart);
3866 rv = interp_exec(c->thenpart);
3867 if (rv.type != Tnone || !c->dopart)
3871 } while (c->dopart);
3873 for (cp = c->casepart; cp; cp = cp->next) {
3874 v = interp_exec(cp->value);
3875 if (value_cmp(v, cnd) == 0) {
3878 rv = interp_exec(cp->action);
3885 rv = interp_exec(c->elsepart);
3892 ### Top level structure
3894 All the language elements so far can be used in various places. Now
3895 it is time to clarify what those places are.
3897 At the top level of a file there will be a number of declarations.
3898 Many of the things that can be declared haven't been described yet,
3899 such as functions, procedures, imports, and probably more.
3900 For now there are two sorts of things that can appear at the top
3901 level. They are predefined constants, `struct` types, and the main
3902 program. While the syntax will allow the main program to appear
3903 multiple times, that will trigger an error if it is actually attempted.
3905 The various declarations do not return anything. They store the
3906 various declarations in the parse context.
3908 ###### Parser: grammar
3911 Ocean -> DeclarationList
3913 DeclarationList -> Declaration
3914 | DeclarationList Declaration
3916 Declaration -> DeclareConstant
3921 ## top level grammar
3923 ### The `const` section
3925 As well as being defined in with the code that uses them, constants
3926 can be declared at the top level. These have full-file scope, so they
3927 are always `InScope`. The value of a top level constant can be given
3928 as an expression, and this is evaluated immediately rather than in the
3929 later interpretation stage. Once we add functions to the language, we
3930 will need rules concern which, if any, can be used to define a top
3933 Constants are defined in a section that starts with the reserved word
3934 `const` and then has a block with a list of assignment statements.
3935 For syntactic consistency, these must use the double-colon syntax to
3936 make it clear that they are constants. Type can also be given: if
3937 not, the type will be determined during analysis, as with other
3940 As the types constants are inserted at the head of a list, printing
3941 them in the same order that they were read is not straight forward.
3942 We take a quadratic approach here and count the number of constants
3943 (variables of depth 0), then count down from there, each time
3944 searching through for the Nth constant for decreasing N.
3946 ###### top level grammar
3948 DeclareConstant -> const Open ConstList Close
3949 | const Open Newlines ConstList Close
3950 | const Open SimpleConstList }
3951 | const Open Newlines SimpleConstList }
3953 | const SimpleConstList
3955 ConstList -> ComplexConsts
3956 ComplexConsts -> ComplexConst ComplexConsts
3958 ComplexConst -> SimpleConstList NEWLINE
3959 SimpleConstList -> Const ; SimpleConstList
3961 | Const ; SimpleConstList ;
3964 CType -> Type ${ $0 = $<1; }$
3967 Const -> IDENTIFIER :: CType = Expression ${ {
3971 v = var_decl(config2context(config), $1.txt);
3973 struct var *var = new_pos(var, $1);
3974 v->where_decl = var;
3979 v = var_ref(config2context(config), $1.txt);
3980 tok_err(config2context(config), "error: name already declared", &$1);
3981 type_err(config2context(config), "info: this is where '%v' was first declared",
3982 v->where_decl, NULL, 0, NULL);
3986 propagate_types($5, config2context(config), &ok, $3, 0);
3989 config2context(config)->parse_error = 1;
3991 v->val = interp_exec($5);
3995 ###### print const decls
4000 while (target != 0) {
4002 for (v = context.in_scope; v; v=v->in_scope)
4003 if (v->depth == 0) {
4014 printf(" %.*s :: ", v->name->name.len, v->name->name.txt);
4015 type_print(v->val.type, stdout);
4017 if (v->val.type == Tstr)
4019 print_value(v->val);
4020 if (v->val.type == Tstr)
4028 ### Finally the whole program.
4030 Somewhat reminiscent of Pascal a (current) Ocean program starts with
4031 the keyword "program" and a list of variable names which are assigned
4032 values from command line arguments. Following this is a `block` which
4033 is the code to execute. Unlike Pascal, constants and other
4034 declarations come *before* the program.
4036 As this is the top level, several things are handled a bit
4038 The whole program is not interpreted by `interp_exec` as that isn't
4039 passed the argument list which the program requires. Similarly type
4040 analysis is a bit more interesting at this level.
4045 ###### top level grammar
4047 DeclareProgram -> Program ${ {
4048 struct parse_context *c = config2context(config);
4050 type_err(c, "Program defined a second time",
4057 Program -> program OpenScope Varlist Block OptNL ${
4060 $0->left = reorder_bilist($<3);
4062 var_block_close(config2context(config), CloseSequential);
4063 if (config2context(config)->scope_stack) abort();
4066 tok_err(config2context(config),
4067 "error: unhandled parse error", &$1);
4070 Varlist -> Varlist ArgDecl ${
4079 ArgDecl -> IDENTIFIER ${ {
4080 struct variable *v = var_decl(config2context(config), $1.txt);
4087 ###### print binode cases
4089 do_indent(indent, "program");
4090 for (b2 = cast(binode, b->left); b2; b2 = cast(binode, b2->right)) {
4092 print_exec(b2->left, 0, 0);
4098 print_exec(b->right, indent+1, bracket);
4100 do_indent(indent, "}\n");
4103 ###### propagate binode cases
4104 case Program: abort(); // NOTEST
4106 ###### core functions
4108 static int analyse_prog(struct exec *prog, struct parse_context *c)
4110 struct binode *b = cast(binode, prog);
4117 propagate_types(b->right, c, &ok, Tnone, 0);
4122 for (b = cast(binode, b->left); b; b = cast(binode, b->right)) {
4123 struct var *v = cast(var, b->left);
4124 if (!v->var->val.type) {
4125 v->var->where_set = b;
4126 v->var->val = val_prepare(Tstr);
4129 b = cast(binode, prog);
4132 propagate_types(b->right, c, &ok, Tnone, 0);
4137 /* Make sure everything is still consistent */
4138 propagate_types(b->right, c, &ok, Tnone, 0);
4142 static void interp_prog(struct exec *prog, char **argv)
4144 struct binode *p = cast(binode, prog);
4150 al = cast(binode, p->left);
4152 struct var *v = cast(var, al->left);
4153 struct value *vl = &v->var->val;
4155 if (argv[0] == NULL) {
4156 printf("Not enough args\n");
4159 al = cast(binode, al->right);
4161 *vl = parse_value(vl->type, argv[0]);
4162 if (vl->type == NULL)
4166 v = interp_exec(p->right);
4170 ###### interp binode cases
4171 case Program: abort(); // NOTEST
4173 ## And now to test it out.
4175 Having a language requires having a "hello world" program. I'll
4176 provide a little more than that: a program that prints "Hello world"
4177 finds the GCD of two numbers, prints the first few elements of
4178 Fibonacci, performs a binary search for a number, and a few other
4179 things which will likely grow as the languages grows.
4181 ###### File: oceani.mk
4184 @echo "===== DEMO ====="
4185 ./oceani --section "demo: hello" oceani.mdc 55 33
4191 four ::= 2 + 2 ; five ::= 10/2
4192 const pie ::= "I like Pie";
4193 cake ::= "The cake is"
4202 print "Hello World, what lovely oceans you have!"
4203 print "Are there", five, "?"
4204 print pi, pie, "but", cake
4206 /* When a variable is defined in both branches of an 'if',
4207 * and used afterwards, the variables are merged.
4213 print "Is", A, "bigger than", B,"? ", bigger
4214 /* If a variable is not used after the 'if', no
4215 * merge happens, so types can be different
4218 double:string = "yes"
4219 print A, "is more than twice", B, "?", double
4222 print "double", B, "is", double
4227 if a > 0 and then b > 0:
4233 print "GCD of", A, "and", B,"is", a
4235 print a, "is not positive, cannot calculate GCD"
4237 print b, "is not positive, cannot calculate GCD"
4242 print "Fibonacci:", f1,f2,
4243 then togo = togo - 1
4251 /* Binary search... */
4256 mid := (lo + hi) / 2
4268 print "Yay, I found", target
4270 print "Closest I found was", mid
4275 // "middle square" PRNG. Not particularly good, but one my
4276 // Dad taught me - the first one I ever heard of.
4277 for i:=1; then i = i + 1; while i < size:
4278 n := list[i-1] * list[i-1]
4279 list[i] = (n / 100) % 10 000
4281 print "Before sort:",
4282 for i:=0; then i = i + 1; while i < size:
4286 for i := 1; then i=i+1; while i < size:
4287 for j:=i-1; then j=j-1; while j >= 0:
4288 if list[j] > list[j+1]:
4292 print " After sort:",
4293 for i:=0; then i = i + 1; while i < size:
4299 bob.alive = (bob.name == "Hello")
4300 print "bob", "is" if bob.alive else "isn't", "alive"