1 # Ocean Interpreter - Jamison Creek version
3 Ocean is intended to be a compiled language, so this interpreter is
4 not targeted at being the final product. It is, rather, an intermediate
5 stage and fills that role in two distinct ways.
7 Firstly, it exists as a platform to experiment with the early language
8 design. An interpreter is easy to write and easy to get working, so
9 the barrier for entry is lower if I aim to start with an interpreter.
11 Secondly, the plan for the Ocean compiler is to write it in the
12 [Ocean language](http://ocean-lang.org). To achieve this we naturally
13 need some sort of boot-strap process and this interpreter - written in
14 portable C - will fill that role. It will be used to bootstrap the
17 Two features that are not needed to fill either of these roles are
18 performance and completeness. The interpreter only needs to be fast
19 enough to run small test programs and occasionally to run the compiler
20 on itself. It only needs to be complete enough to test aspects of the
21 design which are developed before the compiler is working, and to run
22 the compiler on itself. Any features not used by the compiler when
23 compiling itself are superfluous. They may be included anyway, but
26 Nonetheless, the interpreter should end up being reasonably complete,
27 and any performance bottlenecks which appear and are easily fixed, will
32 This third version of the interpreter exists to test out some initial
33 ideas relating to types. Particularly it adds arrays (indexed from
34 zero) and simple structures. Basic control flow and variable scoping
35 are already fairly well established, as are basic numerical and
38 Some operators that have only recently been added, and so have not
39 generated all that much experience yet are "and then" and "or else" as
40 short-circuit Boolean operators, and the "if ... else" trinary
41 operator which can select between two expressions based on a third
42 (which appears syntactically in the middle).
44 The "func" clause currently only allows a "main" function to be
45 declared. That will be extended when proper function support is added.
47 An element that is present purely to make a usable language, and
48 without any expectation that it will remain, is the "print" statement
49 which performs simple output.
51 The current scalar types are "number", "Boolean", and "string".
52 Boolean will likely stay in its current form, the other two might, but
53 could just as easily be changed.
57 Versions of the interpreter which obviously do not support a complete
58 language will be named after creeks and streams. This one is Jamison
61 Once we have something reasonably resembling a complete language, the
62 names of rivers will be used.
63 Early versions of the compiler will be named after seas. Major
64 releases of the compiler will be named after oceans. Hopefully I will
65 be finished once I get to the Pacific Ocean release.
69 As well as parsing and executing a program, the interpreter can print
70 out the program from the parsed internal structure. This is useful
71 for validating the parsing.
72 So the main requirements of the interpreter are:
74 - Parse the program, possibly with tracing,
75 - Analyse the parsed program to ensure consistency,
77 - Execute the "main" function in the program, if no parsing or
78 consistency errors were found.
80 This is all performed by a single C program extracted with
83 There will be two formats for printing the program: a default and one
84 that uses bracketing. So a `--brackets` command line option is needed
85 for that. Normally the first code section found is used, however an
86 alternate section can be requested so that a file (such as this one)
87 can contain multiple programs. This is effected with the `--section`
90 This code must be compiled with `-fplan9-extensions` so that anonymous
91 structures can be used.
93 ###### File: oceani.mk
95 myCFLAGS := -Wall -g -fplan9-extensions
96 CFLAGS := $(filter-out $(myCFLAGS),$(CFLAGS)) $(myCFLAGS)
97 myLDLIBS:= libparser.o libscanner.o libmdcode.o -licuuc
98 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
100 all :: $(LDLIBS) oceani
101 oceani.c oceani.h : oceani.mdc parsergen
102 ./parsergen -o oceani --LALR --tag Parser oceani.mdc
103 oceani.mk: oceani.mdc md2c
106 oceani: oceani.o $(LDLIBS)
107 $(CC) $(CFLAGS) -o oceani oceani.o $(LDLIBS)
109 ###### Parser: header
111 struct parse_context;
113 struct parse_context {
114 struct token_config config;
/*
 * container_of - map a pointer to a structure member back to a pointer
 * to the structure that contains it.
 *
 * ptr:    pointer to the member
 * type:   type of the enclosing structure
 * member: name of the member within 'type' that 'ptr' points at
 *
 * The temporary is declared with the member's own type so that passing
 * a pointer of the wrong type draws a compiler diagnostic, and so that
 * 'ptr' is evaluated exactly once.  '__typeof__' is used rather than
 * 'typeof' so the macro also compiles in strict ISO modes, and the
 * temporary avoids the reserved '__' identifier prefix.
 * Relies on GNU statement expressions and on offsetof() from <stddef.h>.
 */
#define container_of(ptr, type, member) ({				\
	const __typeof__(((type *)0)->member) *member_ptr_ = (ptr);	\
	(type *)((char *)member_ptr_ - offsetof(type, member)); })
127 #define config2context(_conf) container_of(_conf, struct parse_context, \
130 ###### Parser: reduce
131 struct parse_context *c = config2context(config);
139 #include <sys/mman.h>
158 static char Usage[] =
159 "Usage: oceani --trace --print --noexec --brackets --section=SectionName prog.ocn\n";
160 static const struct option long_options[] = {
161 {"trace", 0, NULL, 't'},
162 {"print", 0, NULL, 'p'},
163 {"noexec", 0, NULL, 'n'},
164 {"brackets", 0, NULL, 'b'},
165 {"section", 1, NULL, 's'},
/*
 * Short-option string for getopt_long().  The trailing ':' marks 's' as
 * taking an argument, matching the "section" entry in long_options;
 * without it "-s Name" leaves optarg NULL and treats Name as the input file.
 */
const char *options = "tpnbs:";
170 static void pr_err(char *msg) // NOTEST
172 fprintf(stderr, "%s\n", msg); // NOTEST
175 int main(int argc, char *argv[])
180 struct section *s, *ss;
181 char *section = NULL;
182 struct parse_context context = {
184 .ignored = (1 << TK_mark),
185 .number_chars = ".,_+- ",
190 int doprint=0, dotrace=0, doexec=1, brackets=0;
192 while ((opt = getopt_long(argc, argv, options, long_options, NULL))
195 case 't': dotrace=1; break;
196 case 'p': doprint=1; break;
197 case 'n': doexec=0; break;
198 case 'b': brackets=1; break;
199 case 's': section = optarg; break;
200 default: fprintf(stderr, Usage);
204 if (optind >= argc) {
205 fprintf(stderr, "oceani: no input file given\n");
208 fd = open(argv[optind], O_RDONLY);
210 fprintf(stderr, "oceani: cannot open %s\n", argv[optind]);
213 context.file_name = argv[optind];
214 len = lseek(fd, 0, 2);
215 file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
216 s = code_extract(file, file+len, pr_err);
218 fprintf(stderr, "oceani: could not find any code in %s\n",
223 ## context initialization
226 for (ss = s; ss; ss = ss->next) {
227 struct text sec = ss->section;
228 if (sec.len == strlen(section) &&
229 strncmp(sec.txt, section, sec.len) == 0)
233 fprintf(stderr, "oceani: cannot find section %s\n",
240 fprintf(stderr, "oceani: no code found in requested section\n"); // NOTEST
244 parse_oceani(ss->code, &context.config, dotrace ? stderr : NULL);
247 fprintf(stderr, "oceani: no main function found.\n");
248 context.parse_error = 1;
250 if (context.prog && !context.parse_error) {
251 if (!analyse_prog(context.prog, &context)) {
252 fprintf(stderr, "oceani: type error in program - not running.\n");
253 context.parse_error = 1;
256 if (context.prog && doprint) {
259 print_exec(context.prog, 0, brackets);
261 if (context.prog && doexec && !context.parse_error)
262 interp_prog(&context, context.prog, argc - optind, argv+optind);
263 free_exec(context.prog);
266 struct section *t = s->next;
272 ## free context types
273 ## free context storage
274 exit(context.parse_error ? 1 : 0);
279 The four requirements of parse, analyse, print, interpret apply to
280 each language element individually so that is how most of the code
283 Three of the four are fairly self explanatory. The one that requires
284 a little explanation is the analysis step.
286 The current language design does not require the types of variables to
287 be declared, but they must still have a single type. Different
288 operations impose different requirements on the variables, for example
289 addition requires both arguments to be numeric, and assignment
290 requires the variable on the left to have the same type as the
291 expression on the right.
293 Analysis involves propagating these type requirements around and
294 consequently setting the type of each variable. If any requirements
295 are violated (e.g. a string is compared with a number) or if a
296 variable needs to have two different types, then an error is raised
297 and the program will not run.
299 If the same variable is declared in both branches of an 'if/else', or
300 in all cases of a 'switch' then the multiple instances may be merged
301 into just one variable if the variable is referenced after the
302 conditional statement. When this happens, the types must naturally be
303 consistent across all the branches. When the variable is not used
304 outside the if, the variables in the different branches are distinct
305 and can be of different types.
307 Undeclared names may only appear in "use" statements and "case" expressions.
308 These names are given a type of "label" and a unique value.
309 This allows them to fill the role of a name in an enumerated type, which
310 is useful for testing the `switch` statement.
312 As we will see, the condition part of a `while` statement can return
313 either a Boolean or some other type. This requires that the expected
314 type that gets passed around comprises a type and a flag to indicate
315 that `Tbool` is also permitted.
317 As there are, as yet, no distinct types that are compatible, there
318 isn't much subtlety in the analysis. When we have distinct number
319 types, this will become more interesting.
323 When analysis discovers an inconsistency it needs to report an error;
324 just refusing to run the code ensures that the error doesn't cascade,
325 but by itself it isn't very useful. A clear understanding of the sort
326 of error messages that are useful will help guide the process of
329 At a simplistic level, the only sort of error that type analysis can
330 report is that the type of some construct doesn't match a contextual
331 requirement. For example, in `4 + "hello"` the addition provides a
332 contextual requirement for numbers, but `"hello"` is not a number. In
333 this particular example no further information is needed as the types
334 are obvious from local information. When a variable is involved that
335 isn't the case. It may be helpful to explain why the variable has a
336 particular type, by indicating the location where the type was set,
337 whether by declaration or usage.
339 Using a recursive-descent analysis we can easily detect a problem at
340 multiple locations. In "`hello:= "there"; 4 + hello`" the addition
341 will detect that one argument is not a number and the usage of `hello`
342 will detect that a number was wanted, but not provided. In this
343 (early) version of the language, we will generate error reports at
344 multiple locations, so the use of `hello` will report an error and
345 explain where the value was set, and the addition will report an error
346 and say why numbers are needed. To be able to report locations for
347 errors, each language element will need to record a file location
348 (line and column) and each variable will need to record the language
349 element where its type was set. For now we will assume that each line
350 of an error message indicates one location in the file, and up to 2
351 types. So we provide a `printf`-like function which takes a format, a
352 location (a `struct exec` which has not yet been introduced), and 2
353 types. "`%1`" reports the first type, "`%2`" reports the second. We
354 will need a function to print the location, once we know how that is
355 stored. As will be explained later, there are sometimes extra rules for
356 type matching and they might affect error messages, so we need to pass those
359 As well as type errors, we sometimes need to report problems with
360 tokens, which might be unexpected or might name a type that has not
361 been defined. For these we have `tok_err()` which reports an error
362 with a given token. Each of the error functions sets the flag in the
363 context to indicate that parsing failed.
367 static void fput_loc(struct exec *loc, FILE *f);
369 ###### core functions
371 static void type_err(struct parse_context *c,
372 char *fmt, struct exec *loc,
373 struct type *t1, int rules, struct type *t2)
375 fprintf(stderr, "%s:", c->file_name);
376 fput_loc(loc, stderr);
377 for (; *fmt ; fmt++) {
384 case '%': fputc(*fmt, stderr); break; // NOTEST
385 default: fputc('?', stderr); break; // NOTEST
387 type_print(t1, stderr);
390 type_print(t2, stderr);
399 static void tok_err(struct parse_context *c, char *fmt, struct token *t)
401 fprintf(stderr, "%s:%d:%d: %s: %.*s\n", c->file_name, t->line, t->col, fmt,
402 t->txt.len, t->txt.txt);
406 ## Entities: declared and predeclared.
408 There are various "things" that the language and/or the interpreter
409 needs to know about to parse and execute a program. These include
410 types, variables, values, and executable code. These are all lumped
411 together under the term "entities" (calling them "objects" would be
412 confusing) and introduced here. The following section will present the
413 different specific code elements which comprise or manipulate these
418 Values come in a wide range of types, with more likely to be added.
419 Each type needs to be able to print its own values (for convenience at
420 least) as well as to compare two values, at least for equality and
421 possibly for order. For now, values might need to be duplicated and
422 freed, though eventually such manipulations will be better integrated
425 Rather than requiring every numeric type to support all numeric
426 operations (add, multiply, etc), we allow types to be able to present
427 as one of a few standard types: integer, float, and fraction. The
428 existence of these conversion functions will eventually enable types to
429 determine if they are compatible with other types, though such types
430 have not yet been implemented.
432 Named types are stored in a simple linked list. Objects of each type are
433 "values" which are often passed around by value.
440 ## value union fields
448 void (*init)(struct type *type, struct value *val);
449 void (*prepare_type)(struct parse_context *c, struct type *type, int parse_time);
450 void (*print)(struct type *type, struct value *val);
451 void (*print_type)(struct type *type, FILE *f);
452 int (*cmp_order)(struct type *t1, struct type *t2,
453 struct value *v1, struct value *v2);
454 int (*cmp_eq)(struct type *t1, struct type *t2,
455 struct value *v1, struct value *v2);
456 void (*dup)(struct type *type, struct value *vold, struct value *vnew);
457 void (*free)(struct type *type, struct value *val);
458 void (*free_type)(struct type *t);
459 long long (*to_int)(struct value *v);
460 double (*to_float)(struct value *v);
461 int (*to_mpq)(mpq_t *q, struct value *v);
470 struct type *typelist;
474 static struct type *find_type(struct parse_context *c, struct text s)
476 struct type *l = c->typelist;
479 text_cmp(l->name, s) != 0)
484 static struct type *add_type(struct parse_context *c, struct text s,
489 n = calloc(1, sizeof(*n));
492 n->next = c->typelist;
497 static void free_type(struct type *t)
499 /* The type is always a reference to something in the
500 * context, so we don't need to free anything.
504 static void free_value(struct type *type, struct value *v)
508 memset(v, 0x5a, type->size);
512 static void type_print(struct type *type, FILE *f)
515 fputs("*unknown*type*", f); // NOTEST
516 else if (type->name.len)
517 fprintf(f, "%.*s", type->name.len, type->name.txt);
518 else if (type->print_type)
519 type->print_type(type, f);
521 fputs("*invalid*type*", f); // NOTEST
524 static void val_init(struct type *type, struct value *val)
526 if (type && type->init)
527 type->init(type, val);
530 static void dup_value(struct type *type,
531 struct value *vold, struct value *vnew)
533 if (type && type->dup)
534 type->dup(type, vold, vnew);
537 static int value_cmp(struct type *tl, struct type *tr,
538 struct value *left, struct value *right)
540 if (tl && tl->cmp_order)
541 return tl->cmp_order(tl, tr, left, right);
542 if (tl && tl->cmp_eq) // NOTEST
543 return tl->cmp_eq(tl, tr, left, right); // NOTEST
547 static void print_value(struct type *type, struct value *v)
549 if (type && type->print)
550 type->print(type, v);
552 printf("*Unknown*"); // NOTEST
557 static void free_value(struct type *type, struct value *v);
558 static int type_compat(struct type *require, struct type *have, int rules);
559 static void type_print(struct type *type, FILE *f);
560 static void val_init(struct type *type, struct value *v);
561 static void dup_value(struct type *type,
562 struct value *vold, struct value *vnew);
563 static int value_cmp(struct type *tl, struct type *tr,
564 struct value *left, struct value *right);
565 static void print_value(struct type *type, struct value *v);
567 ###### free context types
569 while (context.typelist) {
570 struct type *t = context.typelist;
572 context.typelist = t->next;
578 Types can be specified for local variables, for fields in a structure,
579 for formal parameters to functions, and possibly elsewhere. Different
580 rules may apply in different contexts. As a minimum, a named type may
581 always be used. Currently the type of a formal parameter can be
582 different from types in other contexts, so we have a separate grammar
588 Type -> IDENTIFIER ${
589 $0 = find_type(c, $1.txt);
592 "error: undefined type", &$1);
599 FormalType -> Type ${ $0 = $<1; }$
600 ## formal type grammar
604 Values of the base types can be numbers, which we represent as
605 multi-precision fractions, strings, Booleans and labels. When
606 analysing the program we also need to allow for places where no value
607 is meaningful (type `Tnone`) and where we don't know what type to
608 expect yet (type is `NULL`).
610 Values are never shared, they are always copied when used, and freed
611 when no longer needed.
613 When propagating type information around the program, we need to
614 determine if two types are compatible, where type `NULL` is compatible
615 with anything. There are two special cases with type compatibility,
616 both related to the Conditional Statement which will be described
617 later. In some cases a Boolean can be accepted as well as some other
618 primary type, and in others any type is acceptable except a label (`Vlabel`).
619 A separate function encoding these cases will simplify some code later.
621 ###### type functions
623 int (*compat)(struct type *this, struct type *other);
627 static int type_compat(struct type *require, struct type *have, int rules)
629 if ((rules & Rboolok) && have == Tbool)
631 if ((rules & Rnolabel) && have == Tlabel)
633 if (!require || !have)
637 return require->compat(require, have);
639 return require == have;
644 #include "parse_string.h"
645 #include "parse_number.h"
648 myLDLIBS := libnumber.o libstring.o -lgmp
649 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
651 ###### type union fields
652 enum vtype {Vnone, Vstr, Vnum, Vbool, Vlabel} vtype;
654 ###### value union fields
661 static void _free_value(struct type *type, struct value *v)
665 switch (type->vtype) {
667 case Vstr: free(v->str.txt); break;
668 case Vnum: mpq_clear(v->num); break;
674 ###### value functions
676 static void _val_init(struct type *type, struct value *val)
678 switch(type->vtype) {
679 case Vnone: // NOTEST
682 mpq_init(val->num); break;
684 val->str.txt = malloc(1);
696 static void _dup_value(struct type *type,
697 struct value *vold, struct value *vnew)
699 switch (type->vtype) {
700 case Vnone: // NOTEST
703 vnew->label = vold->label;
706 vnew->bool = vold->bool;
710 mpq_set(vnew->num, vold->num);
713 vnew->str.len = vold->str.len;
714 vnew->str.txt = malloc(vnew->str.len);
715 memcpy(vnew->str.txt, vold->str.txt, vnew->str.len);
720 static int _value_cmp(struct type *tl, struct type *tr,
721 struct value *left, struct value *right)
725 return tl - tr; // NOTEST
727 case Vlabel: cmp = left->label == right->label ? 0 : 1; break;
728 case Vnum: cmp = mpq_cmp(left->num, right->num); break;
729 case Vstr: cmp = text_cmp(left->str, right->str); break;
730 case Vbool: cmp = left->bool - right->bool; break;
731 case Vnone: cmp = 0; // NOTEST
736 static void _print_value(struct type *type, struct value *v)
738 switch (type->vtype) {
739 case Vnone: // NOTEST
740 printf("*no-value*"); break; // NOTEST
741 case Vlabel: // NOTEST
742 printf("*label-%p*", v->label); break; // NOTEST
744 printf("%.*s", v->str.len, v->str.txt); break;
746 printf("%s", v->bool ? "True":"False"); break;
751 mpf_set_q(fl, v->num);
752 gmp_printf("%Fg", fl);
759 static void _free_value(struct type *type, struct value *v);
761 static struct type base_prototype = {
763 .print = _print_value,
764 .cmp_order = _value_cmp,
765 .cmp_eq = _value_cmp,
770 static struct type *Tbool, *Tstr, *Tnum, *Tnone, *Tlabel;
773 static struct type *add_base_type(struct parse_context *c, char *n,
774 enum vtype vt, int size)
776 struct text txt = { n, strlen(n) };
779 t = add_type(c, txt, &base_prototype);
782 t->align = size > sizeof(void*) ? sizeof(void*) : size;
783 if (t->size & (t->align - 1))
784 t->size = (t->size | (t->align - 1)) + 1; // NOTEST
788 ###### context initialization
790 Tbool = add_base_type(&context, "Boolean", Vbool, sizeof(char));
791 Tstr = add_base_type(&context, "string", Vstr, sizeof(struct text));
792 Tnum = add_base_type(&context, "number", Vnum, sizeof(mpq_t));
793 Tnone = add_base_type(&context, "none", Vnone, 0);
794 Tlabel = add_base_type(&context, "label", Vlabel, sizeof(void*));
798 Variables are scoped named values. We store the names in a linked list
799 of "bindings" sorted in lexical order, and use sequential search and
806 struct binding *next; // in lexical order
810 This linked list is stored in the parse context so that "reduce"
811 functions can find or add variables, and so the analysis phase can
812 ensure that every variable gets a type.
816 struct binding *varlist; // In lexical order
820 static struct binding *find_binding(struct parse_context *c, struct text s)
822 struct binding **l = &c->varlist;
827 (cmp = text_cmp((*l)->name, s)) < 0)
831 n = calloc(1, sizeof(*n));
838 Each name can be linked to multiple variables defined in different
839 scopes. Each scope starts where the name is declared and continues
840 until the end of the containing code block. Scopes of a given name
841 cannot nest, so a declaration while a name is in-scope is an error.
843 ###### binding fields
844 struct variable *var;
848 struct variable *previous;
850 struct binding *name;
851 struct exec *where_decl;// where name was declared
852 struct exec *where_set; // where type was set
856 When a scope closes, the values of the variables might need to be freed.
857 This happens in the context of some `struct exec` and each `exec` will
858 need to know which variables need to be freed when it completes.
861 struct variable *to_free;
863 ####### variable fields
864 struct exec *cleanup_exec;
865 struct variable *next_free;
867 ####### interp exec cleanup
870 for (v = e->to_free; v; v = v->next_free) {
871 struct value *val = var_value(c, v);
872 free_value(v->type, val);
877 static void variable_unlink_exec(struct variable *v)
879 struct variable **vp;
880 if (!v->cleanup_exec)
882 for (vp = &v->cleanup_exec->to_free;
883 *vp; vp = &(*vp)->next_free) {
887 v->cleanup_exec = NULL;
892 While the naming seems strange, we include local constants in the
893 definition of variables. A name declared `var := value` can
894 subsequently be changed, but a name declared `var ::= value` cannot -
897 ###### variable fields
900 Scopes in parallel branches can be partially merged. More
901 specifically, if a given name is declared in both branches of an
902 if/else then its scope is a candidate for merging. Similarly if
903 every branch of an exhaustive switch (e.g. has an "else" clause)
904 declares a given name, then the scopes from the branches are
905 candidates for merging.
907 Note that names declared inside a loop (which is only parallel to
908 itself) are never visible after the loop. Similarly names defined in
909 scopes which are not parallel, such as those started by `for` and
910 `switch`, are never visible after the scope. Only variables defined in
911 both `then` and `else` (including the implicit then after an `if`, and
912 excluding `then` used with `for`) and in all `case`s and `else` of a
913 `switch` or `while` can be visible beyond the `if`/`switch`/`while`.
915 Labels, which are a bit like variables, follow different rules.
916 Labels are not explicitly declared, but if an undeclared name appears
917 in a context where a label is legal, that effectively declares the
918 name as a label. The declaration remains in force (or in scope) at
919 least to the end of the immediately containing block and conditionally
920 in any larger containing block which does not declare the name in some
921 other way. Importantly, the conditional scope extension happens even
922 if the label is only used in one parallel branch of a conditional --
923 when used in one branch it is treated as having been declared in all
926 Merge candidates are tentatively visible beyond the end of the
927 branching statement which creates them. If the name is used, the
928 merge is affirmed and they become a single variable visible at the
929 outer layer. If not - if it is redeclared first - the merge lapses.
931 To track scopes we have an extra stack, implemented as a linked list,
932 which roughly parallels the parse stack and which is used exclusively
933 for scoping. When a new scope is opened, a new frame is pushed and
934 the child-count of the parent frame is incremented. This child-count
935 is used to distinguish between the first of a set of parallel scopes,
936 in which declared variables must not be in scope, and subsequent
937 branches, where they may already be conditionally scoped.
939 To push a new frame *before* any code in the frame is parsed, we need a
940 grammar reduction. This is most easily achieved with a grammar
941 element which derives the empty string, and creates the new scope when
942 it is recognised. This can be placed, for example, between a keyword
943 like "if" and the code following it.
947 struct scope *parent;
953 struct scope *scope_stack;
956 static void scope_pop(struct parse_context *c)
958 struct scope *s = c->scope_stack;
960 c->scope_stack = s->parent;
965 static void scope_push(struct parse_context *c)
967 struct scope *s = calloc(1, sizeof(*s));
969 c->scope_stack->child_count += 1;
970 s->parent = c->scope_stack;
978 OpenScope -> ${ scope_push(c); }$
980 Each variable records a scope depth and is in one of four states:
982 - "in scope". This is the case between the declaration of the
983 variable and the end of the containing block, and also between
984 the usage which affirms a merge and the end of that block.
986 The scope depth is not greater than the current parse context scope
987 nest depth. When the block of that depth closes, the state will
988 change. To achieve this, all "in scope" variables are linked
989 together as a stack in nesting order.
991 - "pending". The "in scope" block has closed, but other parallel
992 scopes are still being processed. So far, every parallel block at
993 the same level that has closed has declared the name.
995 The scope depth is the depth of the last parallel block that
996 enclosed the declaration, and that has closed.
998 - "conditionally in scope". The "in scope" block and all parallel
999 scopes have closed, and no further mention of the name has been seen.
1000 This state includes a secondary nest depth (`min_depth`) which records
1001 the outermost scope seen since the variable became conditionally in
1002 scope. If a use of the name is found, the variable becomes "in scope"
1003 and that secondary depth becomes the recorded scope depth. If the
1004 name is declared as a new variable, the old variable becomes "out of
1005 scope" and the recorded scope depth stays unchanged.
1007 - "out of scope". The variable is neither in scope nor conditionally
1008 in scope. It is permanently out of scope now and can be removed from
1009 the "in scope" stack.
1011 ###### variable fields
1012 int depth, min_depth;
1013 enum { OutScope, PendingScope, CondScope, InScope } scope;
1014 struct variable *in_scope;
1016 ###### parse context
1018 struct variable *in_scope;
1020 All variables with the same name are linked together using the
1021 'previous' link. Those variables that have been affirmatively merged all
1022 have a 'merged' pointer that points to one primary variable - the most
1023 recently declared instance. When merging variables, we need to also
1024 adjust the 'merged' pointer on any other variables that had previously
1025 been merged with the one that will no longer be primary.
1027 A variable that is no longer the most recent instance of a name may
1028 still have "pending" scope, if it might still be merged with the most
1029 recent instance. These variables don't really belong in the
1030 "in_scope" list, but are not immediately removed when a new instance
1031 is found. Instead, they are detected and ignored when considering the
1032 list of in_scope names.
1034 The storage of the value of a variable will be described later. For now
1035 we just need to know that when a variable goes out of scope, it might
1036 need to be freed. For this we need to be able to find it, so assume that
1037 `var_value()` will provide that.
1039 ###### variable fields
1040 struct variable *merged;
1042 ###### ast functions
1044 static void variable_merge(struct variable *primary, struct variable *secondary)
1048 primary = primary->merged;
1050 for (v = primary->previous; v; v=v->previous)
1051 if (v == secondary || v == secondary->merged ||
1052 v->merged == secondary ||
1053 v->merged == secondary->merged) {
1054 v->scope = OutScope;
1055 v->merged = primary;
1056 variable_unlink_exec(v);
1060 ###### forward decls
1061 static struct value *var_value(struct parse_context *c, struct variable *v);
1063 ###### free global vars
1065 while (context.varlist) {
1066 struct binding *b = context.varlist;
1067 struct variable *v = b->var;
1068 context.varlist = b->next;
1071 struct variable *next = v->previous;
1074 free_value(v->type, var_value(&context, v));
1076 // This is a global constant
1077 free_exec(v->where_decl);
1084 #### Manipulating Bindings
1086 When a name is conditionally visible, a new declaration discards the
1087 old binding - the condition lapses. Conversely a usage of the name
1088 affirms the visibility and extends it to the end of the containing
1089 block - i.e. the block that contains both the original declaration and
1090 the latest usage. This is determined from `min_depth`. When a
1091 conditionally visible variable gets affirmed like this, it is also
1092 merged with other conditionally visible variables with the same name.
1094 When we parse a variable declaration we either report an error if the
1095 name is currently bound, or create a new variable at the current nest
1096 depth if the name is unbound or bound to a conditionally scoped or
1097 pending-scope variable. If the previous variable was conditionally
1098 scoped, it and its homonyms become out-of-scope.
1100 When we parse a variable reference (including non-declarative assignment
1101 "foo = bar") we report an error if the name is not bound or is bound to
1102 a pending-scope variable; update the scope if the name is bound to a
1103 conditionally scoped variable; or just proceed normally if the named
1104 variable is in scope.
1106 When we exit a scope, any variables bound at this level are either
1107 marked out of scope or pending-scoped, depending on whether the scope
1108 was sequential or parallel. Here a "parallel" scope means the "then"
1109 or "else" part of a conditional, or any "case" or "else" branch of a
1110 switch. Other scopes are "sequential".
1112 When exiting a parallel scope we check if there are any variables that
1113 were previously pending and are still visible. If there are, then
1114 they weren't redeclared in the most recent scope, so they cannot be
1115 merged and must become out-of-scope. If it is not the first of
1116 parallel scopes (based on `child_count`), we check that there was a
1117 previous binding that is still pending-scope. If there isn't, the new
1118 variable must now be out-of-scope.
1120 When exiting a sequential scope that immediately enclosed parallel
1121 scopes, we need to resolve any pending-scope variables. If there was
1122 no `else` clause, and we cannot determine that the `switch` was exhaustive,
1123 we need to mark all pending-scope variables as out-of-scope. Otherwise
1124 all pending-scope variables become conditionally scoped.
/* How a scope is being closed; passed to var_block_close() as `ct`.
 * CloseSequential and CloseParallel correspond to the "sequential" and
 * "parallel" scopes described in the text above.  With CloseElse,
 * var_block_close() turns the PendingScope variables of the same name
 * into CondScope ones.
 */
1127 enum closetype { CloseSequential, CloseParallel, CloseElse };
1129 ###### ast functions
1131 static struct variable *var_decl(struct parse_context *c, struct text s)
1133 struct binding *b = find_binding(c, s);
1134 struct variable *v = b->var;
1136 switch (v ? v->scope : OutScope) {
1138 /* Caller will report the error */
1142 v && v->scope == CondScope;
1144 v->scope = OutScope;
1148 v = calloc(1, sizeof(*v));
1149 v->previous = b->var;
1153 v->min_depth = v->depth = c->scope_depth;
1155 v->in_scope = c->in_scope;
1161 static struct variable *var_ref(struct parse_context *c, struct text s)
1163 struct binding *b = find_binding(c, s);
1164 struct variable *v = b->var;
1165 struct variable *v2;
1167 switch (v ? v->scope : OutScope) {
1170 /* Caller will report the error */
1173 /* All CondScope variables of this name need to be merged
1174 * and become InScope
1176 v->depth = v->min_depth;
1178 for (v2 = v->previous;
1179 v2 && v2->scope == CondScope;
1181 variable_merge(v, v2);
1189 static void var_block_close(struct parse_context *c, enum closetype ct,
1192 /* Close off all variables that are in_scope.
1193 * Some variables in c->scope may already be not-in-scope,
1194 * such as when a PendingScope variable is hidden by a new
1195 * variable with the same name.
1196 * So we check for v->name->var != v and drop them.
1197 * If we choose to make a variable OutScope, we drop it
1200 struct variable *v, **vp, *v2;
1203 for (vp = &c->in_scope;
1204 (v = *vp) && v->min_depth > c->scope_depth;
1205 (v->scope == OutScope || v->name->var != v)
1206 ? (*vp = v->in_scope, 0)
1207 : ( vp = &v->in_scope, 0)) {
1208 v->min_depth = c->scope_depth;
1209 if (v->name->var != v)
1210 /* This is still in scope, but we haven't just
1214 v->min_depth = c->scope_depth;
1215 if (v->scope == InScope) {
1216 /* This variable gets cleaned up when 'e' finishes */
1217 variable_unlink_exec(v);
1218 v->cleanup_exec = e;
1219 v->next_free = e->to_free;
1224 case CloseParallel: /* handle PendingScope */
1228 if (c->scope_stack->child_count == 1)
1229 /* first among parallel branches */
1230 v->scope = PendingScope;
1231 else if (v->previous &&
1232 v->previous->scope == PendingScope)
1233 /* all previous branches used name */
1234 v->scope = PendingScope;
1235 else if (v->type == Tlabel)
1236 /* Labels remain pending even when not used */
1237 v->scope = PendingScope; // UNTESTED
1239 v->scope = OutScope;
1240 if (ct == CloseElse) {
1241 /* All Pending variables with this name
1242 * are now Conditional */
1244 v2 && v2->scope == PendingScope;
1246 v2->scope = CondScope;
1250 /* Not possible as it would require
1251 * parallel scope to be nested immediately
1252 * in a parallel scope, and that never
1256 /* Not possible as we already tested for
1262 case CloseSequential:
1263 if (v->type == Tlabel)
1264 v->scope = PendingScope;
1267 v->scope = OutScope;
1270 /* There was no 'else', so we can only become
1271 * conditional if we know the cases were exhaustive,
1272 * and that doesn't mean anything yet.
1273 * So only labels become conditional..
1276 v2 && v2->scope == PendingScope;
1278 if (v2->type == Tlabel)
1279 v2->scope = CondScope;
1281 v2->scope = OutScope;
1284 case OutScope: break;
1293 The value of a variable is stored separately from the variable, on an
1294 analogue of a stack frame. There are (currently) two frames that can be
1295 active. A global frame which currently only stores constants, and a
1296 stacked frame which stores local variables. Each variable knows if it
1297 is global or not, and what its index into the frame is.
1299 Values in the global frame are known as soon as they are relevant, so
1300 the frame needs to be reallocated as it grows so it can store those
1301 values. The local frame doesn't get values until the interpreted phase
1302 is started, so there is no need to allocate until the size is known.
1304 We initialize the `frame_pos` to an impossible value, so that we can
1305 tell if it was set or not later.
1307 ###### variable fields
1311 ###### variable init
1314 ###### parse context
1316 short global_size, global_alloc;
1318 void *global, *local;
1320 ###### ast functions
1322 static struct value *var_value(struct parse_context *c, struct variable *v)
1325 if (!c->local || !v->type)
1326 return NULL; // NOTEST
1327 if (v->frame_pos + v->type->size > c->local_size) {
1328 printf("INVALID frame_pos\n"); // NOTEST
1331 return c->local + v->frame_pos;
1333 if (c->global_size > c->global_alloc) {
1334 int old = c->global_alloc;
1335 c->global_alloc = (c->global_size | 1023) + 1024;
1336 c->global = realloc(c->global, c->global_alloc);
1337 memset(c->global + old, 0, c->global_alloc - old);
1339 return c->global + v->frame_pos;
1342 static struct value *global_alloc(struct parse_context *c, struct type *t,
1343 struct variable *v, struct value *init)
1346 struct variable scratch;
1348 if (t->prepare_type)
1349 t->prepare_type(c, t, 1); // NOTEST
1351 if (c->global_size & (t->align - 1))
1352 c->global_size = (c->global_size + t->align) & ~(t->align-1); // UNTESTED
1357 v->frame_pos = c->global_size;
1359 c->global_size += v->type->size;
1360 ret = var_value(c, v);
1362 memcpy(ret, init, t->size);
1368 As global values are found -- struct field initializers, labels etc --
1369 `global_alloc()` is called to record the value in the global frame.
1371 When the program is fully parsed, we need to walk the list of variables
1372 to find any that weren't merged away and that aren't global, and to
1373 calculate the frame size and assign a frame position for each variable.
1374 For this we have `scope_finalize()`.
1376 ###### ast functions
1378 static void scope_finalize(struct parse_context *c)
1382 for (b = c->varlist; b; b = b->next) {
1384 for (v = b->var; v; v = v->previous) {
1385 struct type *t = v->type;
1390 if (c->local_size & (t->align - 1))
1391 c->local_size = (c->local_size + t->align) & ~(t->align-1);
1392 v->frame_pos = c->local_size;
1393 c->local_size += v->type->size;
1396 c->local = calloc(1, c->local_size);
1399 ###### free context storage
1400 free(context.global);
1401 free(context.local);
1405 Executables can be lots of different things. In many cases an
1406 executable is just an operation combined with one or two other
1407 executables. This allows for expressions and lists etc. Other times an
1408 executable is something quite specific like a constant or variable name.
1409 So we define a `struct exec` to be a general executable with a type, and
1410 a `struct binode` which is a subclass of `exec`, forms a node in a
1411 binary tree, and holds an operation. There will be other subclasses,
1412 and to access these we need to be able to `cast` the `exec` into the
1413 various other types. The first field in any `struct exec` is the type
1414 from the `exec_types` enum.
/* cast(structname, pointer): checked conversion of `pointer` to
 * `struct structname *`.
 * It takes the address of pointer->type and, when that address is
 * non-NULL, compares the stored tag with X<structname>; a mismatch
 * calls abort().  The result is that same address re-typed as
 * `struct structname *`, which relies on `type` being the first field
 * (as stated in the text above).
 * Uses the statement-expression `({ ... })` and `typeof` extensions.
 */
1417 #define cast(structname, pointer) ({ \
1418 const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \
1419 if (__mptr && *__mptr != X##structname) abort(); \
1420 (struct structname *)( (char *)__mptr);})
1422 #define new(structname) ({ \
1423 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1424 __ptr->type = X##structname; \
1425 __ptr->line = -1; __ptr->column = -1; \
1428 #define new_pos(structname, token) ({ \
1429 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1430 __ptr->type = X##structname; \
1431 __ptr->line = token.line; __ptr->column = token.col; \
1440 enum exec_types type;
1449 struct exec *left, *right;
1452 ###### ast functions
1454 static int __fput_loc(struct exec *loc, FILE *f)
1458 if (loc->line >= 0) {
1459 fprintf(f, "%d:%d: ", loc->line, loc->column);
1462 if (loc->type == Xbinode)
1463 return __fput_loc(cast(binode,loc)->left, f) ||
1464 __fput_loc(cast(binode,loc)->right, f); // NOTEST
1467 static void fput_loc(struct exec *loc, FILE *f)
1469 if (!__fput_loc(loc, f))
1470 fprintf(f, "??:??: "); // NOTEST
1473 Each different type of `exec` node needs a number of functions defined,
1474 a bit like methods. We must be able to free it, print it, analyse it
1475 and execute it. Once we have specific `exec` types we will need to
1476 parse them too. Let's take this a bit more slowly.
1480 The parser generator requires a `free_foo` function for each struct
1481 that stores attributes and they will often be `exec`s and subtypes
1482 thereof. So we need `free_exec` which can handle all the subtypes,
1483 and we need `free_binode`.
1485 ###### ast functions
1487 static void free_binode(struct binode *b)
1492 free_exec(b->right);
1496 ###### core functions
1497 static void free_exec(struct exec *e)
1506 ###### forward decls
1508 static void free_exec(struct exec *e);
1510 ###### free exec cases
1511 case Xbinode: free_binode(cast(binode, e)); break;
1515 Printing an `exec` requires that we know the current indent level for
1516 printing line-oriented components. As will become clear later, we
1517 also want to know what sort of bracketing to use.
1519 ###### ast functions
1521 static void do_indent(int i, char *str)
1528 ###### core functions
1529 static void print_binode(struct binode *b, int indent, int bracket)
1533 ## print binode cases
1537 static void print_exec(struct exec *e, int indent, int bracket)
1543 print_binode(cast(binode, e), indent, bracket); break;
1548 do_indent(indent, "/* FREE");
1549 for (v = e->to_free; v; v = v->next_free) {
1550 printf(" %.*s", v->name->name.len, v->name->name.txt);
1551 if (v->frame_pos >= 0)
1552 printf("(%d+%d)", v->frame_pos,
1553 v->type ? v->type->size:0);
1559 ###### forward decls
1561 static void print_exec(struct exec *e, int indent, int bracket);
1565 As discussed, analysis involves propagating type requirements around the
1566 program and looking for errors.
1568 So `propagate_types` is passed an expected type (being a `struct type`
1569 pointer together with some `val_rules` flags) that the `exec` is
1570 expected to return, and returns the type that it does return, either
1571 of which can be `NULL` signifying "unknown". An `ok` flag is passed
1572 by reference. It is set to `0` when an error is found, and `2` when
1573 any change is made. If it remains unchanged at `1`, then no more
1574 propagation is needed.
/* Bit flags that accompany the expected type given to propagate_types().
 *   Rnolabel    - reported in error messages as "(labels not permitted)".
 *   Rboolok     - NOTE(review): meaning not visible in this section;
 *                 presumably allows a Boolean where another type is
 *                 expected - confirm against type_compat().
 *   Rnoconstant - a constant variable is rejected with
 *                 "Cannot assign to a constant".
 * Note that 2<<1 is 4, i.e. the same bit as 1<<2.
 */
1578 enum val_rules {Rnolabel = 1<<0, Rboolok = 1<<1, Rnoconstant = 2<<1};
1582 if (rules & Rnolabel)
1583 fputs(" (labels not permitted)", stderr);
1586 ###### core functions
1588 static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1589 struct type *type, int rules);
1590 static struct type *__propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1591 struct type *type, int rules)
1598 switch (prog->type) {
1601 struct binode *b = cast(binode, prog);
1603 ## propagate binode cases
1607 ## propagate exec cases
1612 static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1613 struct type *type, int rules)
1615 struct type *ret = __propagate_types(prog, c, ok, type, rules);
1624 Interpreting an `exec` doesn't require anything but the `exec`. State
1625 is stored in variables and each variable will be directly linked from
1626 within the `exec` tree. The exception to this is the `main` function
1627 which needs to look at command line arguments. This function will be
1628 interpreted separately.
1630 Each `exec` can return a value combined with a type in `struct lrval`.
1631 The type may be `Tnone` but must be non-NULL. Some `exec`s will return
1632 the location of a value, which can be updated, in `lval`. Others will
1633 set `lval` to NULL indicating that there is a value of appropriate type
1636 ###### core functions
1640 struct value rval, *lval;
1643 static struct lrval _interp_exec(struct parse_context *c, struct exec *e);
1645 static struct value interp_exec(struct parse_context *c, struct exec *e,
1646 struct type **typeret)
1648 struct lrval ret = _interp_exec(c, e);
1650 if (!ret.type) abort();
1652 *typeret = ret.type;
1654 dup_value(ret.type, ret.lval, &ret.rval);
1658 static struct value *linterp_exec(struct parse_context *c, struct exec *e,
1659 struct type **typeret)
1661 struct lrval ret = _interp_exec(c, e);
1664 *typeret = ret.type;
1666 free_value(ret.type, &ret.rval);
1670 static struct lrval _interp_exec(struct parse_context *c, struct exec *e)
1673 struct value rv = {}, *lrv = NULL;
1674 struct type *rvtype;
1676 rvtype = ret.type = Tnone;
1686 struct binode *b = cast(binode, e);
1687 struct value left, right, *lleft;
1688 struct type *ltype, *rtype;
1689 ltype = rtype = Tnone;
1691 ## interp binode cases
1693 free_value(ltype, &left);
1694 free_value(rtype, &right);
1697 ## interp exec cases
1702 ## interp exec cleanup
1708 Now that we have the shape of the interpreter in place we can add some
1709 complex types and connected them in to the data structures and the
1710 different phases of parse, analyse, print, interpret.
1712 Thus far we have arrays and structs.
1716 Arrays can be declared by giving a size and a type, as `[size]type` so
1717 `freq:[26]number` declares `freq` to be an array of 26 numbers. The
1718 size can be either a literal number, or a named constant. Some day an
1719 arbitrary expression will be supported.
1721 As a formal parameter to a function, the array can be declared with a
1722 new variable as the size: `name:[size::number]string`. The `size`
1723 variable is set to the size of the array and must be a constant. As
1724 `number` is the only supported type, it can be left out:
1725 `name:[size::]string`.
1727 Arrays cannot be assigned. When pointers are introduced we will also
1728 introduce array slices which can refer to part or all of an array -
1729 the assignment syntax will create a slice. For now, an array can only
1730 ever be referenced by the name it is declared with. It is likely that
1731 a "`copy`" primitive will eventually be defined which can be used to
1732 make a copy of an array with controllable recursive depth.
1734 For now we have two sorts of array, those with fixed size either because
1735 it is given as a literal number or because it is a struct member (which
1736 cannot have a runtime-changing size), and those with a size that is
1737 determined at runtime - local variables with a const size. The former
1738 have their size calculated at parse time, the latter at run time.
1740 For the latter type, the `size` field of the type is the size of a
1741 pointer, and the array is reallocated every time it comes into scope.
1743 We differentiate struct fields with a const size from local variables
1744 with a const size by whether they are prepared at parse time or not.
1746 ###### type union fields
1749 int unspec; // size is unspecified - vsize must be set.
1752 struct variable *vsize;
1753 struct type *member;
1756 ###### value union fields
1757 void *array; // used if not static_size
1759 ###### value functions
1761 static void array_prepare_type(struct parse_context *c, struct type *type,
1764 struct value *vsize;
1766 if (!type->array.vsize || type->array.static_size)
1769 vsize = var_value(c, type->array.vsize);
1771 mpz_tdiv_q(q, mpq_numref(vsize->num), mpq_denref(vsize->num));
1772 type->array.size = mpz_get_si(q);
1776 type->array.static_size = 1;
1777 type->size = type->array.size * type->array.member->size;
1778 type->align = type->array.member->align;
1782 static void array_init(struct type *type, struct value *val)
1785 void *ptr = val->ptr;
1789 if (!type->array.static_size) {
1790 val->array = calloc(type->array.size,
1791 type->array.member->size);
1794 for (i = 0; i < type->array.size; i++) {
1796 v = (void*)ptr + i * type->array.member->size;
1797 val_init(type->array.member, v);
1801 static void array_free(struct type *type, struct value *val)
1804 void *ptr = val->ptr;
1806 if (!type->array.static_size)
1808 for (i = 0; i < type->array.size; i++) {
1810 v = (void*)ptr + i * type->array.member->size;
1811 free_value(type->array.member, v);
1813 if (!type->array.static_size)
1817 static int array_compat(struct type *require, struct type *have)
1819 if (have->compat != require->compat)
1820 return 0; // UNTESTED
1821 /* Both are arrays, so we can look at details */
1822 if (!type_compat(require->array.member, have->array.member, 0))
1824 if (have->array.unspec && require->array.unspec) {
1825 if (have->array.vsize && require->array.vsize &&
1826 have->array.vsize != require->array.vsize) // UNTESTED
1827 /* sizes might not be the same */
1828 return 0; // UNTESTED
1831 if (have->array.unspec || require->array.unspec)
1832 return 1; // UNTESTED
1833 if (require->array.vsize == NULL && have->array.vsize == NULL)
1834 return require->array.size == have->array.size;
1836 return require->array.vsize == have->array.vsize; // UNTESTED
1839 static void array_print_type(struct type *type, FILE *f)
1842 if (type->array.vsize) {
1843 struct binding *b = type->array.vsize->name;
1844 fprintf(f, "%.*s%s]", b->name.len, b->name.txt,
1845 type->array.unspec ? "::" : "");
1847 fprintf(f, "%d]", type->array.size);
1848 type_print(type->array.member, f);
1851 static struct type array_prototype = {
1853 .prepare_type = array_prepare_type,
1854 .print_type = array_print_type,
1855 .compat = array_compat,
1857 .size = sizeof(void*),
1858 .align = sizeof(void*),
1861 ###### declare terminals
1866 | [ NUMBER ] Type ${ {
1869 struct text noname = { "", 0 };
1872 $0 = t = add_type(c, noname, &array_prototype);
1873 t->array.member = $<4;
1874 t->array.vsize = NULL;
1875 if (number_parse(num, tail, $2.txt) == 0)
1876 tok_err(c, "error: unrecognised number", &$2);
1878 tok_err(c, "error: unsupported number suffix", &$2);
1880 t->array.size = mpz_get_ui(mpq_numref(num));
1881 if (mpz_cmp_ui(mpq_denref(num), 1) != 0) {
1882 tok_err(c, "error: array size must be an integer",
1884 } else if (mpz_cmp_ui(mpq_numref(num), 1UL << 30) >= 0)
1885 tok_err(c, "error: array size is too large",
1889 t->array.static_size = 1;
1890 t->size = t->array.size * t->array.member->size;
1891 t->align = t->array.member->align;
1894 | [ IDENTIFIER ] Type ${ {
1895 struct variable *v = var_ref(c, $2.txt);
1896 struct text noname = { "", 0 };
1899 tok_err(c, "error: name undeclared", &$2);
1900 else if (!v->constant)
1901 tok_err(c, "error: array size must be a constant", &$2);
1903 $0 = add_type(c, noname, &array_prototype);
1904 $0->array.member = $<4;
1906 $0->array.vsize = v;
1911 OptType -> Type ${ $0 = $<1; }$
1914 ###### formal type grammar
1916 | [ IDENTIFIER :: OptType ] Type ${ {
1917 struct variable *v = var_decl(c, $ID.txt);
1918 struct text noname = { "", 0 };
1924 $0 = add_type(c, noname, &array_prototype);
1925 $0->array.member = $<6;
1927 $0->array.unspec = 1;
1928 $0->array.vsize = v;
1934 ###### variable grammar
1936 | Variable [ Expression ] ${ {
1937 struct binode *b = new(binode);
1944 ###### print binode cases
1946 print_exec(b->left, -1, bracket);
1948 print_exec(b->right, -1, bracket);
1952 ###### propagate binode cases
1954 /* left must be an array, right must be a number,
1955 * result is the member type of the array
1957 propagate_types(b->right, c, ok, Tnum, 0);
1958 t = propagate_types(b->left, c, ok, NULL, rules & Rnoconstant);
1959 if (!t || t->compat != array_compat) {
1960 type_err(c, "error: %1 cannot be indexed", prog, t, 0, NULL);
1963 if (!type_compat(type, t->array.member, rules)) {
1964 type_err(c, "error: have %1 but need %2", prog,
1965 t->array.member, rules, type);
1967 return t->array.member;
1971 ###### interp binode cases
1977 lleft = linterp_exec(c, b->left, <ype);
1978 right = interp_exec(c, b->right, &rtype);
1980 mpz_tdiv_q(q, mpq_numref(right.num), mpq_denref(right.num));
1984 if (ltype->array.static_size)
1987 ptr = *(void**)lleft;
1988 rvtype = ltype->array.member;
1989 if (i >= 0 && i < ltype->array.size)
1990 lrv = ptr + i * rvtype->size;
1992 val_init(ltype->array.member, &rv);
1999 A `struct` is a data-type that contains one or more other data-types.
2000 It differs from an array in that each member can be of a different
2001 type, and they are accessed by name rather than by number. Thus you
2002 cannot choose an element by calculation, you need to know what you
2005 The language makes no promises about how a given structure will be
2006 stored in memory - it is free to rearrange fields to suit whatever
2007 criteria seem important.
2009 Structs are declared separately from program code - they cannot be
2010 declared in-line in a variable declaration like arrays can. A struct
2011 is given a name and this name is used to identify the type - the name
2012 is not prefixed by the word `struct` as it would be in C.
2014 Structs are only treated as the same if they have the same name.
2015 Simply having the same fields in the same order is not enough. This
2016 might change once we can create structure initializers from a list of
2019 Each component datum is identified much like a variable is declared,
2020 with a name, one or two colons, and a type. The type cannot be omitted
2021 as there is no opportunity to deduce the type from usage. An initial
2022 value can be given following an equals sign, so
2024 ##### Example: a struct type
2030 would declare a type called "complex" which has two number fields,
2031 each initialised to zero.
2033 Structs will need to be declared separately from the code that uses
2034 them, so we will need to be able to print out the declaration of a
2035 struct when reprinting the whole program. So a `print_type_decl` type
2036 function will be needed.
2038 ###### type union fields
2050 ###### type functions
2051 void (*print_type_decl)(struct type *type, FILE *f);
2053 ###### value functions
2055 static void structure_init(struct type *type, struct value *val)
2059 for (i = 0; i < type->structure.nfields; i++) {
2061 v = (void*) val->ptr + type->structure.fields[i].offset;
2062 if (type->structure.fields[i].init)
2063 dup_value(type->structure.fields[i].type,
2064 type->structure.fields[i].init,
2067 val_init(type->structure.fields[i].type, v);
2071 static void structure_free(struct type *type, struct value *val)
2075 for (i = 0; i < type->structure.nfields; i++) {
2077 v = (void*)val->ptr + type->structure.fields[i].offset;
2078 free_value(type->structure.fields[i].type, v);
2082 static void structure_free_type(struct type *t)
2085 for (i = 0; i < t->structure.nfields; i++)
2086 if (t->structure.fields[i].init) {
2087 free_value(t->structure.fields[i].type,
2088 t->structure.fields[i].init);
2090 free(t->structure.fields);
2093 static struct type structure_prototype = {
2094 .init = structure_init,
2095 .free = structure_free,
2096 .free_type = structure_free_type,
2097 .print_type_decl = structure_print_type,
2111 ###### free exec cases
2113 free_exec(cast(fieldref, e)->left);
2117 ###### declare terminals
2120 ###### variable grammar
2122 | Variable . IDENTIFIER ${ {
2123 struct fieldref *fr = new_pos(fieldref, $2);
2130 ###### print exec cases
2134 struct fieldref *f = cast(fieldref, e);
2135 print_exec(f->left, -1, bracket);
2136 printf(".%.*s", f->name.len, f->name.txt);
2140 ###### ast functions
2141 static int find_struct_index(struct type *type, struct text field)
2144 for (i = 0; i < type->structure.nfields; i++)
2145 if (text_cmp(type->structure.fields[i].name, field) == 0)
2150 ###### propagate exec cases
2154 struct fieldref *f = cast(fieldref, prog);
2155 struct type *st = propagate_types(f->left, c, ok, NULL, 0);
2158 type_err(c, "error: unknown type for field access", f->left, // UNTESTED
2160 else if (st->init != structure_init)
2161 type_err(c, "error: field reference attempted on %1, not a struct",
2162 f->left, st, 0, NULL);
2163 else if (f->index == -2) {
2164 f->index = find_struct_index(st, f->name);
2166 type_err(c, "error: cannot find requested field in %1",
2167 f->left, st, 0, NULL);
2169 if (f->index >= 0) {
2170 struct type *ft = st->structure.fields[f->index].type;
2171 if (!type_compat(type, ft, rules))
2172 type_err(c, "error: have %1 but need %2", prog,
2179 ###### interp exec cases
2182 struct fieldref *f = cast(fieldref, e);
2184 struct value *lleft = linterp_exec(c, f->left, <ype);
2185 lrv = (void*)lleft->ptr + ltype->structure.fields[f->index].offset;
2186 rvtype = ltype->structure.fields[f->index].type;
2192 struct fieldlist *prev;
2196 ###### ast functions
2197 static void free_fieldlist(struct fieldlist *f)
2201 free_fieldlist(f->prev);
2203 free_value(f->f.type, f->f.init); // UNTESTED
2204 free(f->f.init); // UNTESTED
2209 ###### top level grammar
2210 DeclareStruct -> struct IDENTIFIER FieldBlock Newlines ${ {
2212 add_type(c, $2.txt, &structure_prototype);
2214 struct fieldlist *f;
2216 for (f = $3; f; f=f->prev)
2219 t->structure.nfields = cnt;
2220 t->structure.fields = calloc(cnt, sizeof(struct field));
2223 int a = f->f.type->align;
2225 t->structure.fields[cnt] = f->f;
2226 if (t->size & (a-1))
2227 t->size = (t->size | (a-1)) + 1;
2228 t->structure.fields[cnt].offset = t->size;
2229 t->size += ((f->f.type->size - 1) | (a-1)) + 1;
2238 FieldBlock -> { IN OptNL FieldLines OUT OptNL } ${ $0 = $<FL; }$
2239 | { SimpleFieldList } ${ $0 = $<SFL; }$
2240 | IN OptNL FieldLines OUT ${ $0 = $<FL; }$
2241 | SimpleFieldList EOL ${ $0 = $<SFL; }$
2243 FieldLines -> SimpleFieldList Newlines ${ $0 = $<SFL; }$
2244 | FieldLines SimpleFieldList Newlines ${
2249 SimpleFieldList -> Field ${ $0 = $<F; }$
2250 | SimpleFieldList ; Field ${
2254 | SimpleFieldList ; ${
2257 | ERROR ${ tok_err(c, "Syntax error in struct field", &$1); }$
2259 Field -> IDENTIFIER : Type = Expression ${ {
2262 $0 = calloc(1, sizeof(struct fieldlist));
2263 $0->f.name = $1.txt;
2268 propagate_types($<5, c, &ok, $3, 0);
2271 c->parse_error = 1; // UNTESTED
2273 struct value vl = interp_exec(c, $5, NULL);
2274 $0->f.init = global_alloc(c, $0->f.type, NULL, &vl);
2277 | IDENTIFIER : Type ${
2278 $0 = calloc(1, sizeof(struct fieldlist));
2279 $0->f.name = $1.txt;
2281 if ($0->f.type->prepare_type)
2282 $0->f.type->prepare_type(c, $0->f.type, 1);
2285 ###### forward decls
2286 static void structure_print_type(struct type *t, FILE *f);
2288 ###### value functions
2289 static void structure_print_type(struct type *t, FILE *f) // UNTESTED
2293 fprintf(f, "struct %.*s\n", t->name.len, t->name.txt);
2295 for (i = 0; i < t->structure.nfields; i++) {
2296 struct field *fl = t->structure.fields + i;
2297 fprintf(f, " %.*s : ", fl->name.len, fl->name.txt);
2298 type_print(fl->type, f);
2299 if (fl->type->print && fl->init) {
2301 if (fl->type == Tstr)
2302 fprintf(f, "\""); // UNTESTED
2303 print_value(fl->type, fl->init);
2304 if (fl->type == Tstr)
2305 fprintf(f, "\""); // UNTESTED
2311 ###### print type decls
2313 struct type *t; // UNTESTED
2316 while (target != 0) {
2318 for (t = context.typelist; t ; t=t->next)
2319 if (t->print_type_decl) {
2328 t->print_type_decl(t, stdout);
2336 A function is a named chunk of code which can be passed parameters and
2337 can return results. Each function has an implicit type which includes
2338 the set of parameters and the return value. As yet these types cannot
2339 be declared separately from the function itself.
2341 In fact, only one function is currently possible - `main`. `main` is
2342 passed an array of strings together with the size of the array, and
2343 doesn't return anything. The strings are command line arguments.
2345 The parameters can be specified either in parentheses as a list, such as
2347 ##### Example: function 1
2349 func main(av:[ac::number]string)
2352 or as an indented list of one parameter per line
2354 ##### Example: function 2
2357 argv:[argc::number]string
2361 For constructing these lists we use a `List` binode, which will be
2362 further detailed when Expression Lists are introduced.
2372 MainFunction -> func main ( OpenScope Args ) Block Newlines ${
2375 $0->left = reorder_bilist($<Ar);
2377 var_block_close(c, CloseSequential, $0);
2378 if (c->scope_stack && !c->parse_error) abort();
2380 | func main IN OpenScope OptNL Args OUT OptNL do Block Newlines ${
2383 $0->left = reorder_bilist($<Ar);
2385 var_block_close(c, CloseSequential, $0);
2386 if (c->scope_stack && !c->parse_error) abort();
2388 | func main NEWLINE OpenScope OptNL do Block Newlines ${
2393 var_block_close(c, CloseSequential, $0);
2394 if (c->scope_stack && !c->parse_error) abort();
2397 Args -> ${ $0 = NULL; }$
2398 | Varlist ${ $0 = $<1; }$
2399 | Varlist ; ${ $0 = $<1; }$
2400 | Varlist NEWLINE ${ $0 = $<1; }$
2402 Varlist -> Varlist ; ArgDecl ${ // UNTESTED
2416 ArgDecl -> IDENTIFIER : FormalType ${ {
2417 struct variable *v = var_decl(c, $1.txt);
2423 ## Executables: the elements of code
2425 Each code element needs to be parsed, printed, analysed,
2426 interpreted, and freed. There are several, so let's just start with
2427 the easy ones and work our way up.
2431 We have already met values as separate objects. When manifest
2432 constants appear in the program text, that must result in an executable
2433 which has a constant value. So the `val` structure embeds a value in
2446 ###### ast functions
2447 struct val *new_val(struct type *T, struct token tk)
2449 struct val *v = new_pos(val, tk);
2460 $0 = new_val(Tbool, $1);
2464 $0 = new_val(Tbool, $1);
2468 $0 = new_val(Tnum, $1);
2471 if (number_parse($0->val.num, tail, $1.txt) == 0)
2472 mpq_init($0->val.num); // UNTESTED
2474 tok_err(c, "error: unsupported number suffix",
2479 $0 = new_val(Tstr, $1);
2482 string_parse(&$1, '\\', &$0->val.str, tail);
2484 tok_err(c, "error: unsupported string suffix",
2489 $0 = new_val(Tstr, $1);
2492 string_parse(&$1, '\\', &$0->val.str, tail);
2494 tok_err(c, "error: unsupported string suffix",
2499 ###### print exec cases
2502 struct val *v = cast(val, e);
2503 if (v->vtype == Tstr)
2505 print_value(v->vtype, &v->val);
2506 if (v->vtype == Tstr)
2511 ###### propagate exec cases
2514 struct val *val = cast(val, prog);
2515 if (!type_compat(type, val->vtype, rules))
2516 type_err(c, "error: expected %1%r found %2",
2517 prog, type, rules, val->vtype);
2521 ###### interp exec cases
2523 rvtype = cast(val, e)->vtype;
2524 dup_value(rvtype, &cast(val, e)->val, &rv);
2527 ###### ast functions
2528 static void free_val(struct val *v)
2531 free_value(v->vtype, &v->val);
2535 ###### free exec cases
2536 case Xval: free_val(cast(val, e)); break;
2538 ###### ast functions
2539 // Move all nodes from 'b' to 'rv', reversing their order.
2540 // In 'b' 'left' is a list, and 'right' is the last node.
2541 // In 'rv', 'left' is the first node and 'right' is a list.
2542 static struct binode *reorder_bilist(struct binode *b)
2544 struct binode *rv = NULL;
2547 struct exec *t = b->right;
2551 b = cast(binode, b->left);
2561 Just as we used a `val` to wrap a value into an `exec`, we similarly
2562 need a `var` to wrap a `variable` into an exec. While each `val`
2563 contained a copy of the value, each `var` holds a link to the variable
2564 because it really is the same variable no matter where it appears.
2565 When a variable is used, we need to remember to follow the `->merged`
2566 link to find the primary instance.
2574 struct variable *var;
2582 VariableDecl -> IDENTIFIER : ${ {
2583 struct variable *v = var_decl(c, $1.txt);
2584 $0 = new_pos(var, $1);
2589 v = var_ref(c, $1.txt);
2591 type_err(c, "error: variable '%v' redeclared",
2593 type_err(c, "info: this is where '%v' was first declared",
2594 v->where_decl, NULL, 0, NULL);
2597 | IDENTIFIER :: ${ {
2598 struct variable *v = var_decl(c, $1.txt);
2599 $0 = new_pos(var, $1);
2605 v = var_ref(c, $1.txt);
2607 type_err(c, "error: variable '%v' redeclared",
2609 type_err(c, "info: this is where '%v' was first declared",
2610 v->where_decl, NULL, 0, NULL);
2613 | IDENTIFIER : Type ${ {
2614 struct variable *v = var_decl(c, $1.txt);
2615 $0 = new_pos(var, $1);
2622 v = var_ref(c, $1.txt);
2624 type_err(c, "error: variable '%v' redeclared",
2626 type_err(c, "info: this is where '%v' was first declared",
2627 v->where_decl, NULL, 0, NULL);
2630 | IDENTIFIER :: Type ${ {
2631 struct variable *v = var_decl(c, $1.txt);
2632 $0 = new_pos(var, $1);
2640 v = var_ref(c, $1.txt);
2642 type_err(c, "error: variable '%v' redeclared",
2644 type_err(c, "info: this is where '%v' was first declared",
2645 v->where_decl, NULL, 0, NULL);
2650 Variable -> IDENTIFIER ${ {
2651 struct variable *v = var_ref(c, $1.txt);
2652 $0 = new_pos(var, $1);
2654 /* This might be a label - allocate a var just in case */
2655 v = var_decl(c, $1.txt);
2662 cast(var, $0)->var = v;
2666 ###### print exec cases
2669 struct var *v = cast(var, e);
2671 struct binding *b = v->var->name;
2672 printf("%.*s", b->name.len, b->name.txt);
2679 if (loc && loc->type == Xvar) {
2680 struct var *v = cast(var, loc);
2682 struct binding *b = v->var->name;
2683 fprintf(stderr, "%.*s", b->name.len, b->name.txt);
2685 fputs("???", stderr); // NOTEST
2687 fputs("NOTVAR", stderr); // NOTEST
2690 ###### propagate exec cases
2694 struct var *var = cast(var, prog);
2695 struct variable *v = var->var;
2697 type_err(c, "%d:BUG: no variable!!", prog, NULL, 0, NULL); // NOTEST
2698 return Tnone; // NOTEST
2701 if (v->constant && (rules & Rnoconstant)) {
2702 type_err(c, "error: Cannot assign to a constant: %v",
2703 prog, NULL, 0, NULL);
2704 type_err(c, "info: name was defined as a constant here",
2705 v->where_decl, NULL, 0, NULL);
2708 if (v->type == Tnone && v->where_decl == prog)
2709 type_err(c, "error: variable used but not declared: %v",
2710 prog, NULL, 0, NULL);
2711 if (v->type == NULL) {
2712 if (type && *ok != 0) {
2714 v->where_set = prog;
2719 if (!type_compat(type, v->type, rules)) {
2720 type_err(c, "error: expected %1%r but variable '%v' is %2", prog,
2721 type, rules, v->type);
2722 type_err(c, "info: this is where '%v' was set to %1", v->where_set,
2723 v->type, rules, NULL);
2730 ###### interp exec cases
2733 struct var *var = cast(var, e);
2734 struct variable *v = var->var;
2737 lrv = var_value(c, v);
2742 ###### ast functions
2744 static void free_var(struct var *v)
2749 ###### free exec cases
2750 case Xvar: free_var(cast(var, e)); break;
2752 ### Expressions: Conditional
2754 Our first user of the `binode` will be conditional expressions, which
2755 is a bit odd as they actually have three components. That will be
2756 handled by having 2 binodes for each expression. The conditional
2757 expression is the lowest precedence operator which is why we define it
2758 first - to start the precedence list.
2760 Conditional expressions are of the form "value `if` condition `else`
2761 other_value". They associate to the right, so everything to the right
2762 of `else` is part of an else value, while only a higher-precedence expression to
2763 the left of `if` is the if value. Between `if` and `else` there is no
2764 room for ambiguity, so a full conditional expression is allowed in
2776 Expression -> Expression if Expression else Expression $$ifelse ${ {
2777 struct binode *b1 = new(binode);
2778 struct binode *b2 = new(binode);
2787 ## expression grammar
2789 ###### print binode cases
2792 b2 = cast(binode, b->right);
2793 if (bracket) printf("(");
2794 print_exec(b2->left, -1, bracket);
2796 print_exec(b->left, -1, bracket);
2798 print_exec(b2->right, -1, bracket);
2799 if (bracket) printf(")");
2802 ###### propagate binode cases
2805 /* cond must be Tbool, others must match */
2806 struct binode *b2 = cast(binode, b->right);
2809 propagate_types(b->left, c, ok, Tbool, 0);
2810 t = propagate_types(b2->left, c, ok, type, Rnolabel);
2811 t2 = propagate_types(b2->right, c, ok, type ?: t, Rnolabel);
2815 ###### interp binode cases
2818 struct binode *b2 = cast(binode, b->right);
2819 left = interp_exec(c, b->left, &ltype);
2821 rv = interp_exec(c, b2->left, &rvtype); // UNTESTED
2823 rv = interp_exec(c, b2->right, &rvtype);
2829 We take a brief detour, now that we have expressions, to describe lists
2830 of expressions. These will be needed for function parameters and
2831 possibly other situations. They seem generic enough to introduce here
2832 to be used elsewhere.
2834 An ExpressionList will use the `List` type of `binode`, building up at
2835 the end. Any place where they are used will probably call
2836 `reorder_bilist()` to get a more normal first/next arrangement.
2838 ###### declare terminals
2841 `List` execs have no implicit semantics, so they are never propagated or
2842 interpreted. They can be printed as a comma-separated list, which is how
2843 they are parsed. Note they are also used for function formal parameter
2844 lists. In that case a separate function is used to print them.
2846 ###### print binode cases
2850 print_exec(b->left, -1, bracket);
2853 b = cast(binode, b->right);
2857 ###### propagate binode cases
2858 case List: abort(); // NOTEST
2859 ###### interp binode cases
2860 case List: abort(); // NOTEST
2865 ExpressionList -> ExpressionList , Expression ${
2878 ### Expressions: Boolean
2880 The next class of expressions to use the `binode` will be Boolean
2881 expressions. "`and then`" and "`or else`" are similar to `and` and `or`
2882 and have the same corresponding precedence. The difference is that they don't
2883 evaluate the second expression if not necessary.
2892 ###### expr precedence
2897 ###### expression grammar
2898 | Expression or Expression ${ {
2899 struct binode *b = new(binode);
2905 | Expression or else Expression ${ {
2906 struct binode *b = new(binode);
2913 | Expression and Expression ${ {
2914 struct binode *b = new(binode);
2920 | Expression and then Expression ${ {
2921 struct binode *b = new(binode);
2928 | not Expression ${ {
2929 struct binode *b = new(binode);
2935 ###### print binode cases
2937 if (bracket) printf("(");
2938 print_exec(b->left, -1, bracket);
2940 print_exec(b->right, -1, bracket);
2941 if (bracket) printf(")");
2944 if (bracket) printf("(");
2945 print_exec(b->left, -1, bracket);
2946 printf(" and then ");
2947 print_exec(b->right, -1, bracket);
2948 if (bracket) printf(")");
2951 if (bracket) printf("(");
2952 print_exec(b->left, -1, bracket);
2954 print_exec(b->right, -1, bracket);
2955 if (bracket) printf(")");
2958 if (bracket) printf("(");
2959 print_exec(b->left, -1, bracket);
2960 printf(" or else ");
2961 print_exec(b->right, -1, bracket);
2962 if (bracket) printf(")");
2965 if (bracket) printf("(");
2967 print_exec(b->right, -1, bracket);
2968 if (bracket) printf(")");
2971 ###### propagate binode cases
2977 /* both must be Tbool, result is Tbool */
2978 propagate_types(b->left, c, ok, Tbool, 0);
2979 propagate_types(b->right, c, ok, Tbool, 0);
2980 if (type && type != Tbool)
2981 type_err(c, "error: %1 operation found where %2 expected", prog,
2985 ###### interp binode cases
2987 rv = interp_exec(c, b->left, &rvtype);
2988 right = interp_exec(c, b->right, &rtype);
2989 rv.bool = rv.bool && right.bool;
2992 rv = interp_exec(c, b->left, &rvtype);
2994 rv = interp_exec(c, b->right, NULL);
2997 rv = interp_exec(c, b->left, &rvtype);
2998 right = interp_exec(c, b->right, &rtype);
2999 rv.bool = rv.bool || right.bool;
3002 rv = interp_exec(c, b->left, &rvtype);
3004 rv = interp_exec(c, b->right, NULL);
3007 rv = interp_exec(c, b->right, &rvtype);
3011 ### Expressions: Comparison
3013 Of slightly higher precedence than Boolean expressions are Comparisons.
3014 A comparison takes arguments of any comparable type, but the two types
3017 To simplify the parsing we introduce an `eop` which can record an
3018 expression operator, and the `CMPop` non-terminal will match one of them.
3025 ###### ast functions
3026 static void free_eop(struct eop *e)
3040 ###### expr precedence
3041 $LEFT < > <= >= == != CMPop
3043 ###### expression grammar
3044 | Expression CMPop Expression ${ {
3045 struct binode *b = new(binode);
3055 CMPop -> < ${ $0.op = Less; }$
3056 | > ${ $0.op = Gtr; }$
3057 | <= ${ $0.op = LessEq; }$
3058 | >= ${ $0.op = GtrEq; }$
3059 | == ${ $0.op = Eql; }$
3060 | != ${ $0.op = NEql; }$
3062 ###### print binode cases
3070 if (bracket) printf("(");
3071 print_exec(b->left, -1, bracket);
3073 case Less: printf(" < "); break;
3074 case LessEq: printf(" <= "); break;
3075 case Gtr: printf(" > "); break;
3076 case GtrEq: printf(" >= "); break;
3077 case Eql: printf(" == "); break;
3078 case NEql: printf(" != "); break;
3079 default: abort(); // NOTEST
3081 print_exec(b->right, -1, bracket);
3082 if (bracket) printf(")");
3085 ###### propagate binode cases
3092 /* Both must match but not be labels, result is Tbool */
3093 t = propagate_types(b->left, c, ok, NULL, Rnolabel);
3095 propagate_types(b->right, c, ok, t, 0);
3097 t = propagate_types(b->right, c, ok, NULL, Rnolabel); // UNTESTED
3099 t = propagate_types(b->left, c, ok, t, 0); // UNTESTED
3101 if (!type_compat(type, Tbool, 0))
3102 type_err(c, "error: Comparison returns %1 but %2 expected", prog,
3103 Tbool, rules, type);
3106 ###### interp binode cases
3115 left = interp_exec(c, b->left, &ltype);
3116 right = interp_exec(c, b->right, &rtype);
3117 cmp = value_cmp(ltype, rtype, &left, &right);
3120 case Less: rv.bool = cmp < 0; break;
3121 case LessEq: rv.bool = cmp <= 0; break;
3122 case Gtr: rv.bool = cmp > 0; break;
3123 case GtrEq: rv.bool = cmp >= 0; break;
3124 case Eql: rv.bool = cmp == 0; break;
3125 case NEql: rv.bool = cmp != 0; break;
3126 default: rv.bool = 0; break; // NOTEST
3131 ### Expressions: The rest
3133 The remaining expressions with the highest precedence are arithmetic,
3134 string concatenation, and string conversion. String concatenation
3135 (`++`) has the same precedence as multiplication and division, but lower
3138 String conversion is a temporary feature until I get a better type
3139 system. `$` is a prefix operator which expects a string and returns
3142 `+` and `-` are both infix and prefix operations (where they are
3143 absolute value and negation). These have different operator names.
3145 We also have a 'Bracket' operator which records where parentheses were
3146 found. This makes it easy to reproduce these when printing. Possibly I
3147 should only insert brackets where needed for precedence.
3157 ###### expr precedence
3163 ###### expression grammar
3164 | Expression Eop Expression ${ {
3165 struct binode *b = new(binode);
3172 | Expression Top Expression ${ {
3173 struct binode *b = new(binode);
3180 | ( Expression ) ${ {
3181 struct binode *b = new_pos(binode, $1);
3186 | Uop Expression ${ {
3187 struct binode *b = new(binode);
3192 | Value ${ $0 = $<1; }$
3193 | Variable ${ $0 = $<1; }$
3196 Eop -> + ${ $0.op = Plus; }$
3197 | - ${ $0.op = Minus; }$
3199 Uop -> + ${ $0.op = Absolute; }$
3200 | - ${ $0.op = Negate; }$
3201 | $ ${ $0.op = StringConv; }$
3203 Top -> * ${ $0.op = Times; }$
3204 | / ${ $0.op = Divide; }$
3205 | % ${ $0.op = Rem; }$
3206 | ++ ${ $0.op = Concat; }$
3208 ###### print binode cases
3215 if (bracket) printf("(");
3216 print_exec(b->left, indent, bracket);
3218 case Plus: fputs(" + ", stdout); break;
3219 case Minus: fputs(" - ", stdout); break;
3220 case Times: fputs(" * ", stdout); break;
3221 case Divide: fputs(" / ", stdout); break;
3222 case Rem: fputs(" % ", stdout); break;
3223 case Concat: fputs(" ++ ", stdout); break;
3224 default: abort(); // NOTEST
3226 print_exec(b->right, indent, bracket);
3227 if (bracket) printf(")");
3232 if (bracket) printf("(");
3234 case Absolute: fputs("+", stdout); break;
3235 case Negate: fputs("-", stdout); break;
3236 case StringConv: fputs("$", stdout); break;
3237 default: abort(); // NOTEST
3239 print_exec(b->right, indent, bracket);
3240 if (bracket) printf(")");
3244 print_exec(b->right, indent, bracket);
3248 ###### propagate binode cases
3254 /* both must be numbers, result is Tnum */
3257 /* as propagate_types ignores a NULL,
3258 * unary ops fit here too */
3259 propagate_types(b->left, c, ok, Tnum, 0);
3260 propagate_types(b->right, c, ok, Tnum, 0);
3261 if (!type_compat(type, Tnum, 0))
3262 type_err(c, "error: Arithmetic returns %1 but %2 expected", prog,
3267 /* both must be Tstr, result is Tstr */
3268 propagate_types(b->left, c, ok, Tstr, 0);
3269 propagate_types(b->right, c, ok, Tstr, 0);
3270 if (!type_compat(type, Tstr, 0))
3271 type_err(c, "error: Concat returns %1 but %2 expected", prog,
3276 /* op must be string, result is number */
3277 propagate_types(b->left, c, ok, Tstr, 0);
3278 if (!type_compat(type, Tnum, 0))
3279 type_err(c, // UNTESTED
3280 "error: Can only convert string to number, not %1",
3281 prog, type, 0, NULL);
3285 return propagate_types(b->right, c, ok, type, 0);
3287 ###### interp binode cases
3290 rv = interp_exec(c, b->left, &rvtype);
3291 right = interp_exec(c, b->right, &rtype);
3292 mpq_add(rv.num, rv.num, right.num);
3295 rv = interp_exec(c, b->left, &rvtype);
3296 right = interp_exec(c, b->right, &rtype);
3297 mpq_sub(rv.num, rv.num, right.num);
3300 rv = interp_exec(c, b->left, &rvtype);
3301 right = interp_exec(c, b->right, &rtype);
3302 mpq_mul(rv.num, rv.num, right.num);
3305 rv = interp_exec(c, b->left, &rvtype);
3306 right = interp_exec(c, b->right, &rtype);
3307 mpq_div(rv.num, rv.num, right.num);
3312 left = interp_exec(c, b->left, &ltype);
3313 right = interp_exec(c, b->right, &rtype);
3314 mpz_init(l); mpz_init(r); mpz_init(rem);
3315 mpz_tdiv_q(l, mpq_numref(left.num), mpq_denref(left.num));
3316 mpz_tdiv_q(r, mpq_numref(right.num), mpq_denref(right.num));
3317 mpz_tdiv_r(rem, l, r);
3318 val_init(Tnum, &rv);
3319 mpq_set_z(rv.num, rem);
3320 mpz_clear(r); mpz_clear(l); mpz_clear(rem);
3325 rv = interp_exec(c, b->right, &rvtype);
3326 mpq_neg(rv.num, rv.num);
3329 rv = interp_exec(c, b->right, &rvtype);
3330 mpq_abs(rv.num, rv.num);
3333 rv = interp_exec(c, b->right, &rvtype);
3336 left = interp_exec(c, b->left, &ltype);
3337 right = interp_exec(c, b->right, &rtype);
3339 rv.str = text_join(left.str, right.str);
3342 right = interp_exec(c, b->right, &rvtype);
3346 struct text tx = right.str;
3349 if (tx.txt[0] == '-') {
3350 neg = 1; // UNTESTED
3351 tx.txt++; // UNTESTED
3352 tx.len--; // UNTESTED
3354 if (number_parse(rv.num, tail, tx) == 0)
3355 mpq_init(rv.num); // UNTESTED
3357 mpq_neg(rv.num, rv.num); // UNTESTED
3359 printf("Unsupported suffix: %.*s\n", tx.len, tx.txt); // UNTESTED
3363 ###### value functions
3365 static struct text text_join(struct text a, struct text b)
3368 rv.len = a.len + b.len;
3369 rv.txt = malloc(rv.len);
3370 memcpy(rv.txt, a.txt, a.len);
3371 memcpy(rv.txt+a.len, b.txt, b.len);
3375 ### Blocks, Statements, and Statement lists.
3377 Now that we have expressions out of the way we need to turn to
3378 statements. There are simple statements and more complex statements.
3379 Simple statements do not contain (syntactic) newlines, complex statements do.
3381 Statements often come in sequences and we have corresponding simple
3382 statement lists and complex statement lists.
3383 The former comprise only simple statements separated by semicolons.
3384 The latter comprise complex statements and simple statement lists. They are
3385 separated by newlines. Thus the semicolon is only used to separate
3386 simple statements on the one line. This may be overly restrictive,
3387 but I'm not sure I ever want a complex statement to share a line with
3390 Note that a simple statement list can still use multiple lines if
3391 subsequent lines are indented, so
3393 ###### Example: wrapped simple statement list
3398 is a single simple statement list. This might allow room for
3399 confusion, so I'm not set on it yet.
3401 A simple statement list needs no extra syntax. A complex statement
3402 list has two syntactic forms. It can be enclosed in braces (much like
3403 C blocks), or it can be introduced by an indent and continue until an
3404 unindented newline (much like Python blocks). With this extra syntax
3405 it is referred to as a block.
3407 Note that a block does not have to include any newlines if it only
3408 contains simple statements. So both of:
3410 if condition: a=b; d=f
3412 if condition { a=b; print f }
3416 In either case the list is constructed from a `binode` list with
3417 `Block` as the operator. When parsing the list it is most convenient
3418 to append to the end, so a list is a list and a statement. When using
3419 the list it is more convenient to consider a list to be a statement
3420 and a list. So we need a function to re-order a list.
3421 `reorder_bilist` serves this purpose.
3423 The only stand-alone statement we introduce at this stage is `pass`
3424 which does nothing and is represented as a `NULL` pointer in a `Block`
3425 list. Other stand-alone statements will follow once the infrastructure
3436 Block -> { IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
3437 | { SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
3438 | SimpleStatements ; ${ $0 = reorder_bilist($<SS); }$
3439 | SimpleStatements EOL ${ $0 = reorder_bilist($<SS); }$
3440 | IN OptNL Statementlist OUT ${ $0 = $<Sl; }$
3442 OpenBlock -> OpenScope { IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
3443 | OpenScope { SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
3444 | OpenScope SimpleStatements ; ${ $0 = reorder_bilist($<SS); }$
3445 | OpenScope SimpleStatements EOL ${ $0 = reorder_bilist($<SS); }$
3446 | IN OpenScope OptNL Statementlist OUT ${ $0 = $<Sl; }$
3448 UseBlock -> { OpenScope IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
3449 | { OpenScope SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
3450 | IN OpenScope OptNL Statementlist OUT ${ $0 = $<Sl; }$
3452 ColonBlock -> { IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
3453 | { SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
3454 | : SimpleStatements ; ${ $0 = reorder_bilist($<SS); }$
3455 | : SimpleStatements EOL ${ $0 = reorder_bilist($<SS); }$
3456 | : IN OptNL Statementlist OUT ${ $0 = $<Sl; }$
3458 Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<CS); }$
3460 ComplexStatements -> ComplexStatements ComplexStatement ${
3470 | ComplexStatement ${
3482 ComplexStatement -> SimpleStatements Newlines ${
3483 $0 = reorder_bilist($<SS);
3485 | SimpleStatements ; Newlines ${
3486 $0 = reorder_bilist($<SS);
3488 ## ComplexStatement Grammar
3491 SimpleStatements -> SimpleStatements ; SimpleStatement ${
3497 | SimpleStatement ${
3505 SimpleStatement -> pass ${ $0 = NULL; }$
3506 | ERROR ${ tok_err(c, "Syntax error in statement", &$1); }$
3507 ## SimpleStatement Grammar
3509 ###### print binode cases
3513 if (b->left == NULL) // UNTESTED
3514 printf("pass"); // UNTESTED
3516 print_exec(b->left, indent, bracket); // UNTESTED
3517 if (b->right) { // UNTESTED
3518 printf("; "); // UNTESTED
3519 print_exec(b->right, indent, bracket); // UNTESTED
3522 // block, one per line
3523 if (b->left == NULL)
3524 do_indent(indent, "pass\n");
3526 print_exec(b->left, indent, bracket);
3528 print_exec(b->right, indent, bracket);
3532 ###### propagate binode cases
3535 /* If any statement returns something other than Tnone
3536 * or Tbool then all such must return same type.
3537 * As each statement may be Tnone or something else,
3538 * we must always pass NULL (unknown) down, otherwise an incorrect
3539 * error might occur. We never return Tnone unless it is
3544 for (e = b; e; e = cast(binode, e->right)) {
3545 t = propagate_types(e->left, c, ok, NULL, rules);
3546 if ((rules & Rboolok) && t == Tbool)
3548 if (t && t != Tnone && t != Tbool) {
3552 type_err(c, "error: expected %1%r, found %2",
3553 e->left, type, rules, t);
3559 ###### interp binode cases
3561 while (rvtype == Tnone &&
3564 rv = interp_exec(c, b->left, &rvtype);
3565 b = cast(binode, b->right);
3569 ### The Print statement
3571 `print` is a simple statement that takes a comma-separated list of
3572 expressions and prints the values separated by spaces and terminated
3573 by a newline. No control of formatting is possible.
3575 `print` uses `ExpressionList` to collect the expressions and stores them
3576 on the left side of a `Print` binode unless there is a trailing comma
3577 when the list is stored on the `right` side and no trailing newline is
3583 ##### expr precedence
3586 ###### SimpleStatement Grammar
3588 | print ExpressionList ${
3592 $0->left = reorder_bilist($<EL);
3594 | print ExpressionList , ${ {
3597 $0->right = reorder_bilist($<EL);
3607 ###### print binode cases
3610 do_indent(indent, "print");
3612 print_exec(b->right, -1, bracket);
3615 print_exec(b->left, -1, bracket);
3620 ###### propagate binode cases
3623 /* don't care but all must be consistent */
3625 b = cast(binode, b->left);
3627 b = cast(binode, b->right);
3629 propagate_types(b->left, c, ok, NULL, Rnolabel);
3630 b = cast(binode, b->right);
3634 ###### interp binode cases
3638 struct binode *b2 = cast(binode, b->left);
3640 b2 = cast(binode, b->right);
3641 for (; b2; b2 = cast(binode, b2->right)) {
3642 left = interp_exec(c, b2->left, &ltype);
3643 print_value(ltype, &left);
3644 free_value(ltype, &left);
3648 if (b->right == NULL)
3654 ###### Assignment statement
3656 An assignment will assign a value to a variable, providing it hasn't
3657 been declared as a constant. The analysis phase ensures that the type
3658 will be correct so the interpreter just needs to perform the
3659 calculation. There is a form of assignment which declares a new
3660 variable as well as assigning a value. If a name is assigned before
3661 it is declared, an error will be raised as the name is created as
3662 `Tlabel` and it is illegal to assign to such names.
3668 ###### declare terminals
3671 ###### SimpleStatement Grammar
3672 | Variable = Expression ${
3678 | VariableDecl = Expression ${
3686 if ($1->var->where_set == NULL) {
3688 "Variable declared with no type or value: %v",
3698 ###### print binode cases
3701 do_indent(indent, "");
3702 print_exec(b->left, indent, bracket);
3704 print_exec(b->right, indent, bracket);
3711 struct variable *v = cast(var, b->left)->var;
3712 do_indent(indent, "");
3713 print_exec(b->left, indent, bracket);
3714 if (cast(var, b->left)->var->constant) {
3716 if (v->where_decl == v->where_set) {
3717 type_print(v->type, stdout);
3722 if (v->where_decl == v->where_set) {
3723 type_print(v->type, stdout);
3729 print_exec(b->right, indent, bracket);
3736 ###### propagate binode cases
3740 /* Both must match and not be labels,
3741 * Type must support 'dup',
3742 * For Assign, left must not be constant.
3745 t = propagate_types(b->left, c, ok, NULL,
3746 Rnolabel | (b->op == Assign ? Rnoconstant : 0));
3751 if (propagate_types(b->right, c, ok, t, 0) != t)
3752 if (b->left->type == Xvar)
3753 type_err(c, "info: variable '%v' was set as %1 here.",
3754 cast(var, b->left)->var->where_set, t, rules, NULL);
3756 t = propagate_types(b->right, c, ok, NULL, Rnolabel);
3758 propagate_types(b->left, c, ok, t,
3759 (b->op == Assign ? Rnoconstant : 0));
3761 if (t && t->dup == NULL)
3762 type_err(c, "error: cannot assign value of type %1", b, t, 0, NULL);
3767 ###### interp binode cases
3770 lleft = linterp_exec(c, b->left, &ltype);
3771 right = interp_exec(c, b->right, &rtype);
3773 free_value(ltype, lleft);
3774 dup_value(ltype, &right, lleft);
3781 struct variable *v = cast(var, b->left)->var;
3784 val = var_value(c, v);
3785 if (v->type->prepare_type)
3786 v->type->prepare_type(c, v->type, 0);
3788 right = interp_exec(c, b->right, &rtype);
3789 memcpy(val, &right, rtype->size);
3792 val_init(v->type, val);
3797 ### The `use` statement
3799 The `use` statement is the last "simple" statement. It is needed when
3800 the condition in a conditional statement is a block. `use` works much
3801 like `return` in C, but only completes the `condition`, not the whole
3807 ###### expr precedence
3810 ###### SimpleStatement Grammar
3812 $0 = new_pos(binode, $1);
3815 if ($0->right->type == Xvar) {
3816 struct var *v = cast(var, $0->right);
3817 if (v->var->type == Tnone) {
3818 /* Convert this to a label */
3821 v->var->type = Tlabel;
3822 val = global_alloc(c, Tlabel, v->var, NULL);
3828 ###### print binode cases
3831 do_indent(indent, "use ");
3832 print_exec(b->right, -1, bracket);
3837 ###### propagate binode cases
3840 /* result matches value */
3841 return propagate_types(b->right, c, ok, type, 0);
3843 ###### interp binode cases
3846 rv = interp_exec(c, b->right, &rvtype);
3849 ### The Conditional Statement
3851 This is the biggy and currently the only complex statement. This
3852 subsumes `if`, `while`, `do/while`, `switch`, and some parts of `for`.
3853 It is comprised of a number of parts, all of which are optional though
3854 set combinations apply. Each part is (usually) a key word (`then` is
3855 sometimes optional) followed by either an expression or a code block,
3856 except the `casepart` which is a "key word and an expression" followed
3857 by a code block. The code-block option is valid for all parts and,
3858 where an expression is also allowed, the code block can use the `use`
3859 statement to report a value. If the code block does not report a value
3860 the effect is similar to reporting `True`.
3862 The `else` and `case` parts, as well as `then` when combined with
3863 `if`, can contain a `use` statement which will apply to some
3864 containing conditional statement. `for` parts, `do` parts and `then`
3865 parts used with `for` can never contain a `use`, except in some
3866 subordinate conditional statement.
3868 If there is a `forpart`, it is executed first, only once.
3869 If there is a `dopart`, then it is executed repeatedly providing
3870 always that the `condpart` or `cond`, if present, does not return a non-True
3871 value. `condpart` can fail to return any value if it simply executes
3872 to completion. This is treated the same as returning `True`.
3874 If there is a `thenpart` it will be executed whenever the `condpart`
3875 or `cond` returns True (or does not return any value), but this will happen
3876 *after* `dopart` (when present).
3878 If `elsepart` is present it will be executed at most once when the
3879 condition returns `False` or some value that isn't `True` and isn't
3880 matched by any `casepart`. If there are any `casepart`s, they will be
3881 executed when the condition returns a matching value.
3883 The particular sorts of values allowed in case parts has not yet been
3884 determined in the language design, so nothing is prohibited.
3886 The various blocks in this complex statement potentially provide scope
3887 for variables as described earlier. Each such block must include the
3888 "OpenScope" nonterminal before parsing the block, and must call
3889 `var_block_close()` when closing the block.
3891 The code following "`if`", "`switch`" and "`for`" does not get its own
3892 scope, but is in a scope covering the whole statement, so names
3893 declared there cannot be redeclared elsewhere. Similarly the
3894 condition following "`while`" is in a scope that covers the body
3895 ("`do`" part) of the loop, and which does not allow conditional scope
3896 extension. Code following "`then`" (both looping and non-looping),
3897 "`else`" and "`case`" each get their own local scope.
3899 The type requirements on the code block in a `whilepart` are quite
3900 unusual. It is allowed to return a value of some identifiable type, in
3901 which case the loop aborts and an appropriate `casepart` is run, or it
3902 can return a Boolean, in which case the loop either continues to the
3903 `dopart` (on `True`) or aborts and runs the `elsepart` (on `False`).
3904 This is different both from the `ifpart` code block which is expected to
3905 return a Boolean, or the `switchpart` code block which is expected to
3906 return the same type as the casepart values. The correct analysis of
3907 the type of the `whilepart` code block is the reason for the
3908 `Rboolok` flag which is passed to `propagate_types()`.
3910 The `cond_statement` cannot fit into a `binode` so a new `exec` is
3911 defined. As there are two scopes which cover multiple parts - one for
3912 the whole statement and one for "while" and "do" - and as we will use
3913 the 'struct exec' to track scopes, we actually need two new types of
3914 exec. One is a `binode` for the looping part, the rest is the
3915 `cond_statement`. The `cond_statement` will use an auxiliary `struct
3916 casepart` to track a list of case parts.
3927 struct exec *action;
3928 struct casepart *next;
3930 struct cond_statement {
3932 struct exec *forpart, *condpart, *thenpart, *elsepart;
3933 struct binode *looppart;
3934 struct casepart *casepart;
3937 ###### ast functions
3939 static void free_casepart(struct casepart *cp)
3943 free_exec(cp->value);
3944 free_exec(cp->action);
3951 static void free_cond_statement(struct cond_statement *s)
3955 free_exec(s->forpart);
3956 free_exec(s->condpart);
3957 free_exec(s->looppart);
3958 free_exec(s->thenpart);
3959 free_exec(s->elsepart);
3960 free_casepart(s->casepart);
3964 ###### free exec cases
3965 case Xcond_statement: free_cond_statement(cast(cond_statement, e)); break;
3967 ###### ComplexStatement Grammar
3968 | CondStatement ${ $0 = $<1; }$
3970 ###### expr precedence
3971 $TERM for then while do
3978 // A CondStatement must end with EOL, as does CondSuffix and
3980 // ForPart, ThenPart, SwitchPart, CasePart are non-empty and
3981 // may or may not end with EOL
3982 // WhilePart and IfPart include an appropriate Suffix
3984 // ForPart, SwitchPart, and IfPart open scopes, so we have to close
3985 // them. WhilePart opens and closes its own scope.
3986 CondStatement -> ForPart OptNL ThenPart OptNL WhilePart CondSuffix ${
3989 $0->thenpart = $<TP;
3990 $0->looppart = $<WP;
3991 var_block_close(c, CloseSequential, $0);
3993 | ForPart OptNL WhilePart CondSuffix ${
3996 $0->looppart = $<WP;
3997 var_block_close(c, CloseSequential, $0);
3999 | WhilePart CondSuffix ${
4001 $0->looppart = $<WP;
4003 | SwitchPart OptNL CasePart CondSuffix ${
4005 $0->condpart = $<SP;
4006 $CP->next = $0->casepart;
4007 $0->casepart = $<CP;
4008 var_block_close(c, CloseSequential, $0);
4010 | SwitchPart : IN OptNL CasePart CondSuffix OUT Newlines ${
4012 $0->condpart = $<SP;
4013 $CP->next = $0->casepart;
4014 $0->casepart = $<CP;
4015 var_block_close(c, CloseSequential, $0);
4017 | IfPart IfSuffix ${
4019 $0->condpart = $IP.condpart; $IP.condpart = NULL;
4020 $0->thenpart = $IP.thenpart; $IP.thenpart = NULL;
4021 // This is where we close an "if" statement
4022 var_block_close(c, CloseSequential, $0);
4025 CondSuffix -> IfSuffix ${
4028 | Newlines CasePart CondSuffix ${
4030 $CP->next = $0->casepart;
4031 $0->casepart = $<CP;
4033 | CasePart CondSuffix ${
4035 $CP->next = $0->casepart;
4036 $0->casepart = $<CP;
4039 IfSuffix -> Newlines ${ $0 = new(cond_statement); }$
4040 | Newlines ElsePart ${ $0 = $<EP; }$
4041 | ElsePart ${$0 = $<EP; }$
4043 ElsePart -> else OpenBlock Newlines ${
4044 $0 = new(cond_statement);
4045 $0->elsepart = $<OB;
4046 var_block_close(c, CloseElse, $0->elsepart);
4048 | else OpenScope CondStatement ${
4049 $0 = new(cond_statement);
4050 $0->elsepart = $<CS;
4051 var_block_close(c, CloseElse, $0->elsepart);
4055 CasePart -> case Expression OpenScope ColonBlock ${
4056 $0 = calloc(1,sizeof(struct casepart));
4059 var_block_close(c, CloseParallel, $0->action);
4063 // These scopes are closed in CondStatement
4064 ForPart -> for OpenBlock ${
4068 ThenPart -> then OpenBlock ${
4070 var_block_close(c, CloseSequential, $0);
4074 // This scope is closed in CondStatement
4075 WhilePart -> while UseBlock OptNL do OpenBlock ${
4080 var_block_close(c, CloseSequential, $0->right);
4081 var_block_close(c, CloseSequential, $0);
4083 | while OpenScope Expression OpenScope ColonBlock ${
4088 var_block_close(c, CloseSequential, $0->right);
4089 var_block_close(c, CloseSequential, $0);
4093 IfPart -> if UseBlock OptNL then OpenBlock ${
4096 var_block_close(c, CloseParallel, $0.thenpart);
4098 | if OpenScope Expression OpenScope ColonBlock ${
4101 var_block_close(c, CloseParallel, $0.thenpart);
4103 | if OpenScope Expression OpenScope OptNL then Block ${
4106 var_block_close(c, CloseParallel, $0.thenpart);
4110 // This scope is closed in CondStatement
4111 SwitchPart -> switch OpenScope Expression ${
4114 | switch UseBlock ${
4118 ###### print binode cases
4120 if (b->left && b->left->type == Xbinode &&
4121 cast(binode, b->left)->op == Block) {
4123 do_indent(indent, "while {\n");
4125 do_indent(indent, "while\n");
4126 print_exec(b->left, indent+1, bracket);
4128 do_indent(indent, "} do {\n");
4130 do_indent(indent, "do\n");
4131 print_exec(b->right, indent+1, bracket);
4133 do_indent(indent, "}\n");
4135 do_indent(indent, "while ");
4136 print_exec(b->left, 0, bracket);
4141 print_exec(b->right, indent+1, bracket);
4143 do_indent(indent, "}\n");
4147 ###### print exec cases
4149 case Xcond_statement:
4151 struct cond_statement *cs = cast(cond_statement, e);
4152 struct casepart *cp;
4154 do_indent(indent, "for");
4155 if (bracket) printf(" {\n"); else printf("\n");
4156 print_exec(cs->forpart, indent+1, bracket);
4159 do_indent(indent, "} then {\n");
4161 do_indent(indent, "then\n");
4162 print_exec(cs->thenpart, indent+1, bracket);
4164 if (bracket) do_indent(indent, "}\n");
4167 print_exec(cs->looppart, indent, bracket);
4171 do_indent(indent, "switch");
4173 do_indent(indent, "if");
4174 if (cs->condpart && cs->condpart->type == Xbinode &&
4175 cast(binode, cs->condpart)->op == Block) {
4180 print_exec(cs->condpart, indent+1, bracket);
4182 do_indent(indent, "}\n");
4184 do_indent(indent, "then\n");
4185 print_exec(cs->thenpart, indent+1, bracket);
4189 print_exec(cs->condpart, 0, bracket);
4195 print_exec(cs->thenpart, indent+1, bracket);
4197 do_indent(indent, "}\n");
4202 for (cp = cs->casepart; cp; cp = cp->next) {
4203 do_indent(indent, "case ");
4204 print_exec(cp->value, -1, 0);
4209 print_exec(cp->action, indent+1, bracket);
4211 do_indent(indent, "}\n");
4214 do_indent(indent, "else");
4219 print_exec(cs->elsepart, indent+1, bracket);
4221 do_indent(indent, "}\n");
4226 ###### propagate binode cases
4228 t = propagate_types(b->right, c, ok, Tnone, 0);
4229 if (!type_compat(Tnone, t, 0))
4230 *ok = 0; // UNTESTED
4231 return propagate_types(b->left, c, ok, type, rules);
4233 ###### propagate exec cases
4234 case Xcond_statement:
4236 // forpart and looppart->right must return Tnone
4237 // thenpart must return Tnone if there is a looppart,
4238 // otherwise it is like elsepart.
4240 // be bool if there is no casepart
4241 // match casepart->values if there is a switchpart
4242 // either be bool or match casepart->value if there
4244 // elsepart and casepart->action must match the return type
4245 // expected of this statement.
4246 struct cond_statement *cs = cast(cond_statement, prog);
4247 struct casepart *cp;
4249 t = propagate_types(cs->forpart, c, ok, Tnone, 0);
4250 if (!type_compat(Tnone, t, 0))
4251 *ok = 0; // UNTESTED
4254 t = propagate_types(cs->thenpart, c, ok, Tnone, 0);
4255 if (!type_compat(Tnone, t, 0))
4256 *ok = 0; // UNTESTED
4258 if (cs->casepart == NULL) {
4259 propagate_types(cs->condpart, c, ok, Tbool, 0);
4260 propagate_types(cs->looppart, c, ok, Tbool, 0);
4262 /* Condpart must match case values, with bool permitted */
4264 for (cp = cs->casepart;
4265 cp && !t; cp = cp->next)
4266 t = propagate_types(cp->value, c, ok, NULL, 0);
4267 if (!t && cs->condpart)
4268 t = propagate_types(cs->condpart, c, ok, NULL, Rboolok); // UNTESTED
4269 if (!t && cs->looppart)
4270 t = propagate_types(cs->looppart, c, ok, NULL, Rboolok); // UNTESTED
4271 // Now we have a type (I hope) push it down
4273 for (cp = cs->casepart; cp; cp = cp->next)
4274 propagate_types(cp->value, c, ok, t, 0);
4275 propagate_types(cs->condpart, c, ok, t, Rboolok);
4276 propagate_types(cs->looppart, c, ok, t, Rboolok);
4279 // (if)then, else, and case parts must return expected type.
4280 if (!cs->looppart && !type)
4281 type = propagate_types(cs->thenpart, c, ok, NULL, rules);
4283 type = propagate_types(cs->elsepart, c, ok, NULL, rules);
4284 for (cp = cs->casepart;
4286 cp = cp->next) // UNTESTED
4287 type = propagate_types(cp->action, c, ok, NULL, rules); // UNTESTED
4290 propagate_types(cs->thenpart, c, ok, type, rules);
4291 propagate_types(cs->elsepart, c, ok, type, rules);
4292 for (cp = cs->casepart; cp ; cp = cp->next)
4293 propagate_types(cp->action, c, ok, type, rules);
4299 ###### interp binode cases
4301 // This just performs one iteration of the loop
4302 rv = interp_exec(c, b->left, &rvtype);
4303 if (rvtype == Tnone ||
4304 (rvtype == Tbool && rv.bool != 0))
4305 // cnd is Tnone or Tbool, doesn't need to be freed
4306 interp_exec(c, b->right, NULL);
4309 ###### interp exec cases
4310 case Xcond_statement:
4312 struct value v, cnd;
4313 struct type *vtype, *cndtype;
4314 struct casepart *cp;
4315 struct cond_statement *cs = cast(cond_statement, e);
4318 interp_exec(c, cs->forpart, NULL);
4320 while ((cnd = interp_exec(c, cs->looppart, &cndtype)),
4321 cndtype == Tnone || (cndtype == Tbool && cnd.bool != 0))
4322 interp_exec(c, cs->thenpart, NULL);
4324 cnd = interp_exec(c, cs->condpart, &cndtype);
4325 if ((cndtype == Tnone ||
4326 (cndtype == Tbool && cnd.bool != 0))) {
4327 // cnd is Tnone or Tbool, doesn't need to be freed
4328 rv = interp_exec(c, cs->thenpart, &rvtype);
4329 // skip else (and cases)
4333 for (cp = cs->casepart; cp; cp = cp->next) {
4334 v = interp_exec(c, cp->value, &vtype);
4335 if (value_cmp(cndtype, vtype, &v, &cnd) == 0) {
4336 free_value(vtype, &v);
4337 free_value(cndtype, &cnd);
4338 rv = interp_exec(c, cp->action, &rvtype);
4341 free_value(vtype, &v);
4343 free_value(cndtype, &cnd);
4345 rv = interp_exec(c, cs->elsepart, &rvtype);
4352 ### Top level structure
4354 All the language elements so far can be used in various places. Now
4355 it is time to clarify what those places are.
4357 At the top level of a file there will be a number of declarations.
4358 Many of the things that can be declared haven't been described yet,
4359 such as functions, procedures, imports, and probably more.
4360 For now there are three sorts of things that can appear at the top
4361 level. They are predefined constants, `struct` types, and the `main`
4362 function. While the syntax will allow the `main` function to appear
4363 multiple times, that will trigger an error if it is actually attempted.
4365 The various declarations do not return anything. They store the
4366 various declarations in the parse context.
4368 ###### Parser: grammar
4371 Ocean -> OptNL DeclarationList
4373 ## declare terminals
4380 DeclarationList -> Declaration
4381 | DeclarationList Declaration
4383 Declaration -> ERROR Newlines ${
4384 tok_err(c, // UNTESTED
4385 "error: unhandled parse error", &$1);
4391 ## top level grammar
4395 ### The `const` section
4397 As well as being defined along with the code that uses them, constants
4398 can be declared at the top level. These have full-file scope, so they
4399 are always `InScope`. The value of a top level constant can be given
4400 as an expression, and this is evaluated immediately rather than in the
4401 later interpretation stage. Once we add functions to the language, we
4402 will need rules concerning which, if any, can be used to define a top
4405 Constants are defined in a section that starts with the reserved word
4406 `const` and then has a block with a list of assignment statements.
4407 For syntactic consistency, these must use the double-colon syntax to
4408 make it clear that they are constants. Type can also be given: if
4409 not, the type will be determined during analysis, as with other
4412 As the constants are inserted at the head of a list, printing
4413 them in the same order that they were read is not straightforward.
4414 We take a quadratic approach here and count the number of constants
4415 (variables of depth 0), then count down from there, each time
4416 searching through for the Nth constant for decreasing N.
4418 ###### top level grammar
4422 DeclareConstant -> const { IN OptNL ConstList OUT OptNL } Newlines
4423 | const { SimpleConstList } Newlines
4424 | const IN OptNL ConstList OUT Newlines
4425 | const SimpleConstList Newlines
4427 ConstList -> ConstList SimpleConstLine
4429 SimpleConstList -> SimpleConstList ; Const
4432 SimpleConstLine -> SimpleConstList Newlines
4433 | ERROR Newlines ${ tok_err(c, "Syntax error in constant", &$1); }$
4436 CType -> Type ${ $0 = $<1; }$
4439 Const -> IDENTIFIER :: CType = Expression ${ {
4443 v = var_decl(c, $1.txt);
4445 struct var *var = new_pos(var, $1);
4446 v->where_decl = var;
4451 v = var_ref(c, $1.txt);
4452 tok_err(c, "error: name already declared", &$1);
4453 type_err(c, "info: this is where '%v' was first declared",
4454 v->where_decl, NULL, 0, NULL);
4458 propagate_types($5, c, &ok, $3, 0);
4463 struct value res = interp_exec(c, $5, &v->type);
4464 global_alloc(c, v->type, v, &res);
4468 ###### print const decls
4473 while (target != 0) {
4475 for (v = context.in_scope; v; v=v->in_scope)
4476 if (v->depth == 0) {
4487 struct value *val = var_value(&context, v);
4488 printf(" %.*s :: ", v->name->name.len, v->name->name.txt);
4489 type_print(v->type, stdout);
4491 if (v->type == Tstr)
4493 print_value(v->type, val);
4494 if (v->type == Tstr)
4502 ### Finally the whole `main` function.
4504 An Ocean program can currently have only one function - `main` - and
4505 that must exist. It expects an array of strings with a provided size.
4506 Following this is a `block` which is the code to execute.
4508 As this is the top level, several things are handled a bit
4510 The function is not interpreted by `interp_exec` as that isn't
4511 passed the argument list which the program requires. Similarly type
4512 analysis is a bit more interesting at this level.
4514 ###### top level grammar
4516 DeclareFunction -> MainFunction ${ {
4518 type_err(c, "\"main\" defined a second time",
4524 ###### print binode cases
4526 do_indent(indent, "func main(");
4527 for (b2 = cast(binode, b->left); b2; b2 = cast(binode, b2->right)) {
4528 struct variable *v = cast(var, b2->left)->var;
4530 print_exec(b2->left, 0, 0);
4532 type_print(v->type, stdout);
4538 print_exec(b->right, indent+1, bracket);
4540 do_indent(indent, "}\n");
4543 ###### propagate binode cases
4544 case Func: abort(); // NOTEST
4546 ###### core functions
4548 static int analyse_prog(struct exec *prog, struct parse_context *c)
4550 struct binode *bp = cast(binode, prog);
4554 struct type *argv_type;
4555 struct text argv_type_name = { " argv", 5 };
4560 argv_type = add_type(c, argv_type_name, &array_prototype);
4561 argv_type->array.member = Tstr;
4562 argv_type->array.unspec = 1;
4564 for (b = cast(binode, bp->left); b; b = cast(binode, b->right)) {
4568 propagate_types(b->left, c, &ok, argv_type, 0);
4570 default: /* invalid */ // NOTEST
4571 propagate_types(b->left, c, &ok, Tnone, 0); // NOTEST
4577 propagate_types(bp->right, c, &ok, Tnone, 0);
4582 /* Make sure everything is still consistent */
4583 propagate_types(bp->right, c, &ok, Tnone, 0);
4585 return 0; // UNTESTED
4590 static void interp_prog(struct parse_context *c, struct exec *prog,
4591 int argc, char **argv)
4593 struct binode *p = cast(binode, prog);
4601 al = cast(binode, p->left);
4603 struct var *v = cast(var, al->left);
4604 struct value *vl = var_value(c, v->var);
4614 mpq_set_ui(argcq, argc, 1);
4615 memcpy(var_value(c, t->array.vsize), &argcq, sizeof(argcq));
4616 t->prepare_type(c, t, 0);
4617 array_init(v->var->type, vl);
4618 for (i = 0; i < argc; i++) {
4619 struct value *vl2 = vl->array + i * v->var->type->array.member->size;
4622 arg.str.txt = argv[i];
4623 arg.str.len = strlen(argv[i]);
4624 free_value(Tstr, vl2);
4625 dup_value(Tstr, &arg, vl2);
4629 al = cast(binode, al->right);
4631 v = interp_exec(c, p, &vtype);
4632 free_value(vtype, &v);
4635 ###### interp binode cases
4637 rv = interp_exec(c, b->right, &rvtype);
4640 ## And now to test it out.
4642 Having a language requires having a "hello world" program. I'll
4643 provide a little more than that: a program that prints "Hello world",
4644 finds the GCD of two numbers, prints the first few elements of
4645 Fibonacci, performs a binary search for a number, and a few other
4646 things which will likely grow as the language grows.
4648 ###### File: oceani.mk
4651 @echo "===== DEMO ====="
4652 ./oceani --section "demo: hello" oceani.mdc 55 33
4658 four ::= 2 + 2 ; five ::= 10/2
4659 const pie ::= "I like Pie";
4660 cake ::= "The cake is"
4671 print "Hello World, what lovely oceans you have!"
4672 print "Are there", five, "?"
4673 print pi, pie, "but", cake
4675 A := $argv[1]; B := $argv[2]
4677 /* When a variable is defined in both branches of an 'if',
4678 * and used afterwards, the variables are merged.
4684 print "Is", A, "bigger than", B,"? ", bigger
4685 /* If a variable is not used after the 'if', no
4686 * merge happens, so types can be different
4689 double:string = "yes"
4690 print A, "is more than twice", B, "?", double
4693 print "double", B, "is", double
4698 if a > 0 and then b > 0:
4704 print "GCD of", A, "and", B,"is", a
4706 print a, "is not positive, cannot calculate GCD"
4708 print b, "is not positive, cannot calculate GCD"
4713 print "Fibonacci:", f1,f2,
4714 then togo = togo - 1
4722 /* Binary search... */
4727 mid := (lo + hi) / 2
4740 print "Yay, I found", target
4742 print "Closest I found was", lo
4747 // "middle square" PRNG. Not particularly good, but one my
4748 // Dad taught me - the first one I ever heard of.
4749 for i:=1; then i = i + 1; while i < size:
4750 n := list[i-1] * list[i-1]
4751 list[i] = (n / 100) % 10 000
4753 print "Before sort:",
4754 for i:=0; then i = i + 1; while i < size:
4758 for i := 1; then i=i+1; while i < size:
4759 for j:=i-1; then j=j-1; while j >= 0:
4760 if list[j] > list[j+1]:
4764 print " After sort:",
4765 for i:=0; then i = i + 1; while i < size:
4769 if 1 == 2 then print "yes"; else print "no"
4773 bob.alive = (bob.name == "Hello")
4774 print "bob", "is" if bob.alive else "isn't", "alive"