1 # Ocean Interpreter - Jamison Creek version
3 Ocean is intended to be a compiled language, so this interpreter is
4 not targeted at being the final product. It is, rather, an intermediate
5 stage and fills that role in two distinct ways.
7 Firstly, it exists as a platform to experiment with the early language
8 design. An interpreter is easy to write and easy to get working, so
9 the barrier for entry is lower if I aim to start with an interpreter.
11 Secondly, the plan for the Ocean compiler is to write it in the
12 [Ocean language](http://ocean-lang.org). To achieve this we naturally
13 need some sort of boot-strap process and this interpreter - written in
14 portable C - will fill that role. It will be used to bootstrap the
17 Two features that are not needed to fill either of these roles are
18 performance and completeness. The interpreter only needs to be fast
19 enough to run small test programs and occasionally to run the compiler
20 on itself. It only needs to be complete enough to test aspects of the
21 design which are developed before the compiler is working, and to run
22 the compiler on itself. Any features not used by the compiler when
23 compiling itself are superfluous. They may be included anyway, but
26 Nonetheless, the interpreter should end up being reasonably complete,
27 and any performance bottlenecks which appear and are easily fixed, will
32 This third version of the interpreter exists to test out some initial
33 ideas relating to types. Particularly it adds arrays (indexed from
34 zero) and simple structures. Basic control flow and variable scoping
35 are already fairly well established, as are basic numerical and
38 Some operators that have only recently been added, and so have not
39 generated all that much experience yet are "and then" and "or else" as
40 short-circuit Boolean operators, and the "if ... else" trinary
41 operator which can select between two expressions based on a third
42 (which appears syntactically in the middle).
44 The "func" clause currently only allows a "main" function to be
45 declared. That will be extended when proper function support is added.
47 An element that is present purely to make a usable language, and
48 without any expectation that they will remain, is the "print" statement
49 which performs simple output.
51 The current scalar types are "number", "Boolean", and "string".
52 Boolean will likely stay in its current form, the other two might, but
53 could just as easily be changed.
57 Versions of the interpreter which obviously do not support a complete
58 language will be named after creeks and streams. This one is Jamison
61 Once we have something reasonably resembling a complete language, the
62 names of rivers will be used.
63 Early versions of the compiler will be named after seas. Major
64 releases of the compiler will be named after oceans. Hopefully I will
65 be finished once I get to the Pacific Ocean release.
69 As well as parsing and executing a program, the interpreter can print
70 out the program from the parsed internal structure. This is useful
71 for validating the parsing.
72 So the main requirements of the interpreter are:
74 - Parse the program, possibly with tracing,
75 - Analyse the parsed program to ensure consistency,
77 - Execute the "main" function in the program, if no parsing or
78 consistency errors were found.
80 This is all performed by a single C program extracted with
83 There will be two formats for printing the program: a default and one
84 that uses bracketing. So a `--bracket` command line option is needed
85 for that. Normally the first code section found is used, however an
86 alternate section can be requested so that a file (such as this one)
87 can contain multiple programs. This is effected with the `--section`
90 This code must be compiled with `-fplan9-extensions` so that anonymous
91 structures can be used.
93 ###### File: oceani.mk
95 myCFLAGS := -Wall -g -fplan9-extensions
96 CFLAGS := $(filter-out $(myCFLAGS),$(CFLAGS)) $(myCFLAGS)
97 myLDLIBS:= libparser.o libscanner.o libmdcode.o -licuuc
98 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
100 all :: $(LDLIBS) oceani
101 oceani.c oceani.h : oceani.mdc parsergen
102 ./parsergen -o oceani --LALR --tag Parser oceani.mdc
103 oceani.mk: oceani.mdc md2c
106 oceani: oceani.o $(LDLIBS)
107 $(CC) $(CFLAGS) -o oceani oceani.o $(LDLIBS)
109 ###### Parser: header
111 struct parse_context;
113 struct parse_context {
114 struct token_config config;
123 #define container_of(ptr, type, member) ({ \
124 const typeof( ((type *)0)->member ) *__mptr = (ptr); \
125 (type *)( (char *)__mptr - offsetof(type,member) );})
127 #define config2context(_conf) container_of(_conf, struct parse_context, \
130 ###### Parser: reduce
131 struct parse_context *c = config2context(config);
139 #include <sys/mman.h>
158 static char Usage[] =
159 "Usage: oceani --trace --print --noexec --brackets --section=SectionName prog.ocn\n";
160 static const struct option long_options[] = {
161 {"trace", 0, NULL, 't'},
162 {"print", 0, NULL, 'p'},
163 {"noexec", 0, NULL, 'n'},
164 {"brackets", 0, NULL, 'b'},
165 {"section", 1, NULL, 's'},
168 const char *options = "tpnbs";
170 static void pr_err(char *msg) // NOTEST
172 fprintf(stderr, "%s\n", msg); // NOTEST
175 int main(int argc, char *argv[])
180 struct section *s, *ss;
181 char *section = NULL;
182 struct parse_context context = {
184 .ignored = (1 << TK_mark),
185 .number_chars = ".,_+- ",
190 int doprint=0, dotrace=0, doexec=1, brackets=0;
192 while ((opt = getopt_long(argc, argv, options, long_options, NULL))
195 case 't': dotrace=1; break;
196 case 'p': doprint=1; break;
197 case 'n': doexec=0; break;
198 case 'b': brackets=1; break;
199 case 's': section = optarg; break;
200 default: fprintf(stderr, Usage);
204 if (optind >= argc) {
205 fprintf(stderr, "oceani: no input file given\n");
208 fd = open(argv[optind], O_RDONLY);
210 fprintf(stderr, "oceani: cannot open %s\n", argv[optind]);
213 context.file_name = argv[optind];
214 len = lseek(fd, 0, 2);
215 file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
216 s = code_extract(file, file+len, pr_err);
218 fprintf(stderr, "oceani: could not find any code in %s\n",
223 ## context initialization
226 for (ss = s; ss; ss = ss->next) {
227 struct text sec = ss->section;
228 if (sec.len == strlen(section) &&
229 strncmp(sec.txt, section, sec.len) == 0)
233 fprintf(stderr, "oceani: cannot find section %s\n",
239 parse_oceani(ss->code, &context.config, dotrace ? stderr : NULL);
242 fprintf(stderr, "oceani: no main function found.\n");
243 context.parse_error = 1;
245 if (context.prog && !context.parse_error) {
246 if (!analyse_prog(context.prog, &context)) {
247 fprintf(stderr, "oceani: type error in program - not running.\n");
248 context.parse_error = 1;
251 if (context.prog && doprint) {
254 print_exec(context.prog, 0, brackets);
256 if (context.prog && doexec && !context.parse_error)
257 interp_prog(&context, context.prog, argc - optind, argv+optind);
258 free_exec(context.prog);
261 struct section *t = s->next;
267 ## free context types
268 ## free context storage
269 exit(context.parse_error ? 1 : 0);
274 The four requirements of parse, analyse, print, interpret apply to
275 each language element individually so that is how most of the code
278 Three of the four are fairly self explanatory. The one that requires
279 a little explanation is the analysis step.
281 The current language design does not require the types of variables to
282 be declared, but they must still have a single type. Different
283 operations impose different requirements on the variables, for example
284 addition requires both arguments to be numeric, and assignment
285 requires the variable on the left to have the same type as the
286 expression on the right.
288 Analysis involves propagating these type requirements around and
289 consequently setting the type of each variable. If any requirements
290 are violated (e.g. a string is compared with a number) or if a
291 variable needs to have two different types, then an error is raised
292 and the program will not run.
294 If the same variable is declared in both branchs of an 'if/else', or
295 in all cases of a 'switch' then the multiple instances may be merged
296 into just one variable if the variable is referenced after the
297 conditional statement. When this happens, the types must naturally be
298 consistent across all the branches. When the variable is not used
299 outside the if, the variables in the different branches are distinct
300 and can be of different types.
302 Undeclared names may only appear in "use" statements and "case" expressions.
303 These names are given a type of "label" and a unique value.
304 This allows them to fill the role of a name in an enumerated type, which
305 is useful for testing the `switch` statement.
307 As we will see, the condition part of a `while` statement can return
308 either a Boolean or some other type. This requires that the expected
309 type that gets passed around comprises a type and a flag to indicate
310 that `Tbool` is also permitted.
312 As there are, as yet, no distinct types that are compatible, there
313 isn't much subtlety in the analysis. When we have distinct number
314 types, this will become more interesting.
318 When analysis discovers an inconsistency it needs to report an error;
319 just refusing to run the code ensures that the error doesn't cascade,
320 but by itself it isn't very useful. A clear understanding of the sort
321 of error message that are useful will help guide the process of
324 At a simplistic level, the only sort of error that type analysis can
325 report is that the type of some construct doesn't match a contextual
326 requirement. For example, in `4 + "hello"` the addition provides a
327 contextual requirement for numbers, but `"hello"` is not a number. In
328 this particular example no further information is needed as the types
329 are obvious from local information. When a variable is involved that
330 isn't the case. It may be helpful to explain why the variable has a
331 particular type, by indicating the location where the type was set,
332 whether by declaration or usage.
334 Using a recursive-descent analysis we can easily detect a problem at
335 multiple locations. In "`hello:= "there"; 4 + hello`" the addition
336 will detect that one argument is not a number and the usage of `hello`
337 will detect that a number was wanted, but not provided. In this
338 (early) version of the language, we will generate error reports at
339 multiple locations, so the use of `hello` will report an error and
340 explain were the value was set, and the addition will report an error
341 and say why numbers are needed. To be able to report locations for
342 errors, each language element will need to record a file location
343 (line and column) and each variable will need to record the language
344 element where its type was set. For now we will assume that each line
345 of an error message indicates one location in the file, and up to 2
346 types. So we provide a `printf`-like function which takes a format, a
347 location (a `struct exec` which has not yet been introduced), and 2
348 types. "`%1`" reports the first type, "`%2`" reports the second. We
349 will need a function to print the location, once we know how that is
350 stored. e As will be explained later, there are sometimes extra rules for
351 type matching and they might affect error messages, we need to pass those
354 As well as type errors, we sometimes need to report problems with
355 tokens, which might be unexpected or might name a type that has not
356 been defined. For these we have `tok_err()` which reports an error
357 with a given token. Each of the error functions sets the flag in the
358 context so indicate that parsing failed.
362 static void fput_loc(struct exec *loc, FILE *f);
364 ###### core functions
366 static void type_err(struct parse_context *c,
367 char *fmt, struct exec *loc,
368 struct type *t1, int rules, struct type *t2)
370 fprintf(stderr, "%s:", c->file_name);
371 fput_loc(loc, stderr);
372 for (; *fmt ; fmt++) {
379 case '%': fputc(*fmt, stderr); break; // NOTEST
380 default: fputc('?', stderr); break; // NOTEST
382 type_print(t1, stderr);
385 type_print(t2, stderr);
394 static void tok_err(struct parse_context *c, char *fmt, struct token *t)
396 fprintf(stderr, "%s:%d:%d: %s: %.*s\n", c->file_name, t->line, t->col, fmt,
397 t->txt.len, t->txt.txt);
401 ## Entities: declared and predeclared.
403 There are various "things" that the language and/or the interpreter
404 needs to know about to parse and execute a program. These include
405 types, variables, values, and executable code. These are all lumped
406 together under the term "entities" (calling them "objects" would be
407 confusing) and introduced here. The following section will present the
408 different specific code elements which comprise or manipulate these
413 Values come in a wide range of types, with more likely to be added.
414 Each type needs to be able to print its own values (for convenience at
415 least) as well as to compare two values, at least for equality and
416 possibly for order. For now, values might need to be duplicated and
417 freed, though eventually such manipulations will be better integrated
420 Rather than requiring every numeric type to support all numeric
421 operations (add, multiple, etc), we allow types to be able to present
422 as one of a few standard types: integer, float, and fraction. The
423 existence of these conversion functions eventually enable types to
424 determine if they are compatible with other types, though such types
425 have not yet been implemented.
427 Named type are stored in a simple linked list. Objects of each type are
428 "values" which are often passed around by value.
435 ## value union fields
443 void (*init)(struct type *type, struct value *val);
444 void (*prepare_type)(struct parse_context *c, struct type *type, int parse_time);
445 void (*print)(struct type *type, struct value *val);
446 void (*print_type)(struct type *type, FILE *f);
447 int (*cmp_order)(struct type *t1, struct type *t2,
448 struct value *v1, struct value *v2);
449 int (*cmp_eq)(struct type *t1, struct type *t2,
450 struct value *v1, struct value *v2);
451 void (*dup)(struct type *type, struct value *vold, struct value *vnew);
452 void (*free)(struct type *type, struct value *val);
453 void (*free_type)(struct type *t);
454 long long (*to_int)(struct value *v);
455 double (*to_float)(struct value *v);
456 int (*to_mpq)(mpq_t *q, struct value *v);
465 struct type *typelist;
469 static struct type *find_type(struct parse_context *c, struct text s)
471 struct type *l = c->typelist;
474 text_cmp(l->name, s) != 0)
479 static struct type *add_type(struct parse_context *c, struct text s,
484 n = calloc(1, sizeof(*n));
487 n->next = c->typelist;
492 static void free_type(struct type *t)
494 /* The type is always a reference to something in the
495 * context, so we don't need to free anything.
499 static void free_value(struct type *type, struct value *v)
503 memset(v, 0x5a, type->size);
507 static void type_print(struct type *type, FILE *f)
510 fputs("*unknown*type*", f); // NOTEST
511 else if (type->name.len)
512 fprintf(f, "%.*s", type->name.len, type->name.txt);
513 else if (type->print_type)
514 type->print_type(type, f);
516 fputs("*invalid*type*", f); // NOTEST
519 static void val_init(struct type *type, struct value *val)
521 if (type && type->init)
522 type->init(type, val);
525 static void dup_value(struct type *type,
526 struct value *vold, struct value *vnew)
528 if (type && type->dup)
529 type->dup(type, vold, vnew);
532 static int value_cmp(struct type *tl, struct type *tr,
533 struct value *left, struct value *right)
535 if (tl && tl->cmp_order)
536 return tl->cmp_order(tl, tr, left, right);
537 if (tl && tl->cmp_eq) // NOTEST
538 return tl->cmp_eq(tl, tr, left, right); // NOTEST
542 static void print_value(struct type *type, struct value *v)
544 if (type && type->print)
545 type->print(type, v);
547 printf("*Unknown*"); // NOTEST
552 static void free_value(struct type *type, struct value *v);
553 static int type_compat(struct type *require, struct type *have, int rules);
554 static void type_print(struct type *type, FILE *f);
555 static void val_init(struct type *type, struct value *v);
556 static void dup_value(struct type *type,
557 struct value *vold, struct value *vnew);
558 static int value_cmp(struct type *tl, struct type *tr,
559 struct value *left, struct value *right);
560 static void print_value(struct type *type, struct value *v);
562 ###### free context types
564 while (context.typelist) {
565 struct type *t = context.typelist;
567 context.typelist = t->next;
573 Type can be specified for local variables, for fields in a structure,
574 for formal parameters to functions, and possibly elsewhere. Different
575 rules may apply in different contexts. As a minimum, a named type may
576 always be used. Currently the type of a formal parameter can be
577 different from types in other contexts, so we have a separate grammar
583 Type -> IDENTIFIER ${
584 $0 = find_type(c, $1.txt);
587 "error: undefined type", &$1);
594 FormalType -> Type ${ $0 = $<1; }$
595 ## formal type grammar
599 Values of the base types can be numbers, which we represent as
600 multi-precision fractions, strings, Booleans and labels. When
601 analysing the program we also need to allow for places where no value
602 is meaningful (type `Tnone`) and where we don't know what type to
603 expect yet (type is `NULL`).
605 Values are never shared, they are always copied when used, and freed
606 when no longer needed.
608 When propagating type information around the program, we need to
609 determine if two types are compatible, where type `NULL` is compatible
610 with anything. There are two special cases with type compatibility,
611 both related to the Conditional Statement which will be described
612 later. In some cases a Boolean can be accepted as well as some other
613 primary type, and in others any type is acceptable except a label (`Vlabel`).
614 A separate function encoding these cases will simplify some code later.
616 ###### type functions
618 int (*compat)(struct type *this, struct type *other);
622 static int type_compat(struct type *require, struct type *have, int rules)
624 if ((rules & Rboolok) && have == Tbool)
626 if ((rules & Rnolabel) && have == Tlabel)
628 if (!require || !have)
632 return require->compat(require, have);
634 return require == have;
639 #include "parse_string.h"
640 #include "parse_number.h"
643 myLDLIBS := libnumber.o libstring.o -lgmp
644 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
646 ###### type union fields
647 enum vtype {Vnone, Vstr, Vnum, Vbool, Vlabel} vtype;
649 ###### value union fields
656 static void _free_value(struct type *type, struct value *v)
660 switch (type->vtype) {
662 case Vstr: free(v->str.txt); break;
663 case Vnum: mpq_clear(v->num); break;
669 ###### value functions
671 static void _val_init(struct type *type, struct value *val)
673 switch(type->vtype) {
674 case Vnone: // NOTEST
677 mpq_init(val->num); break;
679 val->str.txt = malloc(1);
691 static void _dup_value(struct type *type,
692 struct value *vold, struct value *vnew)
694 switch (type->vtype) {
695 case Vnone: // NOTEST
698 vnew->label = vold->label;
701 vnew->bool = vold->bool;
705 mpq_set(vnew->num, vold->num);
708 vnew->str.len = vold->str.len;
709 vnew->str.txt = malloc(vnew->str.len);
710 memcpy(vnew->str.txt, vold->str.txt, vnew->str.len);
715 static int _value_cmp(struct type *tl, struct type *tr,
716 struct value *left, struct value *right)
720 return tl - tr; // NOTEST
722 case Vlabel: cmp = left->label == right->label ? 0 : 1; break;
723 case Vnum: cmp = mpq_cmp(left->num, right->num); break;
724 case Vstr: cmp = text_cmp(left->str, right->str); break;
725 case Vbool: cmp = left->bool - right->bool; break;
726 case Vnone: cmp = 0; // NOTEST
731 static void _print_value(struct type *type, struct value *v)
733 switch (type->vtype) {
734 case Vnone: // NOTEST
735 printf("*no-value*"); break; // NOTEST
736 case Vlabel: // NOTEST
737 printf("*label-%p*", v->label); break; // NOTEST
739 printf("%.*s", v->str.len, v->str.txt); break;
741 printf("%s", v->bool ? "True":"False"); break;
746 mpf_set_q(fl, v->num);
747 gmp_printf("%Fg", fl);
754 static void _free_value(struct type *type, struct value *v);
756 static struct type base_prototype = {
758 .print = _print_value,
759 .cmp_order = _value_cmp,
760 .cmp_eq = _value_cmp,
765 static struct type *Tbool, *Tstr, *Tnum, *Tnone, *Tlabel;
768 static struct type *add_base_type(struct parse_context *c, char *n,
769 enum vtype vt, int size)
771 struct text txt = { n, strlen(n) };
774 t = add_type(c, txt, &base_prototype);
777 t->align = size > sizeof(void*) ? sizeof(void*) : size;
778 if (t->size & (t->align - 1))
779 t->size = (t->size | (t->align - 1)) + 1; // NOTEST
783 ###### context initialization
785 Tbool = add_base_type(&context, "Boolean", Vbool, sizeof(char));
786 Tstr = add_base_type(&context, "string", Vstr, sizeof(struct text));
787 Tnum = add_base_type(&context, "number", Vnum, sizeof(mpq_t));
788 Tnone = add_base_type(&context, "none", Vnone, 0);
789 Tlabel = add_base_type(&context, "label", Vlabel, sizeof(void*));
793 Variables are scoped named values. We store the names in a linked list
794 of "bindings" sorted in lexical order, and use sequential search and
801 struct binding *next; // in lexical order
805 This linked list is stored in the parse context so that "reduce"
806 functions can find or add variables, and so the analysis phase can
807 ensure that every variable gets a type.
811 struct binding *varlist; // In lexical order
815 static struct binding *find_binding(struct parse_context *c, struct text s)
817 struct binding **l = &c->varlist;
822 (cmp = text_cmp((*l)->name, s)) < 0)
826 n = calloc(1, sizeof(*n));
833 Each name can be linked to multiple variables defined in different
834 scopes. Each scope starts where the name is declared and continues
835 until the end of the containing code block. Scopes of a given name
836 cannot nest, so a declaration while a name is in-scope is an error.
838 ###### binding fields
839 struct variable *var;
843 struct variable *previous;
845 struct binding *name;
846 struct exec *where_decl;// where name was declared
847 struct exec *where_set; // where type was set
851 When a scope closes, the values of the variables might need to be freed.
852 This happens in the context of some `struct exec` and each `exec` will
853 need to know which variables need to be freed when it completes.
856 struct variable *to_free;
858 ####### variable fields
859 struct exec *cleanup_exec;
860 struct variable *next_free;
862 ####### interp exec cleanup
865 for (v = e->to_free; v; v = v->next_free) {
866 struct value *val = var_value(c, v);
867 free_value(v->type, val);
872 static void variable_unlink_exec(struct variable *v)
874 struct variable **vp;
875 if (!v->cleanup_exec)
877 for (vp = &v->cleanup_exec->to_free;
878 *vp; vp = &(*vp)->next_free) {
882 v->cleanup_exec = NULL;
887 While the naming seems strange, we include local constants in the
888 definition of variables. A name declared `var := value` can
889 subsequently be changed, but a name declared `var ::= value` cannot -
892 ###### variable fields
895 Scopes in parallel branches can be partially merged. More
896 specifically, if a given name is declared in both branches of an
897 if/else then its scope is a candidate for merging. Similarly if
898 every branch of an exhaustive switch (e.g. has an "else" clause)
899 declares a given name, then the scopes from the branches are
900 candidates for merging.
902 Note that names declared inside a loop (which is only parallel to
903 itself) are never visible after the loop. Similarly names defined in
904 scopes which are not parallel, such as those started by `for` and
905 `switch`, are never visible after the scope. Only variables defined in
906 both `then` and `else` (including the implicit then after an `if`, and
907 excluding `then` used with `for`) and in all `case`s and `else` of a
908 `switch` or `while` can be visible beyond the `if`/`switch`/`while`.
910 Labels, which are a bit like variables, follow different rules.
911 Labels are not explicitly declared, but if an undeclared name appears
912 in a context where a label is legal, that effectively declares the
913 name as a label. The declaration remains in force (or in scope) at
914 least to the end of the immediately containing block and conditionally
915 in any larger containing block which does not declare the name in some
916 other way. Importantly, the conditional scope extension happens even
917 if the label is only used in one parallel branch of a conditional --
918 when used in one branch it is treated as having been declared in all
921 Merge candidates are tentatively visible beyond the end of the
922 branching statement which creates them. If the name is used, the
923 merge is affirmed and they become a single variable visible at the
924 outer layer. If not - if it is redeclared first - the merge lapses.
926 To track scopes we have an extra stack, implemented as a linked list,
927 which roughly parallels the parse stack and which is used exclusively
928 for scoping. When a new scope is opened, a new frame is pushed and
929 the child-count of the parent frame is incremented. This child-count
930 is used to distinguish between the first of a set of parallel scopes,
931 in which declared variables must not be in scope, and subsequent
932 branches, whether they may already be conditionally scoped.
934 To push a new frame *before* any code in the frame is parsed, we need a
935 grammar reduction. This is most easily achieved with a grammar
936 element which derives the empty string, and creates the new scope when
937 it is recognised. This can be placed, for example, between a keyword
938 like "if" and the code following it.
942 struct scope *parent;
948 struct scope *scope_stack;
951 static void scope_pop(struct parse_context *c)
953 struct scope *s = c->scope_stack;
955 c->scope_stack = s->parent;
960 static void scope_push(struct parse_context *c)
962 struct scope *s = calloc(1, sizeof(*s));
964 c->scope_stack->child_count += 1;
965 s->parent = c->scope_stack;
973 OpenScope -> ${ scope_push(c); }$
975 Each variable records a scope depth and is in one of four states:
977 - "in scope". This is the case between the declaration of the
978 variable and the end of the containing block, and also between
979 the usage with affirms a merge and the end of that block.
981 The scope depth is not greater than the current parse context scope
982 nest depth. When the block of that depth closes, the state will
983 change. To achieve this, all "in scope" variables are linked
984 together as a stack in nesting order.
986 - "pending". The "in scope" block has closed, but other parallel
987 scopes are still being processed. So far, every parallel block at
988 the same level that has closed has declared the name.
990 The scope depth is the depth of the last parallel block that
991 enclosed the declaration, and that has closed.
993 - "conditionally in scope". The "in scope" block and all parallel
994 scopes have closed, and no further mention of the name has been seen.
995 This state includes a secondary nest depth (`min_depth`) which records
996 the outermost scope seen since the variable became conditionally in
997 scope. If a use of the name is found, the variable becomes "in scope"
998 and that secondary depth becomes the recorded scope depth. If the
999 name is declared as a new variable, the old variable becomes "out of
1000 scope" and the recorded scope depth stays unchanged.
1002 - "out of scope". The variable is neither in scope nor conditionally
1003 in scope. It is permanently out of scope now and can be removed from
1004 the "in scope" stack.
1006 ###### variable fields
1007 int depth, min_depth;
1008 enum { OutScope, PendingScope, CondScope, InScope } scope;
1009 struct variable *in_scope;
1011 ###### parse context
1013 struct variable *in_scope;
1015 All variables with the same name are linked together using the
1016 'previous' link. Those variable that have been affirmatively merged all
1017 have a 'merged' pointer that points to one primary variable - the most
1018 recently declared instance. When merging variables, we need to also
1019 adjust the 'merged' pointer on any other variables that had previously
1020 been merged with the one that will no longer be primary.
1022 A variable that is no longer the most recent instance of a name may
1023 still have "pending" scope, if it might still be merged with most
1024 recent instance. These variables don't really belong in the
1025 "in_scope" list, but are not immediately removed when a new instance
1026 is found. Instead, they are detected and ignored when considering the
1027 list of in_scope names.
1029 The storage of the value of a variable will be described later. For now
1030 we just need to know that when a variable goes out of scope, it might
1031 need to be freed. For this we need to be able to find it, so assume that
1032 `var_value()` will provide that.
1034 ###### variable fields
1035 struct variable *merged;
1037 ###### ast functions
1039 static void variable_merge(struct variable *primary, struct variable *secondary)
1043 primary = primary->merged;
1045 for (v = primary->previous; v; v=v->previous)
1046 if (v == secondary || v == secondary->merged ||
1047 v->merged == secondary ||
1048 v->merged == secondary->merged) {
1049 v->scope = OutScope;
1050 v->merged = primary;
1051 variable_unlink_exec(v);
1055 ###### forward decls
1056 static struct value *var_value(struct parse_context *c, struct variable *v);
1058 ###### free global vars
1060 while (context.varlist) {
1061 struct binding *b = context.varlist;
1062 struct variable *v = b->var;
1063 context.varlist = b->next;
1066 struct variable *t = v;
1070 free_value(t->type, var_value(&context, t));
1072 free_exec(t->where_decl);
1078 #### Manipulating Bindings
1080 When a name is conditionally visible, a new declaration discards the
1081 old binding - the condition lapses. Conversely a usage of the name
1082 affirms the visibility and extends it to the end of the containing
1083 block - i.e. the block that contains both the original declaration and
1084 the latest usage. This is determined from `min_depth`. When a
1085 conditionally visible variable gets affirmed like this, it is also
1086 merged with other conditionally visible variables with the same name.
1088 When we parse a variable declaration we either report an error if the
1089 name is currently bound, or create a new variable at the current nest
1090 depth if the name is unbound or bound to a conditionally scoped or
1091 pending-scope variable. If the previous variable was conditionally
1092 scoped, it and its homonyms becomes out-of-scope.
1094 When we parse a variable reference (including non-declarative assignment
1095 "foo = bar") we report an error if the name is not bound or is bound to
1096 a pending-scope variable; update the scope if the name is bound to a
1097 conditionally scoped variable; or just proceed normally if the named
1098 variable is in scope.
1100 When we exit a scope, any variables bound at this level are either
1101 marked out of scope or pending-scoped, depending on whether the scope
1102 was sequential or parallel. Here a "parallel" scope means the "then"
1103 or "else" part of a conditional, or any "case" or "else" branch of a
1104 switch. Other scopes are "sequential".
1106 When exiting a parallel scope we check if there are any variables that
1107 were previously pending and are still visible. If there are, then
1108 they weren't redeclared in the most recent scope, so they cannot be
1109 merged and must become out-of-scope. If it is not the first of
1110 parallel scopes (based on `child_count`), we check that there was a
1111 previous binding that is still pending-scope. If there isn't, the new
1112 variable must now be out-of-scope.
1114 When exiting a sequential scope that immediately enclosed parallel
1115 scopes, we need to resolve any pending-scope variables. If there was
1116 no `else` clause, and we cannot determine that the `switch` was exhaustive,
1117 we need to mark all pending-scope variable as out-of-scope. Otherwise
1118 all pending-scope variables become conditionally scoped.
1121 enum closetype { CloseSequential, CloseParallel, CloseElse };
1123 ###### ast functions
1125 static struct variable *var_decl(struct parse_context *c, struct text s)
1127 struct binding *b = find_binding(c, s);
1128 struct variable *v = b->var;
1130 switch (v ? v->scope : OutScope) {
1132 /* Caller will report the error */
1136 v && v->scope == CondScope;
1138 v->scope = OutScope;
1142 v = calloc(1, sizeof(*v));
1143 v->previous = b->var;
1147 v->min_depth = v->depth = c->scope_depth;
1149 v->in_scope = c->in_scope;
1154 static struct variable *var_ref(struct parse_context *c, struct text s)
1156 struct binding *b = find_binding(c, s);
1157 struct variable *v = b->var;
1158 struct variable *v2;
1160 switch (v ? v->scope : OutScope) {
1163 /* Caller will report the error */
1166 /* All CondScope variables of this name need to be merged
1167 * and become InScope
1169 v->depth = v->min_depth;
1171 for (v2 = v->previous;
1172 v2 && v2->scope == CondScope;
1174 variable_merge(v, v2);
1182 static void var_block_close(struct parse_context *c, enum closetype ct,
1185 /* Close off all variables that are in_scope.
1186 * Some variables in c->scope may already be not-in-scope,
1187 * such as when a PendingScope variable is hidden by a new
1188 * variable with the same name.
1189 * So we check for v->name->var != v and drop them.
1190 * If we choose to make a variable OutScope, we drop it
1193 struct variable *v, **vp, *v2;
1196 for (vp = &c->in_scope;
1197 (v = *vp) && v->min_depth > c->scope_depth;
1198 (v->scope == OutScope || v->name->var != v)
1199 ? (*vp = v->in_scope, 0)
1200 : ( vp = &v->in_scope, 0)) {
1201 v->min_depth = c->scope_depth;
1202 if (v->name->var != v)
1203 /* This is still in scope, but we haven't just
1207 v->min_depth = c->scope_depth;
1208 if (v->scope == InScope) {
1209 /* This variable gets cleaned up when 'e' finishes */
1210 variable_unlink_exec(v);
1211 v->cleanup_exec = e;
1212 v->next_free = e->to_free;
1217 case CloseParallel: /* handle PendingScope */
1221 if (c->scope_stack->child_count == 1)
1222 /* first among parallel branches */
1223 v->scope = PendingScope;
1224 else if (v->previous &&
1225 v->previous->scope == PendingScope)
1226 /* all previous branches used name */
1227 v->scope = PendingScope;
1228 else if (v->type == Tlabel)
1229 /* Labels remain pending even when not used */
1230 v->scope = PendingScope; // UNTESTED
1232 v->scope = OutScope;
1233 if (ct == CloseElse) {
1234 /* All Pending variables with this name
1235 * are now Conditional */
1237 v2 && v2->scope == PendingScope;
1239 v2->scope = CondScope;
1243 /* Not possible as it would require
1244 * parallel scope to be nested immediately
1245 * in a parallel scope, and that never
1249 /* Not possible as we already tested for
1255 case CloseSequential:
1256 if (v->type == Tlabel)
1257 v->scope = PendingScope;
1260 v->scope = OutScope;
1263 /* There was no 'else', so we can only become
1264 * conditional if we know the cases were exhaustive,
1265 * and that doesn't mean anything yet.
1266 * So only labels become conditional..
1269 v2 && v2->scope == PendingScope;
1271 if (v2->type == Tlabel)
1272 v2->scope = CondScope;
1274 v2->scope = OutScope;
1277 case OutScope: break;
1286 The value of a variable is store separately from the variable, on an
1287 analogue of a stack frame. There are (currently) two frames that can be
1288 active. A global frame which currently only stores constants, and a
1289 stacked frame which stores local variables. Each variable knows if it
1290 is global or not, and what its index into the frame is.
1292 Values in the global frame are known immediately they are relevant, so
1293 the frame needs to be reallocated as it grows so it can store those
1294 values. The local frame doesn't get values until the interpreted phase
1295 is started, so there is no need to allocate until the size is known.
1297 ###### variable fields
1301 ###### parse context
1303 short global_size, global_alloc;
1305 void *global, *local;
1307 ###### ast functions
1309 static struct value *var_value(struct parse_context *c, struct variable *v)
1312 if (!c->local || !v->type)
1313 return NULL; // NOTEST
1314 if (v->frame_pos + v->type->size > c->local_size) {
1315 printf("INVALID frame_pos\n"); // NOTEST
1318 return c->local + v->frame_pos;
1320 if (c->global_size > c->global_alloc) {
1321 int old = c->global_alloc;
1322 c->global_alloc = (c->global_size | 1023) + 1024;
1323 c->global = realloc(c->global, c->global_alloc);
1324 memset(c->global + old, 0, c->global_alloc - old);
1326 return c->global + v->frame_pos;
1329 static struct value *global_alloc(struct parse_context *c, struct type *t,
1330 struct variable *v, struct value *init)
1333 struct variable scratch;
1335 if (t->prepare_type)
1336 t->prepare_type(c, t, 1); // NOTEST
1338 if (c->global_size & (t->align - 1))
1339 c->global_size = (c->global_size + t->align) & ~(t->align-1); // UNTESTED
1344 v->frame_pos = c->global_size;
1346 c->global_size += v->type->size;
1347 ret = var_value(c, v);
1349 memcpy(ret, init, t->size);
1355 As global values are found -- struct field initializers, labels etc --
1356 `global_alloc()` is called to record the value in the global frame.
1358 When the program is fully parsed, we need to walk the list of variables
1359 to find any that weren't merged away and that aren't global, and to
1360 calculate the frame size and assign a frame position for each variable.
1361 For this we have `scope_finalize()`.
1363 ###### ast functions
1365 static void scope_finalize(struct parse_context *c)
1369 for (b = c->varlist; b; b = b->next) {
1371 for (v = b->var; v; v = v->previous) {
1372 struct type *t = v->type;
1377 if (c->local_size & (t->align - 1))
1378 c->local_size = (c->local_size + t->align) & ~(t->align-1);
1379 v->frame_pos = c->local_size;
1380 c->local_size += v->type->size;
1383 c->local = calloc(1, c->local_size);
1386 ###### free context storage
1387 free(context.global);
1388 free(context.local);
1392 Executables can be lots of different things. In many cases an
1393 executable is just an operation combined with one or two other
1394 executables. This allows for expressions and lists etc. Other times an
1395 executable is something quite specific like a constant or variable name.
1396 So we define a `struct exec` to be a general executable with a type, and
1397 a `struct binode` which is a subclass of `exec`, forms a node in a
1398 binary tree, and holds an operation. There will be other subclasses,
1399 and to access these we need to be able to `cast` the `exec` into the
1400 various other types. The first field in any `struct exec` is the type
1401 from the `exec_types` enum.
1404 #define cast(structname, pointer) ({ \
1405 const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \
1406 if (__mptr && *__mptr != X##structname) abort(); \
1407 (struct structname *)( (char *)__mptr);})
1409 #define new(structname) ({ \
1410 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1411 __ptr->type = X##structname; \
1412 __ptr->line = -1; __ptr->column = -1; \
1415 #define new_pos(structname, token) ({ \
1416 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1417 __ptr->type = X##structname; \
1418 __ptr->line = token.line; __ptr->column = token.col; \
1427 enum exec_types type;
1436 struct exec *left, *right;
1439 ###### ast functions
1441 static int __fput_loc(struct exec *loc, FILE *f)
1445 if (loc->line >= 0) {
1446 fprintf(f, "%d:%d: ", loc->line, loc->column);
1449 if (loc->type == Xbinode)
1450 return __fput_loc(cast(binode,loc)->left, f) ||
1451 __fput_loc(cast(binode,loc)->right, f); // NOTEST
1454 static void fput_loc(struct exec *loc, FILE *f)
1456 if (!__fput_loc(loc, f))
1457 fprintf(f, "??:??: "); // NOTEST
1460 Each different type of `exec` node needs a number of functions defined,
1461 a bit like methods. We must be able to free it, print it, analyse it
1462 and execute it. Once we have specific `exec` types we will need to
1463 parse them too. Let's take this a bit more slowly.
1467 The parser generator requires a `free_foo` function for each struct
1468 that stores attributes and they will often be `exec`s and subtypes
1469 there-of. So we need `free_exec` which can handle all the subtypes,
1470 and we need `free_binode`.
1472 ###### ast functions
1474 static void free_binode(struct binode *b)
1479 free_exec(b->right);
1483 ###### core functions
1484 static void free_exec(struct exec *e)
1493 ###### forward decls
1495 static void free_exec(struct exec *e);
1497 ###### free exec cases
1498 case Xbinode: free_binode(cast(binode, e)); break;
1502 Printing an `exec` requires that we know the current indent level for
1503 printing line-oriented components. As will become clear later, we
1504 also want to know what sort of bracketing to use.
1506 ###### ast functions
1508 static void do_indent(int i, char *str)
1515 ###### core functions
1516 static void print_binode(struct binode *b, int indent, int bracket)
1520 ## print binode cases
1524 static void print_exec(struct exec *e, int indent, int bracket)
1530 print_binode(cast(binode, e), indent, bracket); break;
1535 do_indent(indent, "/* FREE");
1536 for (v = e->to_free; v; v = v->next_free)
1537 printf(" %.*s(%c%d+%d)", v->name->name.len, v->name->name.txt,
1538 v->global ? 'G':'L',
1539 v->frame_pos, v->type ? v->type->size:0);
1544 ###### forward decls
1546 static void print_exec(struct exec *e, int indent, int bracket);
1550 As discussed, analysis involves propagating type requirements around the
1551 program and looking for errors.
1553 So `propagate_types` is passed an expected type (being a `struct type`
1554 pointer together with some `val_rules` flags) that the `exec` is
1555 expected to return, and returns the type that it does return, either
1556 of which can be `NULL` signifying "unknown". An `ok` flag is passed
1557 by reference. It is set to `0` when an error is found, and `2` when
1558 any change is made. If it remains unchanged at `1`, then no more
1559 propagation is needed.
1563 enum val_rules {Rnolabel = 1<<0, Rboolok = 1<<1, Rnoconstant = 2<<1};
1567 if (rules & Rnolabel)
1568 fputs(" (labels not permitted)", stderr);
1571 ###### core functions
1573 static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1574 struct type *type, int rules);
1575 static struct type *__propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1576 struct type *type, int rules)
1583 switch (prog->type) {
1586 struct binode *b = cast(binode, prog);
1588 ## propagate binode cases
1592 ## propagate exec cases
1597 static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1598 struct type *type, int rules)
1600 struct type *ret = __propagate_types(prog, c, ok, type, rules);
1609 Interpreting an `exec` doesn't require anything but the `exec`. State
1610 is stored in variables and each variable will be directly linked from
1611 within the `exec` tree. The exception to this is the `main` function
1612 which needs to look at command line arguments. This function will be
1613 interpreted separately.
1615 Each `exec` can return a value combined with a type in `struct lrval`.
1616 The type may be `Tnone` but must be non-NULL. Some `exec`s will return
1617 the location of a value, which can be updated, in `lval`. Others will
1618 set `lval` to NULL indicating that there is a value of appropriate type
1621 ###### core functions
1625 struct value rval, *lval;
1628 static struct lrval _interp_exec(struct parse_context *c, struct exec *e);
1630 static struct value interp_exec(struct parse_context *c, struct exec *e,
1631 struct type **typeret)
1633 struct lrval ret = _interp_exec(c, e);
1635 if (!ret.type) abort();
1637 *typeret = ret.type;
1639 dup_value(ret.type, ret.lval, &ret.rval);
1643 static struct value *linterp_exec(struct parse_context *c, struct exec *e,
1644 struct type **typeret)
1646 struct lrval ret = _interp_exec(c, e);
1649 *typeret = ret.type;
1651 free_value(ret.type, &ret.rval);
1655 static struct lrval _interp_exec(struct parse_context *c, struct exec *e)
1658 struct value rv = {}, *lrv = NULL;
1659 struct type *rvtype;
1661 rvtype = ret.type = Tnone;
1671 struct binode *b = cast(binode, e);
1672 struct value left, right, *lleft;
1673 struct type *ltype, *rtype;
1674 ltype = rtype = Tnone;
1676 ## interp binode cases
1678 free_value(ltype, &left);
1679 free_value(rtype, &right);
1682 ## interp exec cases
1687 ## interp exec cleanup
1693 Now that we have the shape of the interpreter in place we can add some
1694 complex types and connected them in to the data structures and the
1695 different phases of parse, analyse, print, interpret.
1697 Thus far we have arrays and structs.
1701 Arrays can be declared by giving a size and a type, as `[size]type' so
1702 `freq:[26]number` declares `freq` to be an array of 26 numbers. The
1703 size can be either a literal number, or a named constant. Some day an
1704 arbitrary expression will be supported.
1706 As a formal parameter to a function, the array can be declared with a
1707 new variable as the size: `name:[size::number]string`. The `size`
1708 variable is set to the size of the array and must be a constant. As
1709 `number` is the only supported type, it can be left out:
1710 `name:[size::]string`.
1712 Arrays cannot be assigned. When pointers are introduced we will also
1713 introduce array slices which can refer to part or all of an array -
1714 the assignment syntax will create a slice. For now, an array can only
1715 ever be referenced by the name it is declared with. It is likely that
1716 a "`copy`" primitive will eventually be define which can be used to
1717 make a copy of an array with controllable recursive depth.
1719 For now we have two sorts of array, those with fixed size either because
1720 it is given as a literal number or because it is a struct member (which
1721 cannot have a runtime-changing size), and those with a size that is
1722 determined at runtime - local variables with a const size. The former
1723 have their size calculated at parse time, the latter at run time.
1725 For the latter type, the `size` field of the type is the size of a
1726 pointer, and the array is reallocated every time it comes into scope.
1728 We differentiate struct fields with a const size from local variables
1729 with a const size by whether they are prepared at parse time or not.
1731 ###### type union fields
1734 int unspec; // size is unspecified - vsize must be set.
1737 struct variable *vsize;
1738 struct type *member;
1741 ###### value union fields
1742 void *array; // used if not static_size
1744 ###### value functions
1746 static void array_prepare_type(struct parse_context *c, struct type *type,
1749 struct value *vsize;
1751 if (!type->array.vsize || type->array.static_size)
1754 vsize = var_value(c, type->array.vsize);
1756 mpz_tdiv_q(q, mpq_numref(vsize->num), mpq_denref(vsize->num));
1757 type->array.size = mpz_get_si(q);
1761 type->array.static_size = 1;
1762 type->size = type->array.size * type->array.member->size;
1763 type->align = type->array.member->align;
1767 static void array_init(struct type *type, struct value *val)
1770 void *ptr = val->ptr;
1774 if (!type->array.static_size) {
1775 val->array = calloc(type->array.size,
1776 type->array.member->size);
1779 for (i = 0; i < type->array.size; i++) {
1781 v = (void*)ptr + i * type->array.member->size;
1782 val_init(type->array.member, v);
1786 static void array_free(struct type *type, struct value *val)
1789 void *ptr = val->ptr;
1791 if (!type->array.static_size)
1793 for (i = 0; i < type->array.size; i++) {
1795 v = (void*)ptr + i * type->array.member->size;
1796 free_value(type->array.member, v);
1798 if (!type->array.static_size)
1802 static int array_compat(struct type *require, struct type *have)
1804 if (have->compat != require->compat)
1805 return 0; // UNTESTED
1806 /* Both are arrays, so we can look at details */
1807 if (!type_compat(require->array.member, have->array.member, 0))
1809 if (have->array.unspec && require->array.unspec) {
1810 if (have->array.vsize && require->array.vsize &&
1811 have->array.vsize != require->array.vsize) // UNTESTED
1812 /* sizes might not be the same */
1813 return 0; // UNTESTED
1816 if (have->array.unspec || require->array.unspec)
1817 return 1; // UNTESTED
1818 if (require->array.vsize == NULL && have->array.vsize == NULL)
1819 return require->array.size == have->array.size;
1821 return require->array.vsize == have->array.vsize; // UNTESTED
1824 static void array_print_type(struct type *type, FILE *f)
1827 if (type->array.vsize) {
1828 struct binding *b = type->array.vsize->name;
1829 fprintf(f, "%.*s%s]", b->name.len, b->name.txt,
1830 type->array.unspec ? "::" : "");
1832 fprintf(f, "%d]", type->array.size);
1833 type_print(type->array.member, f);
1836 static struct type array_prototype = {
1838 .prepare_type = array_prepare_type,
1839 .print_type = array_print_type,
1840 .compat = array_compat,
1842 .size = sizeof(void*),
1843 .align = sizeof(void*),
1846 ###### declare terminals
1851 | [ NUMBER ] Type ${ {
1854 struct text noname = { "", 0 };
1857 $0 = t = add_type(c, noname, &array_prototype);
1858 t->array.member = $<4;
1859 t->array.vsize = NULL;
1860 if (number_parse(num, tail, $2.txt) == 0)
1861 tok_err(c, "error: unrecognised number", &$2);
1863 tok_err(c, "error: unsupported number suffix", &$2);
1865 t->array.size = mpz_get_ui(mpq_numref(num));
1866 if (mpz_cmp_ui(mpq_denref(num), 1) != 0) {
1867 tok_err(c, "error: array size must be an integer",
1869 } else if (mpz_cmp_ui(mpq_numref(num), 1UL << 30) >= 0)
1870 tok_err(c, "error: array size is too large",
1874 t->array.static_size = 1;
1875 t->size = t->array.size * t->array.member->size;
1876 t->align = t->array.member->align;
1879 | [ IDENTIFIER ] Type ${ {
1880 struct variable *v = var_ref(c, $2.txt);
1881 struct text noname = { "", 0 };
1884 tok_err(c, "error: name undeclared", &$2);
1885 else if (!v->constant)
1886 tok_err(c, "error: array size must be a constant", &$2);
1888 $0 = add_type(c, noname, &array_prototype);
1889 $0->array.member = $<4;
1891 $0->array.vsize = v;
1896 OptType -> Type ${ $0 = $<1; }$
1899 ###### formal type grammar
1901 | [ IDENTIFIER :: OptType ] Type ${ {
1902 struct variable *v = var_decl(c, $ID.txt);
1903 struct text noname = { "", 0 };
1909 $0 = add_type(c, noname, &array_prototype);
1910 $0->array.member = $<6;
1912 $0->array.unspec = 1;
1913 $0->array.vsize = v;
1919 ###### variable grammar
1921 | Variable [ Expression ] ${ {
1922 struct binode *b = new(binode);
1929 ###### print binode cases
1931 print_exec(b->left, -1, bracket);
1933 print_exec(b->right, -1, bracket);
1937 ###### propagate binode cases
1939 /* left must be an array, right must be a number,
1940 * result is the member type of the array
1942 propagate_types(b->right, c, ok, Tnum, 0);
1943 t = propagate_types(b->left, c, ok, NULL, rules & Rnoconstant);
1944 if (!t || t->compat != array_compat) {
1945 type_err(c, "error: %1 cannot be indexed", prog, t, 0, NULL);
1948 if (!type_compat(type, t->array.member, rules)) {
1949 type_err(c, "error: have %1 but need %2", prog,
1950 t->array.member, rules, type);
1952 return t->array.member;
1956 ###### interp binode cases
1962 lleft = linterp_exec(c, b->left, <ype);
1963 right = interp_exec(c, b->right, &rtype);
1965 mpz_tdiv_q(q, mpq_numref(right.num), mpq_denref(right.num));
1969 if (ltype->array.static_size)
1972 ptr = *(void**)lleft;
1973 rvtype = ltype->array.member;
1974 if (i >= 0 && i < ltype->array.size)
1975 lrv = ptr + i * rvtype->size;
1977 val_init(ltype->array.member, &rv);
1984 A `struct` is a data-type that contains one or more other data-types.
1985 It differs from an array in that each member can be of a different
1986 type, and they are accessed by name rather than by number. Thus you
1987 cannot choose an element by calculation, you need to know what you
1990 The language makes no promises about how a given structure will be
1991 stored in memory - it is free to rearrange fields to suit whatever
1992 criteria seems important.
1994 Structs are declared separately from program code - they cannot be
1995 declared in-line in a variable declaration like arrays can. A struct
1996 is given a name and this name is used to identify the type - the name
1997 is not prefixed by the word `struct` as it would be in C.
1999 Structs are only treated as the same if they have the same name.
2000 Simply having the same fields in the same order is not enough. This
2001 might change once we can create structure initializers from a list of
2004 Each component datum is identified much like a variable is declared,
2005 with a name, one or two colons, and a type. The type cannot be omitted
2006 as there is no opportunity to deduce the type from usage. An initial
2007 value can be given following an equals sign, so
2009 ##### Example: a struct type
2015 would declare a type called "complex" which has two number fields,
2016 each initialised to zero.
2018 Struct will need to be declared separately from the code that uses
2019 them, so we will need to be able to print out the declaration of a
2020 struct when reprinting the whole program. So a `print_type_decl` type
2021 function will be needed.
2023 ###### type union fields
2035 ###### type functions
2036 void (*print_type_decl)(struct type *type, FILE *f);
2038 ###### value functions
2040 static void structure_init(struct type *type, struct value *val)
2044 for (i = 0; i < type->structure.nfields; i++) {
2046 v = (void*) val->ptr + type->structure.fields[i].offset;
2047 if (type->structure.fields[i].init)
2048 dup_value(type->structure.fields[i].type,
2049 type->structure.fields[i].init,
2052 val_init(type->structure.fields[i].type, v);
2056 static void structure_free(struct type *type, struct value *val)
2060 for (i = 0; i < type->structure.nfields; i++) {
2062 v = (void*)val->ptr + type->structure.fields[i].offset;
2063 free_value(type->structure.fields[i].type, v);
2067 static void structure_free_type(struct type *t)
2070 for (i = 0; i < t->structure.nfields; i++)
2071 if (t->structure.fields[i].init) {
2072 free_value(t->structure.fields[i].type,
2073 t->structure.fields[i].init);
2075 free(t->structure.fields);
2078 static struct type structure_prototype = {
2079 .init = structure_init,
2080 .free = structure_free,
2081 .free_type = structure_free_type,
2082 .print_type_decl = structure_print_type,
2096 ###### free exec cases
2098 free_exec(cast(fieldref, e)->left);
2102 ###### declare terminals
2105 ###### variable grammar
2107 | Variable . IDENTIFIER ${ {
2108 struct fieldref *fr = new_pos(fieldref, $2);
2115 ###### print exec cases
2119 struct fieldref *f = cast(fieldref, e);
2120 print_exec(f->left, -1, bracket);
2121 printf(".%.*s", f->name.len, f->name.txt);
2125 ###### ast functions
2126 static int find_struct_index(struct type *type, struct text field)
2129 for (i = 0; i < type->structure.nfields; i++)
2130 if (text_cmp(type->structure.fields[i].name, field) == 0)
2135 ###### propagate exec cases
2139 struct fieldref *f = cast(fieldref, prog);
2140 struct type *st = propagate_types(f->left, c, ok, NULL, 0);
2143 type_err(c, "error: unknown type for field access", f->left, // UNTESTED
2145 else if (st->init != structure_init)
2146 type_err(c, "error: field reference attempted on %1, not a struct",
2147 f->left, st, 0, NULL);
2148 else if (f->index == -2) {
2149 f->index = find_struct_index(st, f->name);
2151 type_err(c, "error: cannot find requested field in %1",
2152 f->left, st, 0, NULL);
2154 if (f->index >= 0) {
2155 struct type *ft = st->structure.fields[f->index].type;
2156 if (!type_compat(type, ft, rules))
2157 type_err(c, "error: have %1 but need %2", prog,
2164 ###### interp exec cases
2167 struct fieldref *f = cast(fieldref, e);
2169 struct value *lleft = linterp_exec(c, f->left, <ype);
2170 lrv = (void*)lleft->ptr + ltype->structure.fields[f->index].offset;
2171 rvtype = ltype->structure.fields[f->index].type;
2177 struct fieldlist *prev;
2181 ###### ast functions
2182 static void free_fieldlist(struct fieldlist *f)
2186 free_fieldlist(f->prev);
2188 free_value(f->f.type, f->f.init); // UNTESTED
2189 free(f->f.init); // UNTESTED
2194 ###### top level grammar
2195 DeclareStruct -> struct IDENTIFIER FieldBlock Newlines ${ {
2197 add_type(c, $2.txt, &structure_prototype);
2199 struct fieldlist *f;
2201 for (f = $3; f; f=f->prev)
2204 t->structure.nfields = cnt;
2205 t->structure.fields = calloc(cnt, sizeof(struct field));
2208 int a = f->f.type->align;
2210 t->structure.fields[cnt] = f->f;
2211 if (t->size & (a-1))
2212 t->size = (t->size | (a-1)) + 1;
2213 t->structure.fields[cnt].offset = t->size;
2214 t->size += ((f->f.type->size - 1) | (a-1)) + 1;
2223 FieldBlock -> { IN OptNL FieldLines OUT OptNL } ${ $0 = $<FL; }$
2224 | { SimpleFieldList } ${ $0 = $<SFL; }$
2225 | IN OptNL FieldLines OUT ${ $0 = $<FL; }$
2226 | SimpleFieldList EOL ${ $0 = $<SFL; }$
2228 FieldLines -> SimpleFieldList Newlines ${ $0 = $<SFL; }$
2229 | FieldLines SimpleFieldList Newlines ${
2234 SimpleFieldList -> Field ${ $0 = $<F; }$
2235 | SimpleFieldList ; Field ${
2239 | SimpleFieldList ; ${
2242 | ERROR ${ tok_err(c, "Syntax error in struct field", &$1); }$
2244 Field -> IDENTIFIER : Type = Expression ${ {
2247 $0 = calloc(1, sizeof(struct fieldlist));
2248 $0->f.name = $1.txt;
2253 propagate_types($<5, c, &ok, $3, 0);
2256 c->parse_error = 1; // UNTESTED
2258 struct value vl = interp_exec(c, $5, NULL);
2259 $0->f.init = global_alloc(c, $0->f.type, NULL, &vl);
2262 | IDENTIFIER : Type ${
2263 $0 = calloc(1, sizeof(struct fieldlist));
2264 $0->f.name = $1.txt;
2266 if ($0->f.type->prepare_type)
2267 $0->f.type->prepare_type(c, $0->f.type, 1);
2270 ###### forward decls
2271 static void structure_print_type(struct type *t, FILE *f);
2273 ###### value functions
2274 static void structure_print_type(struct type *t, FILE *f) // UNTESTED
2278 fprintf(f, "struct %.*s\n", t->name.len, t->name.txt);
2280 for (i = 0; i < t->structure.nfields; i++) {
2281 struct field *fl = t->structure.fields + i;
2282 fprintf(f, " %.*s : ", fl->name.len, fl->name.txt);
2283 type_print(fl->type, f);
2284 if (fl->type->print && fl->init) {
2286 if (fl->type == Tstr)
2287 fprintf(f, "\""); // UNTESTED
2288 print_value(fl->type, fl->init);
2289 if (fl->type == Tstr)
2290 fprintf(f, "\""); // UNTESTED
2296 ###### print type decls
2298 struct type *t; // UNTESTED
2301 while (target != 0) {
2303 for (t = context.typelist; t ; t=t->next)
2304 if (t->print_type_decl) {
2313 t->print_type_decl(t, stdout);
2321 A function is a named chunk of code which can be passed parameters and
2322 can return results. Each function has an implicit type which includes
2323 the set of parameters and the return value. As yet these types cannot
2324 be declared separate from the function itself.
2326 In fact, only one function is currently possible - `main`. `main` is
2327 passed an array of strings together with the size of the array, and
2328 doesn't return anything. The strings are command line arguments.
2330 The parameters can be specified either in parentheses as a list, such as
2332 ##### Example: function 1
2334 func main(av:[ac::number]string)
2337 or as an indented list of one parameter per line
2339 ##### Example: function 2
2342 argv:[argc::number]string
2346 For constructing these lists we use a `List` binode, which will be
2347 further detailed when Expression Lists are introduced.
2357 MainFunction -> func main ( OpenScope Args ) Block Newlines ${
2360 $0->left = reorder_bilist($<Ar);
2362 var_block_close(c, CloseSequential, $0);
2363 if (c->scope_stack && !c->parse_error) abort();
2365 | func main IN OpenScope OptNL Args OUT OptNL do Block Newlines ${
2368 $0->left = reorder_bilist($<Ar);
2370 var_block_close(c, CloseSequential, $0);
2371 if (c->scope_stack && !c->parse_error) abort();
2373 | func main NEWLINE OpenScope OptNL do Block Newlines ${
2378 var_block_close(c, CloseSequential, $0);
2379 if (c->scope_stack && !c->parse_error) abort();
2382 Args -> ${ $0 = NULL; }$
2383 | Varlist ${ $0 = $<1; }$
2384 | Varlist ; ${ $0 = $<1; }$
2385 | Varlist NEWLINE ${ $0 = $<1; }$
2387 Varlist -> Varlist ; ArgDecl ${ // UNTESTED
2401 ArgDecl -> IDENTIFIER : FormalType ${ {
2402 struct variable *v = var_decl(c, $1.txt);
2408 ## Executables: the elements of code
2410 Each code element needs to be parsed, printed, analysed,
2411 interpreted, and freed. There are several, so let's just start with
2412 the easy ones and work our way up.
2416 We have already met values as separate objects. When manifest
2417 constants appear in the program text, that must result in an executable
2418 which has a constant value. So the `val` structure embeds a value in
2431 ###### ast functions
2432 struct val *new_val(struct type *T, struct token tk)
2434 struct val *v = new_pos(val, tk);
2445 $0 = new_val(Tbool, $1);
2449 $0 = new_val(Tbool, $1);
2453 $0 = new_val(Tnum, $1);
2456 if (number_parse($0->val.num, tail, $1.txt) == 0)
2457 mpq_init($0->val.num); // UNTESTED
2459 tok_err(c, "error: unsupported number suffix",
2464 $0 = new_val(Tstr, $1);
2467 string_parse(&$1, '\\', &$0->val.str, tail);
2469 tok_err(c, "error: unsupported string suffix",
2474 $0 = new_val(Tstr, $1);
2477 string_parse(&$1, '\\', &$0->val.str, tail);
2479 tok_err(c, "error: unsupported string suffix",
2484 ###### print exec cases
2487 struct val *v = cast(val, e);
2488 if (v->vtype == Tstr)
2490 print_value(v->vtype, &v->val);
2491 if (v->vtype == Tstr)
2496 ###### propagate exec cases
2499 struct val *val = cast(val, prog);
2500 if (!type_compat(type, val->vtype, rules))
2501 type_err(c, "error: expected %1%r found %2",
2502 prog, type, rules, val->vtype);
2506 ###### interp exec cases
2508 rvtype = cast(val, e)->vtype;
2509 dup_value(rvtype, &cast(val, e)->val, &rv);
2512 ###### ast functions
2513 static void free_val(struct val *v)
2516 free_value(v->vtype, &v->val);
2520 ###### free exec cases
2521 case Xval: free_val(cast(val, e)); break;
2523 ###### ast functions
2524 // Move all nodes from 'b' to 'rv', reversing their order.
2525 // In 'b' 'left' is a list, and 'right' is the last node.
2526 // In 'rv', left' is the first node and 'right' is a list.
2527 static struct binode *reorder_bilist(struct binode *b)
2529 struct binode *rv = NULL;
2532 struct exec *t = b->right;
2536 b = cast(binode, b->left);
2546 Just as we used a `val` to wrap a value into an `exec`, we similarly
2547 need a `var` to wrap a `variable` into an exec. While each `val`
2548 contained a copy of the value, each `var` holds a link to the variable
2549 because it really is the same variable no matter where it appears.
2550 When a variable is used, we need to remember to follow the `->merged`
2551 link to find the primary instance.
2559 struct variable *var;
2567 VariableDecl -> IDENTIFIER : ${ {
2568 struct variable *v = var_decl(c, $1.txt);
2569 $0 = new_pos(var, $1);
2574 v = var_ref(c, $1.txt);
2576 type_err(c, "error: variable '%v' redeclared",
2578 type_err(c, "info: this is where '%v' was first declared",
2579 v->where_decl, NULL, 0, NULL);
2582 | IDENTIFIER :: ${ {
2583 struct variable *v = var_decl(c, $1.txt);
2584 $0 = new_pos(var, $1);
2590 v = var_ref(c, $1.txt);
2592 type_err(c, "error: variable '%v' redeclared",
2594 type_err(c, "info: this is where '%v' was first declared",
2595 v->where_decl, NULL, 0, NULL);
2598 | IDENTIFIER : Type ${ {
2599 struct variable *v = var_decl(c, $1.txt);
2600 $0 = new_pos(var, $1);
2607 v = var_ref(c, $1.txt);
2609 type_err(c, "error: variable '%v' redeclared",
2611 type_err(c, "info: this is where '%v' was first declared",
2612 v->where_decl, NULL, 0, NULL);
2615 | IDENTIFIER :: Type ${ {
2616 struct variable *v = var_decl(c, $1.txt);
2617 $0 = new_pos(var, $1);
2625 v = var_ref(c, $1.txt);
2627 type_err(c, "error: variable '%v' redeclared",
2629 type_err(c, "info: this is where '%v' was first declared",
2630 v->where_decl, NULL, 0, NULL);
2635 Variable -> IDENTIFIER ${ {
2636 struct variable *v = var_ref(c, $1.txt);
2637 $0 = new_pos(var, $1);
2639 /* This might be a label - allocate a var just in case */
2640 v = var_decl(c, $1.txt);
2647 cast(var, $0)->var = v;
2651 ###### print exec cases
2654 struct var *v = cast(var, e);
2656 struct binding *b = v->var->name;
2657 printf("%.*s", b->name.len, b->name.txt);
2664 if (loc && loc->type == Xvar) {
2665 struct var *v = cast(var, loc);
2667 struct binding *b = v->var->name;
2668 fprintf(stderr, "%.*s", b->name.len, b->name.txt);
2670 fputs("???", stderr); // NOTEST
2672 fputs("NOTVAR", stderr); // NOTEST
2675 ###### propagate exec cases
2679 struct var *var = cast(var, prog);
2680 struct variable *v = var->var;
2682 type_err(c, "%d:BUG: no variable!!", prog, NULL, 0, NULL); // NOTEST
2683 return Tnone; // NOTEST
2686 if (v->constant && (rules & Rnoconstant)) {
2687 type_err(c, "error: Cannot assign to a constant: %v",
2688 prog, NULL, 0, NULL);
2689 type_err(c, "info: name was defined as a constant here",
2690 v->where_decl, NULL, 0, NULL);
2693 if (v->type == Tnone && v->where_decl == prog)
2694 type_err(c, "error: variable used but not declared: %v",
2695 prog, NULL, 0, NULL);
2696 if (v->type == NULL) {
2697 if (type && *ok != 0) {
2699 v->where_set = prog;
2704 if (!type_compat(type, v->type, rules)) {
2705 type_err(c, "error: expected %1%r but variable '%v' is %2", prog,
2706 type, rules, v->type);
2707 type_err(c, "info: this is where '%v' was set to %1", v->where_set,
2708 v->type, rules, NULL);
2715 ###### interp exec cases
2718 struct var *var = cast(var, e);
2719 struct variable *v = var->var;
2722 lrv = var_value(c, v);
2727 ###### ast functions
2729 static void free_var(struct var *v)
2734 ###### free exec cases
2735 case Xvar: free_var(cast(var, e)); break;
2737 ### Expressions: Conditional
2739 Our first user of the `binode` will be conditional expressions, which
2740 is a bit odd as they actually have three components. That will be
2741 handled by having 2 binodes for each expression. The conditional
2742 expression is the lowest precedence operator which is why we define it
2743 first - to start the precedence list.
2745 Conditional expressions are of the form "value `if` condition `else`
2746 other_value". They associate to the right, so everything to the right
2747 of `else` is part of an else value, while only a higher-precedence to
2748 the left of `if` is the if values. Between `if` and `else` there is no
2749 room for ambiguity, so a full conditional expression is allowed in
2761 Expression -> Expression if Expression else Expression $$ifelse ${ {
2762 struct binode *b1 = new(binode);
2763 struct binode *b2 = new(binode);
2772 ## expression grammar
2774 ###### print binode cases
2777 b2 = cast(binode, b->right);
2778 if (bracket) printf("(");
2779 print_exec(b2->left, -1, bracket);
2781 print_exec(b->left, -1, bracket);
2783 print_exec(b2->right, -1, bracket);
2784 if (bracket) printf(")");
2787 ###### propagate binode cases
2790 /* cond must be Tbool, others must match */
2791 struct binode *b2 = cast(binode, b->right);
2794 propagate_types(b->left, c, ok, Tbool, 0);
2795 t = propagate_types(b2->left, c, ok, type, Rnolabel);
2796 t2 = propagate_types(b2->right, c, ok, type ?: t, Rnolabel);
2800 ###### interp binode cases
2803 struct binode *b2 = cast(binode, b->right);
2804 left = interp_exec(c, b->left, <ype);
2806 rv = interp_exec(c, b2->left, &rvtype); // UNTESTED
2808 rv = interp_exec(c, b2->right, &rvtype);
2814 We take a brief detour, now that we have expressions, to describe lists
2815 of expressions. These will be needed for function parameters and
2816 possibly other situations. They seem generic enough to introduce here
2817 to be used elsewhere.
2819 And ExpressionList will use the `List` type of `binode`, building up at
2820 the end. And place where they are used will probably call
2821 `reorder_bilist()` to get a more normal first/next arrangement.
2823 ###### declare terminals
2826 `List` execs have no implicit semantics, so they are never propagated or
2827 interpreted. The can be printed as a comma separate list, which is how
2828 they are parsed. Note they are also used for function formal parameter
2829 lists. In that case a separate function is used to print them.
2831 ###### print binode cases
2835 print_exec(b->left, -1, bracket);
2838 b = cast(binode, b->right);
2842 ###### propagate binode cases
2843 case List: abort(); // NOTEST
2844 ###### interp binode cases
2845 case List: abort(); // NOTEST
2850 ExpressionList -> ExpressionList , Expression ${
2863 ### Expressions: Boolean
2865 The next class of expressions to use the `binode` will be Boolean
2866 expressions. "`and then`" and "`or else`" are similar to `and` and `or`
2867 have same corresponding precendence. The difference is that they don't
2868 evaluate the second expression if not necessary.
2877 ###### expr precedence
2882 ###### expression grammar
2883 | Expression or Expression ${ {
2884 struct binode *b = new(binode);
2890 | Expression or else Expression ${ {
2891 struct binode *b = new(binode);
2898 | Expression and Expression ${ {
2899 struct binode *b = new(binode);
2905 | Expression and then Expression ${ {
2906 struct binode *b = new(binode);
2913 | not Expression ${ {
2914 struct binode *b = new(binode);
2920 ###### print binode cases
2922 if (bracket) printf("(");
2923 print_exec(b->left, -1, bracket);
2925 print_exec(b->right, -1, bracket);
2926 if (bracket) printf(")");
2929 if (bracket) printf("(");
2930 print_exec(b->left, -1, bracket);
2931 printf(" and then ");
2932 print_exec(b->right, -1, bracket);
2933 if (bracket) printf(")");
2936 if (bracket) printf("(");
2937 print_exec(b->left, -1, bracket);
2939 print_exec(b->right, -1, bracket);
2940 if (bracket) printf(")");
2943 if (bracket) printf("(");
2944 print_exec(b->left, -1, bracket);
2945 printf(" or else ");
2946 print_exec(b->right, -1, bracket);
2947 if (bracket) printf(")");
2950 if (bracket) printf("(");
2952 print_exec(b->right, -1, bracket);
2953 if (bracket) printf(")");
2956 ###### propagate binode cases
2962 /* both must be Tbool, result is Tbool */
2963 propagate_types(b->left, c, ok, Tbool, 0);
2964 propagate_types(b->right, c, ok, Tbool, 0);
2965 if (type && type != Tbool)
2966 type_err(c, "error: %1 operation found where %2 expected", prog,
2970 ###### interp binode cases
2972 rv = interp_exec(c, b->left, &rvtype);
2973 right = interp_exec(c, b->right, &rtype);
2974 rv.bool = rv.bool && right.bool;
2977 rv = interp_exec(c, b->left, &rvtype);
2979 rv = interp_exec(c, b->right, NULL);
2982 rv = interp_exec(c, b->left, &rvtype);
2983 right = interp_exec(c, b->right, &rtype);
2984 rv.bool = rv.bool || right.bool;
2987 rv = interp_exec(c, b->left, &rvtype);
2989 rv = interp_exec(c, b->right, NULL);
2992 rv = interp_exec(c, b->right, &rvtype);
2996 ### Expressions: Comparison
2998 Of slightly higher precedence that Boolean expressions are Comparisons.
2999 A comparison takes arguments of any comparable type, but the two types
3002 To simplify the parsing we introduce an `eop` which can record an
3003 expression operator, and the `CMPop` non-terminal will match one of them.
3010 ###### ast functions
3011 static void free_eop(struct eop *e)
3025 ###### expr precedence
3026 $LEFT < > <= >= == != CMPop
3028 ###### expression grammar
3029 | Expression CMPop Expression ${ {
3030 struct binode *b = new(binode);
3040 CMPop -> < ${ $0.op = Less; }$
3041 | > ${ $0.op = Gtr; }$
3042 | <= ${ $0.op = LessEq; }$
3043 | >= ${ $0.op = GtrEq; }$
3044 | == ${ $0.op = Eql; }$
3045 | != ${ $0.op = NEql; }$
3047 ###### print binode cases
3055 if (bracket) printf("(");
3056 print_exec(b->left, -1, bracket);
3058 case Less: printf(" < "); break;
3059 case LessEq: printf(" <= "); break;
3060 case Gtr: printf(" > "); break;
3061 case GtrEq: printf(" >= "); break;
3062 case Eql: printf(" == "); break;
3063 case NEql: printf(" != "); break;
3064 default: abort(); // NOTEST
3066 print_exec(b->right, -1, bracket);
3067 if (bracket) printf(")");
3070 ###### propagate binode cases
3077 /* Both must match but not be labels, result is Tbool */
3078 t = propagate_types(b->left, c, ok, NULL, Rnolabel);
3080 propagate_types(b->right, c, ok, t, 0);
3082 t = propagate_types(b->right, c, ok, NULL, Rnolabel); // UNTESTED
3084 t = propagate_types(b->left, c, ok, t, 0); // UNTESTED
3086 if (!type_compat(type, Tbool, 0))
3087 type_err(c, "error: Comparison returns %1 but %2 expected", prog,
3088 Tbool, rules, type);
3091 ###### interp binode cases
3100 left = interp_exec(c, b->left, <ype);
3101 right = interp_exec(c, b->right, &rtype);
3102 cmp = value_cmp(ltype, rtype, &left, &right);
3105 case Less: rv.bool = cmp < 0; break;
3106 case LessEq: rv.bool = cmp <= 0; break;
3107 case Gtr: rv.bool = cmp > 0; break;
3108 case GtrEq: rv.bool = cmp >= 0; break;
3109 case Eql: rv.bool = cmp == 0; break;
3110 case NEql: rv.bool = cmp != 0; break;
3111 default: rv.bool = 0; break; // NOTEST
3116 ### Expressions: The rest
3118 The remaining expressions with the highest precedence are arithmetic,
3119 string concatenation, and string conversion. String concatenation
3120 (`++`) has the same precedence as multiplication and division, but lower
3123 String conversion is a temporary feature until I get a better type
3124 system. `$` is a prefix operator which expects a string and returns
3127 `+` and `-` are both infix and prefix operations (where they are
3128 absolute value and negation). These have different operator names.
3130 We also have a 'Bracket' operator which records where parentheses were
3131 found. This makes it easy to reproduce these when printing. Possibly I
3132 should only insert brackets were needed for precedence.
3142 ###### expr precedence
3148 ###### expression grammar
3149 | Expression Eop Expression ${ {
3150 struct binode *b = new(binode);
3157 | Expression Top Expression ${ {
3158 struct binode *b = new(binode);
3165 | ( Expression ) ${ {
3166 struct binode *b = new_pos(binode, $1);
3171 | Uop Expression ${ {
3172 struct binode *b = new(binode);
3177 | Value ${ $0 = $<1; }$
3178 | Variable ${ $0 = $<1; }$
3181 Eop -> + ${ $0.op = Plus; }$
3182 | - ${ $0.op = Minus; }$
3184 Uop -> + ${ $0.op = Absolute; }$
3185 | - ${ $0.op = Negate; }$
3186 | $ ${ $0.op = StringConv; }$
3188 Top -> * ${ $0.op = Times; }$
3189 | / ${ $0.op = Divide; }$
3190 | % ${ $0.op = Rem; }$
3191 | ++ ${ $0.op = Concat; }$
3193 ###### print binode cases
3200 if (bracket) printf("(");
3201 print_exec(b->left, indent, bracket);
3203 case Plus: fputs(" + ", stdout); break;
3204 case Minus: fputs(" - ", stdout); break;
3205 case Times: fputs(" * ", stdout); break;
3206 case Divide: fputs(" / ", stdout); break;
3207 case Rem: fputs(" % ", stdout); break;
3208 case Concat: fputs(" ++ ", stdout); break;
3209 default: abort(); // NOTEST
3211 print_exec(b->right, indent, bracket);
3212 if (bracket) printf(")");
3217 if (bracket) printf("(");
3219 case Absolute: fputs("+", stdout); break;
3220 case Negate: fputs("-", stdout); break;
3221 case StringConv: fputs("$", stdout); break;
3222 default: abort(); // NOTEST
3224 print_exec(b->right, indent, bracket);
3225 if (bracket) printf(")");
3229 print_exec(b->right, indent, bracket);
3233 ###### propagate binode cases
3239 /* both must be numbers, result is Tnum */
3242 /* as propagate_types ignores a NULL,
3243 * unary ops fit here too */
3244 propagate_types(b->left, c, ok, Tnum, 0);
3245 propagate_types(b->right, c, ok, Tnum, 0);
3246 if (!type_compat(type, Tnum, 0))
3247 type_err(c, "error: Arithmetic returns %1 but %2 expected", prog,
3252 /* both must be Tstr, result is Tstr */
3253 propagate_types(b->left, c, ok, Tstr, 0);
3254 propagate_types(b->right, c, ok, Tstr, 0);
3255 if (!type_compat(type, Tstr, 0))
3256 type_err(c, "error: Concat returns %1 but %2 expected", prog,
3261 /* op must be string, result is number */
3262 propagate_types(b->left, c, ok, Tstr, 0);
3263 if (!type_compat(type, Tnum, 0))
3264 type_err(c, // UNTESTED
3265 "error: Can only convert string to number, not %1",
3266 prog, type, 0, NULL);
3270 return propagate_types(b->right, c, ok, type, 0);
3272 ###### interp binode cases
3275 rv = interp_exec(c, b->left, &rvtype);
3276 right = interp_exec(c, b->right, &rtype);
3277 mpq_add(rv.num, rv.num, right.num);
3280 rv = interp_exec(c, b->left, &rvtype);
3281 right = interp_exec(c, b->right, &rtype);
3282 mpq_sub(rv.num, rv.num, right.num);
3285 rv = interp_exec(c, b->left, &rvtype);
3286 right = interp_exec(c, b->right, &rtype);
3287 mpq_mul(rv.num, rv.num, right.num);
3290 rv = interp_exec(c, b->left, &rvtype);
3291 right = interp_exec(c, b->right, &rtype);
3292 mpq_div(rv.num, rv.num, right.num);
3297 left = interp_exec(c, b->left, <ype);
3298 right = interp_exec(c, b->right, &rtype);
3299 mpz_init(l); mpz_init(r); mpz_init(rem);
3300 mpz_tdiv_q(l, mpq_numref(left.num), mpq_denref(left.num));
3301 mpz_tdiv_q(r, mpq_numref(right.num), mpq_denref(right.num));
3302 mpz_tdiv_r(rem, l, r);
3303 val_init(Tnum, &rv);
3304 mpq_set_z(rv.num, rem);
3305 mpz_clear(r); mpz_clear(l); mpz_clear(rem);
3310 rv = interp_exec(c, b->right, &rvtype);
3311 mpq_neg(rv.num, rv.num);
3314 rv = interp_exec(c, b->right, &rvtype);
3315 mpq_abs(rv.num, rv.num);
3318 rv = interp_exec(c, b->right, &rvtype);
3321 left = interp_exec(c, b->left, <ype);
3322 right = interp_exec(c, b->right, &rtype);
3324 rv.str = text_join(left.str, right.str);
3327 right = interp_exec(c, b->right, &rvtype);
3331 struct text tx = right.str;
3334 if (tx.txt[0] == '-') {
3335 neg = 1; // UNTESTED
3336 tx.txt++; // UNTESTED
3337 tx.len--; // UNTESTED
3339 if (number_parse(rv.num, tail, tx) == 0)
3340 mpq_init(rv.num); // UNTESTED
3342 mpq_neg(rv.num, rv.num); // UNTESTED
3344 printf("Unsupported suffix: %.*s\n", tx.len, tx.txt); // UNTESTED
3348 ###### value functions
3350 static struct text text_join(struct text a, struct text b)
3353 rv.len = a.len + b.len;
3354 rv.txt = malloc(rv.len);
3355 memcpy(rv.txt, a.txt, a.len);
3356 memcpy(rv.txt+a.len, b.txt, b.len);
3360 ### Blocks, Statements, and Statement lists.
3362 Now that we have expressions out of the way we need to turn to
3363 statements. There are simple statements and more complex statements.
3364 Simple statements do not contain (syntactic) newlines, complex statements do.
3366 Statements often come in sequences and we have corresponding simple
3367 statement lists and complex statement lists.
3368 The former comprise only simple statements separated by semicolons.
3369 The later comprise complex statements and simple statement lists. They are
3370 separated by newlines. Thus the semicolon is only used to separate
3371 simple statements on the one line. This may be overly restrictive,
3372 but I'm not sure I ever want a complex statement to share a line with
3375 Note that a simple statement list can still use multiple lines if
3376 subsequent lines are indented, so
3378 ###### Example: wrapped simple statement list
3383 is a single simple statement list. This might allow room for
3384 confusion, so I'm not set on it yet.
3386 A simple statement list needs no extra syntax. A complex statement
3387 list has two syntactic forms. It can be enclosed in braces (much like
3388 C blocks), or it can be introduced by an indent and continue until an
3389 unindented newline (much like Python blocks). With this extra syntax
3390 it is referred to as a block.
3392 Note that a block does not have to include any newlines if it only
3393 contains simple statements. So both of:
3395 if condition: a=b; d=f
3397 if condition { a=b; print f }
3401 In either case the list is constructed from a `binode` list with
3402 `Block` as the operator. When parsing the list it is most convenient
3403 to append to the end, so a list is a list and a statement. When using
3404 the list it is more convenient to consider a list to be a statement
3405 and a list. So we need a function to re-order a list.
3406 `reorder_bilist` serves this purpose.
3408 The only stand-alone statement we introduce at this stage is `pass`
3409 which does nothing and is represented as a `NULL` pointer in a `Block`
3410 list. Other stand-alone statements will follow once the infrastructure
3421 Block -> { IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
3422 | { SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
3423 | SimpleStatements ; ${ $0 = reorder_bilist($<SS); }$
3424 | SimpleStatements EOL ${ $0 = reorder_bilist($<SS); }$
3425 | IN OptNL Statementlist OUT ${ $0 = $<Sl; }$
3427 OpenBlock -> OpenScope { IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
3428 | OpenScope { SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
3429 | OpenScope SimpleStatements ; ${ $0 = reorder_bilist($<SS); }$
3430 | OpenScope SimpleStatements EOL ${ $0 = reorder_bilist($<SS); }$
3431 | IN OpenScope OptNL Statementlist OUT ${ $0 = $<Sl; }$
3433 UseBlock -> { OpenScope IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
3434 | { OpenScope SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
3435 | IN OpenScope OptNL Statementlist OUT ${ $0 = $<Sl; }$
3437 ColonBlock -> { IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
3438 | { SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
3439 | : SimpleStatements ; ${ $0 = reorder_bilist($<SS); }$
3440 | : SimpleStatements EOL ${ $0 = reorder_bilist($<SS); }$
3441 | : IN OptNL Statementlist OUT ${ $0 = $<Sl; }$
3443 Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<CS); }$
3445 ComplexStatements -> ComplexStatements ComplexStatement ${
3455 | ComplexStatement ${
3467 ComplexStatement -> SimpleStatements Newlines ${
3468 $0 = reorder_bilist($<SS);
3470 | SimpleStatements ; Newlines ${
3471 $0 = reorder_bilist($<SS);
3473 ## ComplexStatement Grammar
3476 SimpleStatements -> SimpleStatements ; SimpleStatement ${
3482 | SimpleStatement ${
3490 SimpleStatement -> pass ${ $0 = NULL; }$
3491 | ERROR ${ tok_err(c, "Syntax error in statement", &$1); }$
3492 ## SimpleStatement Grammar
3494 ###### print binode cases
3498 if (b->left == NULL) // UNTESTED
3499 printf("pass"); // UNTESTED
3501 print_exec(b->left, indent, bracket); // UNTESTED
3502 if (b->right) { // UNTESTED
3503 printf("; "); // UNTESTED
3504 print_exec(b->right, indent, bracket); // UNTESTED
3507 // block, one per line
3508 if (b->left == NULL)
3509 do_indent(indent, "pass\n");
3511 print_exec(b->left, indent, bracket);
3513 print_exec(b->right, indent, bracket);
3517 ###### propagate binode cases
3520 /* If any statement returns something other than Tnone
3521 * or Tbool then all such must return same type.
3522 * As each statement may be Tnone or something else,
3523 * we must always pass NULL (unknown) down, otherwise an incorrect
3524 * error might occur. We never return Tnone unless it is
3529 for (e = b; e; e = cast(binode, e->right)) {
3530 t = propagate_types(e->left, c, ok, NULL, rules);
3531 if ((rules & Rboolok) && t == Tbool)
3533 if (t && t != Tnone && t != Tbool) {
3537 type_err(c, "error: expected %1%r, found %2",
3538 e->left, type, rules, t);
3544 ###### interp binode cases
3546 while (rvtype == Tnone &&
3549 rv = interp_exec(c, b->left, &rvtype);
3550 b = cast(binode, b->right);
3554 ### The Print statement
3556 `print` is a simple statement that takes a comma-separated list of
3557 expressions and prints the values separated by spaces and terminated
3558 by a newline. No control of formatting is possible.
3560 `print` uses `ExpressionList` to collect the expressions and stores them
3561 on the left side of a `Print` binode unlessthere is a trailing comma
3562 when the list is stored on the `right` side and no trailing newline is
3568 ##### expr precedence
3571 ###### SimpleStatement Grammar
3573 | print ExpressionList ${
3577 $0->left = reorder_bilist($<EL);
3579 | print ExpressionList , ${ {
3582 $0->right = reorder_bilist($<EL);
3592 ###### print binode cases
3595 do_indent(indent, "print");
3597 print_exec(b->right, -1, bracket);
3600 print_exec(b->left, -1, bracket);
3605 ###### propagate binode cases
3608 /* don't care but all must be consistent */
3610 b = cast(binode, b->left);
3612 b = cast(binode, b->right);
3614 propagate_types(b->left, c, ok, NULL, Rnolabel);
3615 b = cast(binode, b->right);
3619 ###### interp binode cases
3623 struct binode *b2 = cast(binode, b->left);
3625 b2 = cast(binode, b->right);
3626 for (; b2; b2 = cast(binode, b2->right)) {
3627 left = interp_exec(c, b2->left, <ype);
3628 print_value(ltype, &left);
3629 free_value(ltype, &left);
3633 if (b->right == NULL)
3639 ###### Assignment statement
3641 An assignment will assign a value to a variable, providing it hasn't
3642 been declared as a constant. The analysis phase ensures that the type
3643 will be correct so the interpreter just needs to perform the
3644 calculation. There is a form of assignment which declares a new
3645 variable as well as assigning a value. If a name is assigned before
3646 it is declared, and error will be raised as the name is created as
3647 `Tlabel` and it is illegal to assign to such names.
3653 ###### declare terminals
3656 ###### SimpleStatement Grammar
3657 | Variable = Expression ${
3663 | VariableDecl = Expression ${
3671 if ($1->var->where_set == NULL) {
3673 "Variable declared with no type or value: %v",
3683 ###### print binode cases
3686 do_indent(indent, "");
3687 print_exec(b->left, indent, bracket);
3689 print_exec(b->right, indent, bracket);
3696 struct variable *v = cast(var, b->left)->var;
3697 do_indent(indent, "");
3698 print_exec(b->left, indent, bracket);
3699 if (cast(var, b->left)->var->constant) {
3701 if (v->where_decl == v->where_set) {
3702 type_print(v->type, stdout);
3707 if (v->where_decl == v->where_set) {
3708 type_print(v->type, stdout);
3714 print_exec(b->right, indent, bracket);
3721 ###### propagate binode cases
3725 /* Both must match and not be labels,
3726 * Type must support 'dup',
3727 * For Assign, left must not be constant.
3730 t = propagate_types(b->left, c, ok, NULL,
3731 Rnolabel | (b->op == Assign ? Rnoconstant : 0));
3736 if (propagate_types(b->right, c, ok, t, 0) != t)
3737 if (b->left->type == Xvar)
3738 type_err(c, "info: variable '%v' was set as %1 here.",
3739 cast(var, b->left)->var->where_set, t, rules, NULL);
3741 t = propagate_types(b->right, c, ok, NULL, Rnolabel);
3743 propagate_types(b->left, c, ok, t,
3744 (b->op == Assign ? Rnoconstant : 0));
3746 if (t && t->dup == NULL)
3747 type_err(c, "error: cannot assign value of type %1", b, t, 0, NULL);
3752 ###### interp binode cases
3755 lleft = linterp_exec(c, b->left, <ype);
3756 right = interp_exec(c, b->right, &rtype);
3758 free_value(ltype, lleft);
3759 dup_value(ltype, &right, lleft);
3766 struct variable *v = cast(var, b->left)->var;
3769 val = var_value(c, v);
3770 if (v->type->prepare_type)
3771 v->type->prepare_type(c, v->type, 0);
3773 right = interp_exec(c, b->right, &rtype);
3774 memcpy(val, &right, rtype->size);
3777 val_init(v->type, val);
3782 ### The `use` statement
3784 The `use` statement is the last "simple" statement. It is needed when
3785 the condition in a conditional statement is a block. `use` works much
3786 like `return` in C, but only completes the `condition`, not the whole
3792 ###### expr precedence
3795 ###### SimpleStatement Grammar
3797 $0 = new_pos(binode, $1);
3800 if ($0->right->type == Xvar) {
3801 struct var *v = cast(var, $0->right);
3802 if (v->var->type == Tnone) {
3803 /* Convert this to a label */
3806 v->var->type = Tlabel;
3807 val = global_alloc(c, Tlabel, v->var, NULL);
3813 ###### print binode cases
3816 do_indent(indent, "use ");
3817 print_exec(b->right, -1, bracket);
3822 ###### propagate binode cases
3825 /* result matches value */
3826 return propagate_types(b->right, c, ok, type, 0);
3828 ###### interp binode cases
3831 rv = interp_exec(c, b->right, &rvtype);
3834 ### The Conditional Statement
3836 This is the biggy and currently the only complex statement. This
3837 subsumes `if`, `while`, `do/while`, `switch`, and some parts of `for`.
3838 It is comprised of a number of parts, all of which are optional though
3839 set combinations apply. Each part is (usually) a key word (`then` is
3840 sometimes optional) followed by either an expression or a code block,
3841 except the `casepart` which is a "key word and an expression" followed
3842 by a code block. The code-block option is valid for all parts and,
3843 where an expression is also allowed, the code block can use the `use`
3844 statement to report a value. If the code block does not report a value
3845 the effect is similar to reporting `True`.
3847 The `else` and `case` parts, as well as `then` when combined with
3848 `if`, can contain a `use` statement which will apply to some
3849 containing conditional statement. `for` parts, `do` parts and `then`
3850 parts used with `for` can never contain a `use`, except in some
3851 subordinate conditional statement.
3853 If there is a `forpart`, it is executed first, only once.
3854 If there is a `dopart`, then it is executed repeatedly providing
3855 always that the `condpart` or `cond`, if present, does not return a non-True
3856 value. `condpart` can fail to return any value if it simply executes
3857 to completion. This is treated the same as returning `True`.
3859 If there is a `thenpart` it will be executed whenever the `condpart`
3860 or `cond` returns True (or does not return any value), but this will happen
3861 *after* `dopart` (when present).
3863 If `elsepart` is present it will be executed at most once when the
3864 condition returns `False` or some value that isn't `True` and isn't
3865 matched by any `casepart`. If there are any `casepart`s, they will be
3866 executed when the condition returns a matching value.
3868 The particular sorts of values allowed in case parts has not yet been
3869 determined in the language design, so nothing is prohibited.
3871 The various blocks in this complex statement potentially provide scope
3872 for variables as described earlier. Each such block must include the
3873 "OpenScope" nonterminal before parsing the block, and must call
3874 `var_block_close()` when closing the block.
3876 The code following "`if`", "`switch`" and "`for`" does not get its own
3877 scope, but is in a scope covering the whole statement, so names
3878 declared there cannot be redeclared elsewhere. Similarly the
3879 condition following "`while`" is in a scope the covers the body
3880 ("`do`" part) of the loop, and which does not allow conditional scope
3881 extension. Code following "`then`" (both looping and non-looping),
3882 "`else`" and "`case`" each get their own local scope.
3884 The type requirements on the code block in a `whilepart` are quite
3885 unusal. It is allowed to return a value of some identifiable type, in
3886 which case the loop aborts and an appropriate `casepart` is run, or it
3887 can return a Boolean, in which case the loop either continues to the
3888 `dopart` (on `True`) or aborts and runs the `elsepart` (on `False`).
3889 This is different both from the `ifpart` code block which is expected to
3890 return a Boolean, or the `switchpart` code block which is expected to
3891 return the same type as the casepart values. The correct analysis of
3892 the type of the `whilepart` code block is the reason for the
3893 `Rboolok` flag which is passed to `propagate_types()`.
3895 The `cond_statement` cannot fit into a `binode` so a new `exec` is
3896 defined. As there are two scopes which cover multiple parts - one for
3897 the whole statement and one for "while" and "do" - and as we will use
3898 the 'struct exec' to track scopes, we actually need two new types of
3899 exec. One is a `binode` for the looping part, the rest is the
3900 `cond_statement`. The `cond_statement` will use an auxilliary `struct
3901 casepart` to track a list of case parts.
3912 struct exec *action;
3913 struct casepart *next;
3915 struct cond_statement {
3917 struct exec *forpart, *condpart, *thenpart, *elsepart;
3918 struct binode *looppart;
3919 struct casepart *casepart;
3922 ###### ast functions
3924 static void free_casepart(struct casepart *cp)
3928 free_exec(cp->value);
3929 free_exec(cp->action);
3936 static void free_cond_statement(struct cond_statement *s)
3940 free_exec(s->forpart);
3941 free_exec(s->condpart);
3942 free_exec(s->looppart);
3943 free_exec(s->thenpart);
3944 free_exec(s->elsepart);
3945 free_casepart(s->casepart);
3949 ###### free exec cases
3950 case Xcond_statement: free_cond_statement(cast(cond_statement, e)); break;
3952 ###### ComplexStatement Grammar
3953 | CondStatement ${ $0 = $<1; }$
3955 ###### expr precedence
3956 $TERM for then while do
3963 // A CondStatement must end with EOL, as does CondSuffix and
3965 // ForPart, ThenPart, SwitchPart, CasePart are non-empty and
3966 // may or may not end with EOL
3967 // WhilePart and IfPart include an appropriate Suffix
3969 // ForPart, SwitchPart, and IfPart open scopes, o we have to close
3970 // them. WhilePart opens and closes its own scope.
3971 CondStatement -> ForPart OptNL ThenPart OptNL WhilePart CondSuffix ${
3974 $0->thenpart = $<TP;
3975 $0->looppart = $<WP;
3976 var_block_close(c, CloseSequential, $0);
3978 | ForPart OptNL WhilePart CondSuffix ${
3981 $0->looppart = $<WP;
3982 var_block_close(c, CloseSequential, $0);
3984 | WhilePart CondSuffix ${
3986 $0->looppart = $<WP;
3988 | SwitchPart OptNL CasePart CondSuffix ${
3990 $0->condpart = $<SP;
3991 $CP->next = $0->casepart;
3992 $0->casepart = $<CP;
3993 var_block_close(c, CloseSequential, $0);
3995 | SwitchPart : IN OptNL CasePart CondSuffix OUT Newlines ${
3997 $0->condpart = $<SP;
3998 $CP->next = $0->casepart;
3999 $0->casepart = $<CP;
4000 var_block_close(c, CloseSequential, $0);
4002 | IfPart IfSuffix ${
4004 $0->condpart = $IP.condpart; $IP.condpart = NULL;
4005 $0->thenpart = $IP.thenpart; $IP.thenpart = NULL;
4006 // This is where we close an "if" statement
4007 var_block_close(c, CloseSequential, $0);
4010 CondSuffix -> IfSuffix ${
4013 | Newlines CasePart CondSuffix ${
4015 $CP->next = $0->casepart;
4016 $0->casepart = $<CP;
4018 | CasePart CondSuffix ${
4020 $CP->next = $0->casepart;
4021 $0->casepart = $<CP;
4024 IfSuffix -> Newlines ${ $0 = new(cond_statement); }$
4025 | Newlines ElsePart ${ $0 = $<EP; }$
4026 | ElsePart ${$0 = $<EP; }$
4028 ElsePart -> else OpenBlock Newlines ${
4029 $0 = new(cond_statement);
4030 $0->elsepart = $<OB;
4031 var_block_close(c, CloseElse, $0->elsepart);
4033 | else OpenScope CondStatement ${
4034 $0 = new(cond_statement);
4035 $0->elsepart = $<CS;
4036 var_block_close(c, CloseElse, $0->elsepart);
4040 CasePart -> case Expression OpenScope ColonBlock ${
4041 $0 = calloc(1,sizeof(struct casepart));
4044 var_block_close(c, CloseParallel, $0->action);
4048 // These scopes are closed in CondStatement
4049 ForPart -> for OpenBlock ${
4053 ThenPart -> then OpenBlock ${
4055 var_block_close(c, CloseSequential, $0);
4059 // This scope is closed in CondStatement
4060 WhilePart -> while UseBlock OptNL do OpenBlock ${
4065 var_block_close(c, CloseSequential, $0->right);
4066 var_block_close(c, CloseSequential, $0);
4068 | while OpenScope Expression OpenScope ColonBlock ${
4073 var_block_close(c, CloseSequential, $0->right);
4074 var_block_close(c, CloseSequential, $0);
4078 IfPart -> if UseBlock OptNL then OpenBlock ${
4081 var_block_close(c, CloseParallel, $0.thenpart);
4083 | if OpenScope Expression OpenScope ColonBlock ${
4086 var_block_close(c, CloseParallel, $0.thenpart);
4088 | if OpenScope Expression OpenScope OptNL then Block ${
4091 var_block_close(c, CloseParallel, $0.thenpart);
4095 // This scope is closed in CondStatement
4096 SwitchPart -> switch OpenScope Expression ${
4099 | switch UseBlock ${
4103 ###### print binode cases
4105 if (b->left && b->left->type == Xbinode &&
4106 cast(binode, b->left)->op == Block) {
4108 do_indent(indent, "while {\n");
4110 do_indent(indent, "while\n");
4111 print_exec(b->left, indent+1, bracket);
4113 do_indent(indent, "} do {\n");
4115 do_indent(indent, "do\n");
4116 print_exec(b->right, indent+1, bracket);
4118 do_indent(indent, "}\n");
4120 do_indent(indent, "while ");
4121 print_exec(b->left, 0, bracket);
4126 print_exec(b->right, indent+1, bracket);
4128 do_indent(indent, "}\n");
4132 ###### print exec cases
4134 case Xcond_statement:
4136 struct cond_statement *cs = cast(cond_statement, e);
4137 struct casepart *cp;
4139 do_indent(indent, "for");
4140 if (bracket) printf(" {\n"); else printf("\n");
4141 print_exec(cs->forpart, indent+1, bracket);
4144 do_indent(indent, "} then {\n");
4146 do_indent(indent, "then\n");
4147 print_exec(cs->thenpart, indent+1, bracket);
4149 if (bracket) do_indent(indent, "}\n");
4152 print_exec(cs->looppart, indent, bracket);
4156 do_indent(indent, "switch");
4158 do_indent(indent, "if");
4159 if (cs->condpart && cs->condpart->type == Xbinode &&
4160 cast(binode, cs->condpart)->op == Block) {
4165 print_exec(cs->condpart, indent+1, bracket);
4167 do_indent(indent, "}\n");
4169 do_indent(indent, "then\n");
4170 print_exec(cs->thenpart, indent+1, bracket);
4174 print_exec(cs->condpart, 0, bracket);
4180 print_exec(cs->thenpart, indent+1, bracket);
4182 do_indent(indent, "}\n");
4187 for (cp = cs->casepart; cp; cp = cp->next) {
4188 do_indent(indent, "case ");
4189 print_exec(cp->value, -1, 0);
4194 print_exec(cp->action, indent+1, bracket);
4196 do_indent(indent, "}\n");
4199 do_indent(indent, "else");
4204 print_exec(cs->elsepart, indent+1, bracket);
4206 do_indent(indent, "}\n");
4211 ###### propagate binode cases
4213 t = propagate_types(b->right, c, ok, Tnone, 0);
4214 if (!type_compat(Tnone, t, 0))
4215 *ok = 0; // UNTESTED
4216 return propagate_types(b->left, c, ok, type, rules);
4218 ###### propagate exec cases
4219 case Xcond_statement:
4221 // forpart and looppart->right must return Tnone
4222 // thenpart must return Tnone if there is a loopart,
4223 // otherwise it is like elsepart.
4225 // be bool if there is no casepart
4226 // match casepart->values if there is a switchpart
4227 // either be bool or match casepart->value if there
4229 // elsepart and casepart->action must match the return type
4230 // expected of this statement.
4231 struct cond_statement *cs = cast(cond_statement, prog);
4232 struct casepart *cp;
4234 t = propagate_types(cs->forpart, c, ok, Tnone, 0);
4235 if (!type_compat(Tnone, t, 0))
4236 *ok = 0; // UNTESTED
4239 t = propagate_types(cs->thenpart, c, ok, Tnone, 0);
4240 if (!type_compat(Tnone, t, 0))
4241 *ok = 0; // UNTESTED
4243 if (cs->casepart == NULL) {
4244 propagate_types(cs->condpart, c, ok, Tbool, 0);
4245 propagate_types(cs->looppart, c, ok, Tbool, 0);
4247 /* Condpart must match case values, with bool permitted */
4249 for (cp = cs->casepart;
4250 cp && !t; cp = cp->next)
4251 t = propagate_types(cp->value, c, ok, NULL, 0);
4252 if (!t && cs->condpart)
4253 t = propagate_types(cs->condpart, c, ok, NULL, Rboolok); // UNTESTED
4254 if (!t && cs->looppart)
4255 t = propagate_types(cs->looppart, c, ok, NULL, Rboolok); // UNTESTED
4256 // Now we have a type (I hope) push it down
4258 for (cp = cs->casepart; cp; cp = cp->next)
4259 propagate_types(cp->value, c, ok, t, 0);
4260 propagate_types(cs->condpart, c, ok, t, Rboolok);
4261 propagate_types(cs->looppart, c, ok, t, Rboolok);
4264 // (if)then, else, and case parts must return expected type.
4265 if (!cs->looppart && !type)
4266 type = propagate_types(cs->thenpart, c, ok, NULL, rules);
4268 type = propagate_types(cs->elsepart, c, ok, NULL, rules);
4269 for (cp = cs->casepart;
4271 cp = cp->next) // UNTESTED
4272 type = propagate_types(cp->action, c, ok, NULL, rules); // UNTESTED
4275 propagate_types(cs->thenpart, c, ok, type, rules);
4276 propagate_types(cs->elsepart, c, ok, type, rules);
4277 for (cp = cs->casepart; cp ; cp = cp->next)
4278 propagate_types(cp->action, c, ok, type, rules);
4284 ###### interp binode cases
4286 // This just performs one iterration of the loop
4287 rv = interp_exec(c, b->left, &rvtype);
4288 if (rvtype == Tnone ||
4289 (rvtype == Tbool && rv.bool != 0))
4290 // cnd is Tnone or Tbool, doesn't need to be freed
4291 interp_exec(c, b->right, NULL);
4294 ###### interp exec cases
4295 case Xcond_statement:
4297 struct value v, cnd;
4298 struct type *vtype, *cndtype;
4299 struct casepart *cp;
4300 struct cond_statement *cs = cast(cond_statement, e);
4303 interp_exec(c, cs->forpart, NULL);
4305 while ((cnd = interp_exec(c, cs->looppart, &cndtype)),
4306 cndtype == Tnone || (cndtype == Tbool && cnd.bool != 0))
4307 interp_exec(c, cs->thenpart, NULL);
4309 cnd = interp_exec(c, cs->condpart, &cndtype);
4310 if ((cndtype == Tnone ||
4311 (cndtype == Tbool && cnd.bool != 0))) {
4312 // cnd is Tnone or Tbool, doesn't need to be freed
4313 rv = interp_exec(c, cs->thenpart, &rvtype);
4314 // skip else (and cases)
4318 for (cp = cs->casepart; cp; cp = cp->next) {
4319 v = interp_exec(c, cp->value, &vtype);
4320 if (value_cmp(cndtype, vtype, &v, &cnd) == 0) {
4321 free_value(vtype, &v);
4322 free_value(cndtype, &cnd);
4323 rv = interp_exec(c, cp->action, &rvtype);
4326 free_value(vtype, &v);
4328 free_value(cndtype, &cnd);
4330 rv = interp_exec(c, cs->elsepart, &rvtype);
4337 ### Top level structure
4339 All the language elements so far can be used in various places. Now
4340 it is time to clarify what those places are.
4342 At the top level of a file there will be a number of declarations.
4343 Many of the things that can be declared haven't been described yet,
4344 such as functions, procedures, imports, and probably more.
4345 For now there are two sorts of things that can appear at the top
4346 level. They are predefined constants, `struct` types, and the `main`
4347 function. While the syntax will allow the `main` function to appear
4348 multiple times, that will trigger an error if it is actually attempted.
4350 The various declarations do not return anything. They store the
4351 various declarations in the parse context.
4353 ###### Parser: grammar
4356 Ocean -> OptNL DeclarationList
4358 ## declare terminals
4365 DeclarationList -> Declaration
4366 | DeclarationList Declaration
4368 Declaration -> ERROR Newlines ${
4369 tok_err(c, // UNTESTED
4370 "error: unhandled parse error", &$1);
4376 ## top level grammar
4380 ### The `const` section
4382 As well as being defined in with the code that uses them, constants
4383 can be declared at the top level. These have full-file scope, so they
4384 are always `InScope`. The value of a top level constant can be given
4385 as an expression, and this is evaluated immediately rather than in the
4386 later interpretation stage. Once we add functions to the language, we
4387 will need rules concern which, if any, can be used to define a top
4390 Constants are defined in a section that starts with the reserved word
4391 `const` and then has a block with a list of assignment statements.
4392 For syntactic consistency, these must use the double-colon syntax to
4393 make it clear that they are constants. Type can also be given: if
4394 not, the type will be determined during analysis, as with other
4397 As the types constants are inserted at the head of a list, printing
4398 them in the same order that they were read is not straight forward.
4399 We take a quadratic approach here and count the number of constants
4400 (variables of depth 0), then count down from there, each time
4401 searching through for the Nth constant for decreasing N.
4403 ###### top level grammar
4407 DeclareConstant -> const { IN OptNL ConstList OUT OptNL } Newlines
4408 | const { SimpleConstList } Newlines
4409 | const IN OptNL ConstList OUT Newlines
4410 | const SimpleConstList Newlines
4412 ConstList -> ConstList SimpleConstLine
4414 SimpleConstList -> SimpleConstList ; Const
4417 SimpleConstLine -> SimpleConstList Newlines
4418 | ERROR Newlines ${ tok_err(c, "Syntax error in constant", &$1); }$
4421 CType -> Type ${ $0 = $<1; }$
4424 Const -> IDENTIFIER :: CType = Expression ${ {
4428 v = var_decl(c, $1.txt);
4430 struct var *var = new_pos(var, $1);
4431 v->where_decl = var;
4436 v = var_ref(c, $1.txt);
4437 tok_err(c, "error: name already declared", &$1);
4438 type_err(c, "info: this is where '%v' was first declared",
4439 v->where_decl, NULL, 0, NULL);
4443 propagate_types($5, c, &ok, $3, 0);
4448 struct value res = interp_exec(c, $5, &v->type);
4449 global_alloc(c, v->type, v, &res);
4453 ###### print const decls
4458 while (target != 0) {
4460 for (v = context.in_scope; v; v=v->in_scope)
4461 if (v->depth == 0) {
4472 struct value *val = var_value(&context, v);
4473 printf(" %.*s :: ", v->name->name.len, v->name->name.txt);
4474 type_print(v->type, stdout);
4476 if (v->type == Tstr)
4478 print_value(v->type, val);
4479 if (v->type == Tstr)
4487 ### Finally the whole `main` function.
4489 An Ocean program can currently have only one function - `main` - and
4490 that must exist. It expects an array of strings with a provided size.
4491 Following this is a `block` which is the code to execute.
4493 As this is the top level, several things are handled a bit
4495 The function is not interpreted by `interp_exec` as that isn't
4496 passed the argument list which the program requires. Similarly type
4497 analysis is a bit more interesting at this level.
4499 ###### top level grammar
4501 DeclareFunction -> MainFunction ${ {
4503 type_err(c, "\"main\" defined a second time",
4509 ###### print binode cases
4511 do_indent(indent, "func main(");
4512 for (b2 = cast(binode, b->left); b2; b2 = cast(binode, b2->right)) {
4513 struct variable *v = cast(var, b2->left)->var;
4515 print_exec(b2->left, 0, 0);
4517 type_print(v->type, stdout);
4523 print_exec(b->right, indent+1, bracket);
4525 do_indent(indent, "}\n");
4528 ###### propagate binode cases
4529 case Func: abort(); // NOTEST
4531 ###### core functions
4533 static int analyse_prog(struct exec *prog, struct parse_context *c)
4535 struct binode *bp = cast(binode, prog);
4539 struct type *argv_type;
4540 struct text argv_type_name = { " argv", 5 };
4545 argv_type = add_type(c, argv_type_name, &array_prototype);
4546 argv_type->array.member = Tstr;
4547 argv_type->array.unspec = 1;
4549 for (b = cast(binode, bp->left); b; b = cast(binode, b->right)) {
4553 propagate_types(b->left, c, &ok, argv_type, 0);
4555 default: /* invalid */ // NOTEST
4556 propagate_types(b->left, c, &ok, Tnone, 0); // NOTEST
4562 propagate_types(bp->right, c, &ok, Tnone, 0);
4567 /* Make sure everything is still consistent */
4568 propagate_types(bp->right, c, &ok, Tnone, 0);
4570 return 0; // UNTESTED
4575 static void interp_prog(struct parse_context *c, struct exec *prog,
4576 int argc, char **argv)
4578 struct binode *p = cast(binode, prog);
4586 al = cast(binode, p->left);
4588 struct var *v = cast(var, al->left);
4589 struct value *vl = var_value(c, v->var);
4599 mpq_set_ui(argcq, argc, 1);
4600 memcpy(var_value(c, t->array.vsize), &argcq, sizeof(argcq));
4601 t->prepare_type(c, t, 0);
4602 array_init(v->var->type, vl);
4603 for (i = 0; i < argc; i++) {
4604 struct value *vl2 = vl->array + i * v->var->type->array.member->size;
4607 arg.str.txt = argv[i];
4608 arg.str.len = strlen(argv[i]);
4609 free_value(Tstr, vl2);
4610 dup_value(Tstr, &arg, vl2);
4614 al = cast(binode, al->right);
4616 v = interp_exec(c, p, &vtype);
4617 free_value(vtype, &v);
4620 ###### interp binode cases
4622 rv = interp_exec(c, b->right, &rvtype);
4625 ## And now to test it out.
4627 Having a language requires having a "hello world" program. I'll
4628 provide a little more than that: a program that prints "Hello world"
4629 finds the GCD of two numbers, prints the first few elements of
4630 Fibonacci, performs a binary search for a number, and a few other
4631 things which will likely grow as the languages grows.
4633 ###### File: oceani.mk
4636 @echo "===== DEMO ====="
4637 ./oceani --section "demo: hello" oceani.mdc 55 33
4643 four ::= 2 + 2 ; five ::= 10/2
4644 const pie ::= "I like Pie";
4645 cake ::= "The cake is"
4656 print "Hello World, what lovely oceans you have!"
4657 print "Are there", five, "?"
4658 print pi, pie, "but", cake
4660 A := $argv[1]; B := $argv[2]
4662 /* When a variable is defined in both branches of an 'if',
4663 * and used afterwards, the variables are merged.
4669 print "Is", A, "bigger than", B,"? ", bigger
4670 /* If a variable is not used after the 'if', no
4671 * merge happens, so types can be different
4674 double:string = "yes"
4675 print A, "is more than twice", B, "?", double
4678 print "double", B, "is", double
4683 if a > 0 and then b > 0:
4689 print "GCD of", A, "and", B,"is", a
4691 print a, "is not positive, cannot calculate GCD"
4693 print b, "is not positive, cannot calculate GCD"
4698 print "Fibonacci:", f1,f2,
4699 then togo = togo - 1
4707 /* Binary search... */
4712 mid := (lo + hi) / 2
4725 print "Yay, I found", target
4727 print "Closest I found was", lo
4732 // "middle square" PRNG. Not particularly good, but one my
4733 // Dad taught me - the first one I ever heard of.
4734 for i:=1; then i = i + 1; while i < size:
4735 n := list[i-1] * list[i-1]
4736 list[i] = (n / 100) % 10 000
4738 print "Before sort:",
4739 for i:=0; then i = i + 1; while i < size:
4743 for i := 1; then i=i+1; while i < size:
4744 for j:=i-1; then j=j-1; while j >= 0:
4745 if list[j] > list[j+1]:
4749 print " After sort:",
4750 for i:=0; then i = i + 1; while i < size:
4754 if 1 == 2 then print "yes"; else print "no"
4758 bob.alive = (bob.name == "Hello")
4759 print "bob", "is" if bob.alive else "isn't", "alive"