1 # Ocean Interpreter - Jamison Creek version
3 Ocean is intended to be a compiled language, so this interpreter is
4 not targeted at being the final product. It is, rather, an intermediate
5 stage and fills that role in two distinct ways.
7 Firstly, it exists as a platform to experiment with the early language
8 design. An interpreter is easy to write and easy to get working, so
9 the barrier for entry is lower if I aim to start with an interpreter.
11 Secondly, the plan for the Ocean compiler is to write it in the
12 [Ocean language](http://ocean-lang.org). To achieve this we naturally
13 need some sort of boot-strap process and this interpreter - written in
14 portable C - will fill that role. It will be used to bootstrap the
17 Two features that are not needed to fill either of these roles are
18 performance and completeness. The interpreter only needs to be fast
19 enough to run small test programs and occasionally to run the compiler
20 on itself. It only needs to be complete enough to test aspects of the
21 design which are developed before the compiler is working, and to run
22 the compiler on itself. Any features not used by the compiler when
23 compiling itself are superfluous. They may be included anyway, but
26 Nonetheless, the interpreter should end up being reasonably complete,
27 and any performance bottlenecks which appear and are easily fixed, will
32 This third version of the interpreter exists to test out some initial
33 ideas relating to types. Particularly it adds arrays (indexed from
34 zero) and simple structures. Basic control flow and variable scoping
35 are already fairly well established, as are basic numerical and
38 Some operators that have only recently been added, and so have not
39 generated all that much experience yet are "and then" and "or else" as
40 short-circuit Boolean operators, and the "if ... else" trinary
41 operator which can select between two expressions based on a third
42 (which appears syntactically in the middle).
44 The "func" clause currently only allows a "main" function to be
45 declared. That will be extended when proper function support is added.
47 An element that is present purely to make a usable language, and
48 without any expectation that they will remain, is the "print" statement
49 which performs simple output.
51 The current scalar types are "number", "Boolean", and "string".
52 Boolean will likely stay in its current form, the other two might, but
53 could just as easily be changed.
57 Versions of the interpreter which obviously do not support a complete
58 language will be named after creeks and streams. This one is Jamison
61 Once we have something reasonably resembling a complete language, the
62 names of rivers will be used.
63 Early versions of the compiler will be named after seas. Major
64 releases of the compiler will be named after oceans. Hopefully I will
65 be finished once I get to the Pacific Ocean release.
69 As well as parsing and executing a program, the interpreter can print
70 out the program from the parsed internal structure. This is useful
71 for validating the parsing.
72 So the main requirements of the interpreter are:
74 - Parse the program, possibly with tracing,
75 - Analyse the parsed program to ensure consistency,
77 - Execute the "main" function in the program, if no parsing or
78 consistency errors were found.
80 This is all performed by a single C program extracted with
83 There will be two formats for printing the program: a default and one
84 that uses bracketing. So a `--bracket` command line option is needed
85 for that. Normally the first code section found is used, however an
86 alternate section can be requested so that a file (such as this one)
87 can contain multiple programs. This is effected with the `--section`
90 This code must be compiled with `-fplan9-extensions` so that anonymous
91 structures can be used.
93 ###### File: oceani.mk
95 myCFLAGS := -Wall -g -fplan9-extensions
96 CFLAGS := $(filter-out $(myCFLAGS),$(CFLAGS)) $(myCFLAGS)
97 myLDLIBS:= libparser.o libscanner.o libmdcode.o -licuuc
98 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
100 all :: $(LDLIBS) oceani
101 oceani.c oceani.h : oceani.mdc parsergen
102 ./parsergen -o oceani --LALR --tag Parser oceani.mdc
103 oceani.mk: oceani.mdc md2c
106 oceani: oceani.o $(LDLIBS)
107 $(CC) $(CFLAGS) -o oceani oceani.o $(LDLIBS)
109 ###### Parser: header
111 struct parse_context;
113 struct parse_context {
114 struct token_config config;
123 #define container_of(ptr, type, member) ({ \
124 const typeof( ((type *)0)->member ) *__mptr = (ptr); \
125 (type *)( (char *)__mptr - offsetof(type,member) );})
127 #define config2context(_conf) container_of(_conf, struct parse_context, \
130 ###### Parser: reduce
131 struct parse_context *c = config2context(config);
139 #include <sys/mman.h>
158 static char Usage[] =
159 "Usage: oceani --trace --print --noexec --brackets --section=SectionName prog.ocn\n";
160 static const struct option long_options[] = {
161 {"trace", 0, NULL, 't'},
162 {"print", 0, NULL, 'p'},
163 {"noexec", 0, NULL, 'n'},
164 {"brackets", 0, NULL, 'b'},
165 {"section", 1, NULL, 's'},
168 const char *options = "tpnbs";
170 static void pr_err(char *msg) // NOTEST
172 fprintf(stderr, "%s\n", msg); // NOTEST
175 int main(int argc, char *argv[])
180 struct section *s, *ss;
181 char *section = NULL;
182 struct parse_context context = {
184 .ignored = (1 << TK_mark),
185 .number_chars = ".,_+- ",
190 int doprint=0, dotrace=0, doexec=1, brackets=0;
192 while ((opt = getopt_long(argc, argv, options, long_options, NULL))
195 case 't': dotrace=1; break;
196 case 'p': doprint=1; break;
197 case 'n': doexec=0; break;
198 case 'b': brackets=1; break;
199 case 's': section = optarg; break;
200 default: fprintf(stderr, Usage);
204 if (optind >= argc) {
205 fprintf(stderr, "oceani: no input file given\n");
208 fd = open(argv[optind], O_RDONLY);
210 fprintf(stderr, "oceani: cannot open %s\n", argv[optind]);
213 context.file_name = argv[optind];
214 len = lseek(fd, 0, 2);
215 file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
216 s = code_extract(file, file+len, pr_err);
218 fprintf(stderr, "oceani: could not find any code in %s\n",
223 ## context initialization
226 for (ss = s; ss; ss = ss->next) {
227 struct text sec = ss->section;
228 if (sec.len == strlen(section) &&
229 strncmp(sec.txt, section, sec.len) == 0)
233 fprintf(stderr, "oceani: cannot find section %s\n",
239 parse_oceani(ss->code, &context.config, dotrace ? stderr : NULL);
242 fprintf(stderr, "oceani: no main function found.\n");
243 context.parse_error = 1;
245 if (context.prog && doprint) {
248 print_exec(context.prog, 0, brackets);
250 if (context.prog && doexec && !context.parse_error) {
251 if (!analyse_prog(context.prog, &context)) {
252 fprintf(stderr, "oceani: type error in program - not running.\n");
255 interp_prog(&context, context.prog, argc - optind, argv+optind);
257 free_exec(context.prog);
260 struct section *t = s->next;
266 ## free context types
267 ## free context storage
268 exit(context.parse_error ? 1 : 0);
273 The four requirements of parse, analyse, print, interpret apply to
274 each language element individually so that is how most of the code
277 Three of the four are fairly self explanatory. The one that requires
278 a little explanation is the analysis step.
280 The current language design does not require the types of variables to
281 be declared, but they must still have a single type. Different
282 operations impose different requirements on the variables, for example
283 addition requires both arguments to be numeric, and assignment
284 requires the variable on the left to have the same type as the
285 expression on the right.
287 Analysis involves propagating these type requirements around and
288 consequently setting the type of each variable. If any requirements
289 are violated (e.g. a string is compared with a number) or if a
290 variable needs to have two different types, then an error is raised
291 and the program will not run.
293 If the same variable is declared in both branchs of an 'if/else', or
294 in all cases of a 'switch' then the multiple instances may be merged
295 into just one variable if the variable is referenced after the
296 conditional statement. When this happens, the types must naturally be
297 consistent across all the branches. When the variable is not used
298 outside the if, the variables in the different branches are distinct
299 and can be of different types.
301 Undeclared names may only appear in "use" statements and "case" expressions.
302 These names are given a type of "label" and a unique value.
303 This allows them to fill the role of a name in an enumerated type, which
304 is useful for testing the `switch` statement.
306 As we will see, the condition part of a `while` statement can return
307 either a Boolean or some other type. This requires that the expected
308 type that gets passed around comprises a type and a flag to indicate
309 that `Tbool` is also permitted.
311 As there are, as yet, no distinct types that are compatible, there
312 isn't much subtlety in the analysis. When we have distinct number
313 types, this will become more interesting.
317 When analysis discovers an inconsistency it needs to report an error;
318 just refusing to run the code ensures that the error doesn't cascade,
319 but by itself it isn't very useful. A clear understanding of the sort
320 of error message that are useful will help guide the process of
323 At a simplistic level, the only sort of error that type analysis can
324 report is that the type of some construct doesn't match a contextual
325 requirement. For example, in `4 + "hello"` the addition provides a
326 contextual requirement for numbers, but `"hello"` is not a number. In
327 this particular example no further information is needed as the types
328 are obvious from local information. When a variable is involved that
329 isn't the case. It may be helpful to explain why the variable has a
330 particular type, by indicating the location where the type was set,
331 whether by declaration or usage.
333 Using a recursive-descent analysis we can easily detect a problem at
334 multiple locations. In "`hello:= "there"; 4 + hello`" the addition
335 will detect that one argument is not a number and the usage of `hello`
336 will detect that a number was wanted, but not provided. In this
337 (early) version of the language, we will generate error reports at
338 multiple locations, so the use of `hello` will report an error and
339 explain were the value was set, and the addition will report an error
340 and say why numbers are needed. To be able to report locations for
341 errors, each language element will need to record a file location
342 (line and column) and each variable will need to record the language
343 element where its type was set. For now we will assume that each line
344 of an error message indicates one location in the file, and up to 2
345 types. So we provide a `printf`-like function which takes a format, a
346 location (a `struct exec` which has not yet been introduced), and 2
347 types. "`%1`" reports the first type, "`%2`" reports the second. We
348 will need a function to print the location, once we know how that is
349 stored. e As will be explained later, there are sometimes extra rules for
350 type matching and they might affect error messages, we need to pass those
353 As well as type errors, we sometimes need to report problems with
354 tokens, which might be unexpected or might name a type that has not
355 been defined. For these we have `tok_err()` which reports an error
356 with a given token. Each of the error functions sets the flag in the
357 context so indicate that parsing failed.
361 static void fput_loc(struct exec *loc, FILE *f);
363 ###### core functions
365 static void type_err(struct parse_context *c,
366 char *fmt, struct exec *loc,
367 struct type *t1, int rules, struct type *t2)
369 fprintf(stderr, "%s:", c->file_name);
370 fput_loc(loc, stderr);
371 for (; *fmt ; fmt++) {
378 case '%': fputc(*fmt, stderr); break; // NOTEST
379 default: fputc('?', stderr); break; // NOTEST
381 type_print(t1, stderr);
384 type_print(t2, stderr);
393 static void tok_err(struct parse_context *c, char *fmt, struct token *t)
395 fprintf(stderr, "%s:%d:%d: %s: %.*s\n", c->file_name, t->line, t->col, fmt,
396 t->txt.len, t->txt.txt);
400 ## Entities: declared and predeclared.
402 There are various "things" that the language and/or the interpreter
403 needs to know about to parse and execute a program. These include
404 types, variables, values, and executable code. These are all lumped
405 together under the term "entities" (calling them "objects" would be
406 confusing) and introduced here. The following section will present the
407 different specific code elements which comprise or manipulate these
412 Values come in a wide range of types, with more likely to be added.
413 Each type needs to be able to print its own values (for convenience at
414 least) as well as to compare two values, at least for equality and
415 possibly for order. For now, values might need to be duplicated and
416 freed, though eventually such manipulations will be better integrated
419 Rather than requiring every numeric type to support all numeric
420 operations (add, multiple, etc), we allow types to be able to present
421 as one of a few standard types: integer, float, and fraction. The
422 existence of these conversion functions eventually enable types to
423 determine if they are compatible with other types, though such types
424 have not yet been implemented.
426 Named type are stored in a simple linked list. Objects of each type are
427 "values" which are often passed around by value.
434 ## value union fields
442 void (*init)(struct type *type, struct value *val);
443 void (*prepare_type)(struct parse_context *c, struct type *type, int parse_time);
444 void (*print)(struct type *type, struct value *val);
445 void (*print_type)(struct type *type, FILE *f);
446 int (*cmp_order)(struct type *t1, struct type *t2,
447 struct value *v1, struct value *v2);
448 int (*cmp_eq)(struct type *t1, struct type *t2,
449 struct value *v1, struct value *v2);
450 void (*dup)(struct type *type, struct value *vold, struct value *vnew);
451 void (*free)(struct type *type, struct value *val);
452 void (*free_type)(struct type *t);
453 long long (*to_int)(struct value *v);
454 double (*to_float)(struct value *v);
455 int (*to_mpq)(mpq_t *q, struct value *v);
464 struct type *typelist;
468 static struct type *find_type(struct parse_context *c, struct text s)
470 struct type *l = c->typelist;
473 text_cmp(l->name, s) != 0)
478 static struct type *add_type(struct parse_context *c, struct text s,
483 n = calloc(1, sizeof(*n));
486 n->next = c->typelist;
491 static void free_type(struct type *t)
493 /* The type is always a reference to something in the
494 * context, so we don't need to free anything.
498 static void free_value(struct type *type, struct value *v)
504 static void type_print(struct type *type, FILE *f)
507 fputs("*unknown*type*", f); // NOTEST
508 else if (type->name.len)
509 fprintf(f, "%.*s", type->name.len, type->name.txt);
510 else if (type->print_type)
511 type->print_type(type, f);
513 fputs("*invalid*type*", f); // NOTEST
516 static void val_init(struct type *type, struct value *val)
518 if (type && type->init)
519 type->init(type, val);
522 static void dup_value(struct type *type,
523 struct value *vold, struct value *vnew)
525 if (type && type->dup)
526 type->dup(type, vold, vnew);
529 static int value_cmp(struct type *tl, struct type *tr,
530 struct value *left, struct value *right)
532 if (tl && tl->cmp_order)
533 return tl->cmp_order(tl, tr, left, right);
534 if (tl && tl->cmp_eq) // NOTEST
535 return tl->cmp_eq(tl, tr, left, right); // NOTEST
539 static void print_value(struct type *type, struct value *v)
541 if (type && type->print)
542 type->print(type, v);
544 printf("*Unknown*"); // NOTEST
549 static void free_value(struct type *type, struct value *v);
550 static int type_compat(struct type *require, struct type *have, int rules);
551 static void type_print(struct type *type, FILE *f);
552 static void val_init(struct type *type, struct value *v);
553 static void dup_value(struct type *type,
554 struct value *vold, struct value *vnew);
555 static int value_cmp(struct type *tl, struct type *tr,
556 struct value *left, struct value *right);
557 static void print_value(struct type *type, struct value *v);
559 ###### free context types
561 while (context.typelist) {
562 struct type *t = context.typelist;
564 context.typelist = t->next;
570 Type can be specified for local variables, for fields in a structure,
571 for formal parameters to functions, and possibly elsewhere. Different
572 rules may apply in different contexts. As a minimum, a named type may
573 always be used. Currently the type of a formal parameter can be
574 different from types in other contexts, so we have a separate grammar
580 Type -> IDENTIFIER ${
581 $0 = find_type(c, $1.txt);
584 "error: undefined type", &$1);
591 FormalType -> Type ${ $0 = $<1; }$
592 ## formal type grammar
596 Values of the base types can be numbers, which we represent as
597 multi-precision fractions, strings, Booleans and labels. When
598 analysing the program we also need to allow for places where no value
599 is meaningful (type `Tnone`) and where we don't know what type to
600 expect yet (type is `NULL`).
602 Values are never shared, they are always copied when used, and freed
603 when no longer needed.
605 When propagating type information around the program, we need to
606 determine if two types are compatible, where type `NULL` is compatible
607 with anything. There are two special cases with type compatibility,
608 both related to the Conditional Statement which will be described
609 later. In some cases a Boolean can be accepted as well as some other
610 primary type, and in others any type is acceptable except a label (`Vlabel`).
611 A separate function encoding these cases will simplify some code later.
613 ###### type functions
615 int (*compat)(struct type *this, struct type *other);
619 static int type_compat(struct type *require, struct type *have, int rules)
621 if ((rules & Rboolok) && have == Tbool)
623 if ((rules & Rnolabel) && have == Tlabel)
625 if (!require || !have)
629 return require->compat(require, have);
631 return require == have;
636 #include "parse_string.h"
637 #include "parse_number.h"
640 myLDLIBS := libnumber.o libstring.o -lgmp
641 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
643 ###### type union fields
644 enum vtype {Vnone, Vstr, Vnum, Vbool, Vlabel} vtype;
646 ###### value union fields
653 static void _free_value(struct type *type, struct value *v)
657 switch (type->vtype) {
659 case Vstr: free(v->str.txt); break;
660 case Vnum: mpq_clear(v->num); break;
666 ###### value functions
668 static void _val_init(struct type *type, struct value *val)
670 switch(type->vtype) {
671 case Vnone: // NOTEST
674 mpq_init(val->num); break;
676 val->str.txt = malloc(1);
688 static void _dup_value(struct type *type,
689 struct value *vold, struct value *vnew)
691 switch (type->vtype) {
692 case Vnone: // NOTEST
695 vnew->label = vold->label;
698 vnew->bool = vold->bool;
702 mpq_set(vnew->num, vold->num);
705 vnew->str.len = vold->str.len;
706 vnew->str.txt = malloc(vnew->str.len);
707 memcpy(vnew->str.txt, vold->str.txt, vnew->str.len);
712 static int _value_cmp(struct type *tl, struct type *tr,
713 struct value *left, struct value *right)
717 return tl - tr; // NOTEST
719 case Vlabel: cmp = left->label == right->label ? 0 : 1; break;
720 case Vnum: cmp = mpq_cmp(left->num, right->num); break;
721 case Vstr: cmp = text_cmp(left->str, right->str); break;
722 case Vbool: cmp = left->bool - right->bool; break;
723 case Vnone: cmp = 0; // NOTEST
728 static void _print_value(struct type *type, struct value *v)
730 switch (type->vtype) {
731 case Vnone: // NOTEST
732 printf("*no-value*"); break; // NOTEST
733 case Vlabel: // NOTEST
734 printf("*label-%p*", v->label); break; // NOTEST
736 printf("%.*s", v->str.len, v->str.txt); break;
738 printf("%s", v->bool ? "True":"False"); break;
743 mpf_set_q(fl, v->num);
744 gmp_printf("%Fg", fl);
751 static void _free_value(struct type *type, struct value *v);
753 static struct type base_prototype = {
755 .print = _print_value,
756 .cmp_order = _value_cmp,
757 .cmp_eq = _value_cmp,
762 static struct type *Tbool, *Tstr, *Tnum, *Tnone, *Tlabel;
765 static struct type *add_base_type(struct parse_context *c, char *n,
766 enum vtype vt, int size)
768 struct text txt = { n, strlen(n) };
771 t = add_type(c, txt, &base_prototype);
774 t->align = size > sizeof(void*) ? sizeof(void*) : size;
775 if (t->size & (t->align - 1))
776 t->size = (t->size | (t->align - 1)) + 1; // NOTEST
780 ###### context initialization
782 Tbool = add_base_type(&context, "Boolean", Vbool, sizeof(char));
783 Tstr = add_base_type(&context, "string", Vstr, sizeof(struct text));
784 Tnum = add_base_type(&context, "number", Vnum, sizeof(mpq_t));
785 Tnone = add_base_type(&context, "none", Vnone, 0);
786 Tlabel = add_base_type(&context, "label", Vlabel, sizeof(void*));
790 Variables are scoped named values. We store the names in a linked list
791 of "bindings" sorted in lexical order, and use sequential search and
798 struct binding *next; // in lexical order
802 This linked list is stored in the parse context so that "reduce"
803 functions can find or add variables, and so the analysis phase can
804 ensure that every variable gets a type.
808 struct binding *varlist; // In lexical order
812 static struct binding *find_binding(struct parse_context *c, struct text s)
814 struct binding **l = &c->varlist;
819 (cmp = text_cmp((*l)->name, s)) < 0)
823 n = calloc(1, sizeof(*n));
830 Each name can be linked to multiple variables defined in different
831 scopes. Each scope starts where the name is declared and continues
832 until the end of the containing code block. Scopes of a given name
833 cannot nest, so a declaration while a name is in-scope is an error.
835 ###### binding fields
836 struct variable *var;
840 struct variable *previous;
842 struct binding *name;
843 struct exec *where_decl;// where name was declared
844 struct exec *where_set; // where type was set
848 While the naming seems strange, we include local constants in the
849 definition of variables. A name declared `var := value` can
850 subsequently be changed, but a name declared `var ::= value` cannot -
853 ###### variable fields
856 Scopes in parallel branches can be partially merged. More
857 specifically, if a given name is declared in both branches of an
858 if/else then its scope is a candidate for merging. Similarly if
859 every branch of an exhaustive switch (e.g. has an "else" clause)
860 declares a given name, then the scopes from the branches are
861 candidates for merging.
863 Note that names declared inside a loop (which is only parallel to
864 itself) are never visible after the loop. Similarly names defined in
865 scopes which are not parallel, such as those started by `for` and
866 `switch`, are never visible after the scope. Only variables defined in
867 both `then` and `else` (including the implicit then after an `if`, and
868 excluding `then` used with `for`) and in all `case`s and `else` of a
869 `switch` or `while` can be visible beyond the `if`/`switch`/`while`.
871 Labels, which are a bit like variables, follow different rules.
872 Labels are not explicitly declared, but if an undeclared name appears
873 in a context where a label is legal, that effectively declares the
874 name as a label. The declaration remains in force (or in scope) at
875 least to the end of the immediately containing block and conditionally
876 in any larger containing block which does not declare the name in some
877 other way. Importantly, the conditional scope extension happens even
878 if the label is only used in one parallel branch of a conditional --
879 when used in one branch it is treated as having been declared in all
882 Merge candidates are tentatively visible beyond the end of the
883 branching statement which creates them. If the name is used, the
884 merge is affirmed and they become a single variable visible at the
885 outer layer. If not - if it is redeclared first - the merge lapses.
887 To track scopes we have an extra stack, implemented as a linked list,
888 which roughly parallels the parse stack and which is used exclusively
889 for scoping. When a new scope is opened, a new frame is pushed and
890 the child-count of the parent frame is incremented. This child-count
891 is used to distinguish between the first of a set of parallel scopes,
892 in which declared variables must not be in scope, and subsequent
893 branches, whether they may already be conditionally scoped.
895 To push a new frame *before* any code in the frame is parsed, we need a
896 grammar reduction. This is most easily achieved with a grammar
897 element which derives the empty string, and creates the new scope when
898 it is recognised. This can be placed, for example, between a keyword
899 like "if" and the code following it.
903 struct scope *parent;
909 struct scope *scope_stack;
912 static void scope_pop(struct parse_context *c)
914 struct scope *s = c->scope_stack;
916 c->scope_stack = s->parent;
921 static void scope_push(struct parse_context *c)
923 struct scope *s = calloc(1, sizeof(*s));
925 c->scope_stack->child_count += 1;
926 s->parent = c->scope_stack;
934 OpenScope -> ${ scope_push(c); }$
936 Each variable records a scope depth and is in one of four states:
938 - "in scope". This is the case between the declaration of the
939 variable and the end of the containing block, and also between
940 the usage with affirms a merge and the end of that block.
942 The scope depth is not greater than the current parse context scope
943 nest depth. When the block of that depth closes, the state will
944 change. To achieve this, all "in scope" variables are linked
945 together as a stack in nesting order.
947 - "pending". The "in scope" block has closed, but other parallel
948 scopes are still being processed. So far, every parallel block at
949 the same level that has closed has declared the name.
951 The scope depth is the depth of the last parallel block that
952 enclosed the declaration, and that has closed.
954 - "conditionally in scope". The "in scope" block and all parallel
955 scopes have closed, and no further mention of the name has been
956 seen. This state includes a secondary nest depth which records the
957 outermost scope seen since the variable became conditionally in
958 scope. If a use of the name is found, the variable becomes "in
959 scope" and that secondary depth becomes the recorded scope depth.
960 If the name is declared as a new variable, the old variable becomes
961 "out of scope" and the recorded scope depth stays unchanged.
963 - "out of scope". The variable is neither in scope nor conditionally
964 in scope. It is permanently out of scope now and can be removed from
965 the "in scope" stack.
967 ###### variable fields
968 int depth, min_depth;
969 enum { OutScope, PendingScope, CondScope, InScope } scope;
970 struct variable *in_scope;
974 struct variable *in_scope;
976 All variables with the same name are linked together using the
977 'previous' link. Those variable that have been affirmatively merged all
978 have a 'merged' pointer that points to one primary variable - the most
979 recently declared instance. When merging variables, we need to also
980 adjust the 'merged' pointer on any other variables that had previously
981 been merged with the one that will no longer be primary.
983 A variable that is no longer the most recent instance of a name may
984 still have "pending" scope, if it might still be merged with most
985 recent instance. These variables don't really belong in the
986 "in_scope" list, but are not immediately removed when a new instance
987 is found. Instead, they are detected and ignored when considering the
988 list of in_scope names.
990 The storage of the value of a variable will be described later. For now
991 we just need to know that when a variable goes out of scope, it might
992 need to be freed. For this we need to be able to find it, so assume that
993 `var_value()` will provide that.
995 ###### variable fields
996 struct variable *merged;
1000 static void variable_merge(struct variable *primary, struct variable *secondary)
1004 primary = primary->merged;
1006 for (v = primary->previous; v; v=v->previous)
1007 if (v == secondary || v == secondary->merged ||
1008 v->merged == secondary ||
1009 v->merged == secondary->merged) {
1010 v->scope = OutScope;
1011 v->merged = primary;
1015 ###### forward decls
1016 static struct value *var_value(struct parse_context *c, struct variable *v);
1018 ###### free context vars
1020 while (context.varlist) {
1021 struct binding *b = context.varlist;
1022 struct variable *v = b->var;
1023 context.varlist = b->next;
1026 struct variable *t = v;
1029 free_value(t->type, var_value(&context, t));
1031 // This is a global constant
1032 free_exec(t->where_decl);
1037 #### Manipulating Bindings
1039 When a name is conditionally visible, a new declaration discards the
1040 old binding - the condition lapses. Conversely a usage of the name
1041 affirms the visibility and extends it to the end of the containing
1042 block - i.e. the block that contains both the original declaration and
1043 the latest usage. This is determined from `min_depth`. When a
1044 conditionally visible variable gets affirmed like this, it is also
1045 merged with other conditionally visible variables with the same name.
1047 When we parse a variable declaration we either report an error if the
1048 name is currently bound, or create a new variable at the current nest
1049 depth if the name is unbound or bound to a conditionally scoped or
1050 pending-scope variable. If the previous variable was conditionally
1051 scoped, it and its homonyms becomes out-of-scope.
1053 When we parse a variable reference (including non-declarative assignment
1054 "foo = bar") we report an error if the name is not bound or is bound to
1055 a pending-scope variable; update the scope if the name is bound to a
1056 conditionally scoped variable; or just proceed normally if the named
1057 variable is in scope.
1059 When we exit a scope, any variables bound at this level are either
1060 marked out of scope or pending-scoped, depending on whether the scope
1061 was sequential or parallel. Here a "parallel" scope means the "then"
1062 or "else" part of a conditional, or any "case" or "else" branch of a
1063 switch. Other scopes are "sequential".
1065 When exiting a parallel scope we check if there are any variables that
1066 were previously pending and are still visible. If there are, then
1067 there weren't redeclared in the most recent scope, so they cannot be
1068 merged and must become out-of-scope. If it is not the first of
1069 parallel scopes (based on `child_count`), we check that there was a
1070 previous binding that is still pending-scope. If there isn't, the new
1071 variable must now be out-of-scope.
1073 When exiting a sequential scope that immediately enclosed parallel
1074 scopes, we need to resolve any pending-scope variables. If there was
1075 no `else` clause, and we cannot determine that the `switch` was exhaustive,
1076 we need to mark all pending-scope variable as out-of-scope. Otherwise
1077 all pending-scope variables become conditionally scoped.
1080 enum closetype { CloseSequential, CloseParallel, CloseElse };
1082 ###### ast functions
1084 static struct variable *var_decl(struct parse_context *c, struct text s)
1086 struct binding *b = find_binding(c, s);
1087 struct variable *v = b->var;
1089 switch (v ? v->scope : OutScope) {
1091 /* Caller will report the error */
1095 v && v->scope == CondScope;
1097 v->scope = OutScope;
1101 v = calloc(1, sizeof(*v));
1102 v->previous = b->var;
1106 v->min_depth = v->depth = c->scope_depth;
1108 v->in_scope = c->in_scope;
1113 static struct variable *var_ref(struct parse_context *c, struct text s)
1115 struct binding *b = find_binding(c, s);
1116 struct variable *v = b->var;
1117 struct variable *v2;
1119 switch (v ? v->scope : OutScope) {
1122 /* Caller will report the error */
1125 /* All CondScope variables of this name need to be merged
1126 * and become InScope
1128 v->depth = v->min_depth;
1130 for (v2 = v->previous;
1131 v2 && v2->scope == CondScope;
1133 variable_merge(v, v2);
1141 static void var_block_close(struct parse_context *c, enum closetype ct)
1143 /* Close off all variables that are in_scope */
1144 struct variable *v, **vp, *v2;
1147 for (vp = &c->in_scope;
1148 (v = *vp) && v->min_depth > c->scope_depth;
1149 (v->scope == OutScope || v->name->var != v)
1150 ? (*vp = v->in_scope, 0)
1151 : ( vp = &v->in_scope, 0)) {
1152 if (v->name->var != v) {
1153 /* This is still in scope, but we haven't just
1160 case CloseParallel: /* handle PendingScope */
1164 if (c->scope_stack->child_count == 1)
1165 v->scope = PendingScope;
1166 else if (v->previous &&
1167 v->previous->scope == PendingScope)
1168 v->scope = PendingScope;
1169 else if (v->type == Tlabel) // UNTESTED
1170 v->scope = PendingScope; // UNTESTED
1171 else if (v->name->var == v) // UNTESTED
1172 v->scope = OutScope; // UNTESTED
1173 if (ct == CloseElse) {
1174 /* All Pending variables with this name
1175 * are now Conditional */
1177 v2 && v2->scope == PendingScope;
1179 v2->scope = CondScope;
1184 v2 && v2->scope == PendingScope;
1186 if (v2->type != Tlabel)
1187 v2->scope = OutScope;
1189 case OutScope: break; // UNTESTED
1192 case CloseSequential:
1193 if (v->type == Tlabel)
1194 v->scope = PendingScope;
1197 v->scope = OutScope;
1200 /* There was no 'else', so we can only become
1201 * conditional if we know the cases were exhaustive,
1202 * and that doesn't mean anything yet.
1203 * So only labels become conditional..
1206 v2 && v2->scope == PendingScope;
1208 if (v2->type == Tlabel) {
1209 v2->scope = CondScope;
1210 v2->min_depth = c->scope_depth;
1212 v2->scope = OutScope;
1215 case OutScope: break;
1224 The value of a variable is store separately from the variable, on an
1225 analogue of a stack frame. There are (currently) two frames that can be
1226 active. A global frame which currently only stores constants, and a
1227 stacked frame which stores local variables. Each variable knows if it
1228 is global or not, and what its index into the frame is.
1230 Values in the global frame are known immediately they are relevant, so
1231 the frame needs to be reallocated as it grows so it can store those
1232 values. The local frame doesn't get values until the interpreted phase
1233 is started, so there is no need to allocate until the size is known.
1235 ###### variable fields
1239 ###### parse context
1241 short global_size, global_alloc;
1243 void *global, *local;
1245 ###### ast functions
1247 static struct value *var_value(struct parse_context *c, struct variable *v)
1250 if (!c->local || !v->type)
1252 if (v->frame_pos + v->type->size > c->local_size) {
1253 printf("INVALID frame_pos\n"); // NOTEST
1256 return c->local + v->frame_pos;
1258 if (c->global_size > c->global_alloc) {
1259 int old = c->global_alloc;
1260 c->global_alloc = (c->global_size | 1023) + 1024;
1261 c->global = realloc(c->global, c->global_alloc);
1262 memset(c->global + old, 0, c->global_alloc - old);
1264 return c->global + v->frame_pos;
1267 static struct value *global_alloc(struct parse_context *c, struct type *t,
1268 struct variable *v, struct value *init)
1271 struct variable scratch;
1273 if (t->prepare_type)
1274 t->prepare_type(c, t, 1); // NOTEST
1276 if (c->global_size & (t->align - 1))
1277 c->global_size = (c->global_size + t->align) & ~(t->align-1); // UNTESTED
1282 v->frame_pos = c->global_size;
1284 c->global_size += v->type->size;
1285 ret = var_value(c, v);
1287 memcpy(ret, init, t->size);
1293 As global values are found -- struct field initializers, labels etc --
1294 `global_alloc()` is called to record the value in the global frame.
1296 When the program is fully parsed, we need to walk the list of variables
1297 to find any that weren't merged away and that aren't global, and to
1298 calculate the frame size and assign a frame position for each variable.
1299 For this we have `scope_finalize()`.
1301 ###### ast functions
1303 static void scope_finalize(struct parse_context *c)
1307 for (b = c->varlist; b; b = b->next) {
1309 for (v = b->var; v; v = v->previous) {
1310 struct type *t = v->type;
1315 if (c->local_size & (t->align - 1))
1316 c->local_size = (c->local_size + t->align) & ~(t->align-1);
1317 v->frame_pos = c->local_size;
1318 c->local_size += v->type->size;
1321 c->local = calloc(1, c->local_size);
1324 ###### free context storage
1325 free(context.global);
1326 free(context.local);
1330 Executables can be lots of different things. In many cases an
1331 executable is just an operation combined with one or two other
1332 executables. This allows for expressions and lists etc. Other times an
1333 executable is something quite specific like a constant or variable name.
1334 So we define a `struct exec` to be a general executable with a type, and
1335 a `struct binode` which is a subclass of `exec`, forms a node in a
1336 binary tree, and holds an operation. There will be other subclasses,
1337 and to access these we need to be able to `cast` the `exec` into the
1338 various other types. The first field in any `struct exec` is the type
1339 from the `exec_types` enum.
1342 #define cast(structname, pointer) ({ \
1343 const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \
1344 if (__mptr && *__mptr != X##structname) abort(); \
1345 (struct structname *)( (char *)__mptr);})
1347 #define new(structname) ({ \
1348 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1349 __ptr->type = X##structname; \
1350 __ptr->line = -1; __ptr->column = -1; \
1353 #define new_pos(structname, token) ({ \
1354 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1355 __ptr->type = X##structname; \
1356 __ptr->line = token.line; __ptr->column = token.col; \
1365 enum exec_types type;
1373 struct exec *left, *right;
1376 ###### ast functions
1378 static int __fput_loc(struct exec *loc, FILE *f)
1382 if (loc->line >= 0) {
1383 fprintf(f, "%d:%d: ", loc->line, loc->column);
1386 if (loc->type == Xbinode)
1387 return __fput_loc(cast(binode,loc)->left, f) ||
1388 __fput_loc(cast(binode,loc)->right, f); // NOTEST
1391 static void fput_loc(struct exec *loc, FILE *f)
1393 if (!__fput_loc(loc, f))
1394 fprintf(f, "??:??: "); // NOTEST
1397 Each different type of `exec` node needs a number of functions defined,
1398 a bit like methods. We must be able to free it, print it, analyse it
1399 and execute it. Once we have specific `exec` types we will need to
1400 parse them too. Let's take this a bit more slowly.
1404 The parser generator requires a `free_foo` function for each struct
1405 that stores attributes and they will often be `exec`s and subtypes
1406 there-of. So we need `free_exec` which can handle all the subtypes,
1407 and we need `free_binode`.
1409 ###### ast functions
1411 static void free_binode(struct binode *b)
1416 free_exec(b->right);
1420 ###### core functions
1421 static void free_exec(struct exec *e)
1430 ###### forward decls
1432 static void free_exec(struct exec *e);
1434 ###### free exec cases
1435 case Xbinode: free_binode(cast(binode, e)); break;
1439 Printing an `exec` requires that we know the current indent level for
1440 printing line-oriented components. As will become clear later, we
1441 also want to know what sort of bracketing to use.
1443 ###### ast functions
1445 static void do_indent(int i, char *str)
1452 ###### core functions
1453 static void print_binode(struct binode *b, int indent, int bracket)
1457 ## print binode cases
1461 static void print_exec(struct exec *e, int indent, int bracket)
1467 print_binode(cast(binode, e), indent, bracket); break;
1472 ###### forward decls
1474 static void print_exec(struct exec *e, int indent, int bracket);
1478 As discussed, analysis involves propagating type requirements around the
1479 program and looking for errors.
1481 So `propagate_types` is passed an expected type (being a `struct type`
1482 pointer together with some `val_rules` flags) that the `exec` is
1483 expected to return, and returns the type that it does return, either
1484 of which can be `NULL` signifying "unknown". An `ok` flag is passed
1485 by reference. It is set to `0` when an error is found, and `2` when
1486 any change is made. If it remains unchanged at `1`, then no more
1487 propagation is needed.
1491 enum val_rules {Rnolabel = 1<<0, Rboolok = 1<<1, Rnoconstant = 2<<1};
1495 if (rules & Rnolabel)
1496 fputs(" (labels not permitted)", stderr);
1499 ###### core functions
1501 static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1502 struct type *type, int rules);
1503 static struct type *__propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1504 struct type *type, int rules)
1511 switch (prog->type) {
1514 struct binode *b = cast(binode, prog);
1516 ## propagate binode cases
1520 ## propagate exec cases
1525 static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1526 struct type *type, int rules)
1528 struct type *ret = __propagate_types(prog, c, ok, type, rules);
1537 Interpreting an `exec` doesn't require anything but the `exec`. State
1538 is stored in variables and each variable will be directly linked from
1539 within the `exec` tree. The exception to this is the `main` function
1540 which needs to look at command line arguments. This function will be
1541 interpreted separately.
1543 Each `exec` can return a value combined with a type in `struct lrval`.
1544 The type may be `Tnone` but must be non-NULL. Some `exec`s will return
1545 the location of a value, which can be updated, in `lval`. Others will
1546 set `lval` to NULL indicating that there is a value of appropriate type
1549 ###### core functions
1553 struct value rval, *lval;
1556 static struct lrval _interp_exec(struct parse_context *c, struct exec *e);
1558 static struct value interp_exec(struct parse_context *c, struct exec *e,
1559 struct type **typeret)
1561 struct lrval ret = _interp_exec(c, e);
1563 if (!ret.type) abort();
1565 *typeret = ret.type;
1567 dup_value(ret.type, ret.lval, &ret.rval);
1571 static struct value *linterp_exec(struct parse_context *c, struct exec *e,
1572 struct type **typeret)
1574 struct lrval ret = _interp_exec(c, e);
1577 *typeret = ret.type;
1579 free_value(ret.type, &ret.rval);
1583 static struct lrval _interp_exec(struct parse_context *c, struct exec *e)
1586 struct value rv = {}, *lrv = NULL;
1587 struct type *rvtype;
1589 rvtype = ret.type = Tnone;
1599 struct binode *b = cast(binode, e);
1600 struct value left, right, *lleft;
1601 struct type *ltype, *rtype;
1602 ltype = rtype = Tnone;
1604 ## interp binode cases
1606 free_value(ltype, &left);
1607 free_value(rtype, &right);
1610 ## interp exec cases
1620 Now that we have the shape of the interpreter in place we can add some
1621 complex types and connected them in to the data structures and the
1622 different phases of parse, analyse, print, interpret.
1624 Thus far we have arrays and structs.
1628 Arrays can be declared by giving a size and a type, as `[size]type' so
1629 `freq:[26]number` declares `freq` to be an array of 26 numbers. The
1630 size can be either a literal number, or a named constant. Some day an
1631 arbitrary expression will be supported.
1633 As a formal parameter to a function, the array can be declared with a
1634 new variable as the size: `name:[size::number]string`. The `size`
1635 variable is set to the size of the array and must be a constant. As
1636 `number` is the only supported type, it can be left out:
1637 `name:[size::]string`.
1639 Arrays cannot be assigned. When pointers are introduced we will also
1640 introduce array slices which can refer to part or all of an array -
1641 the assignment syntax will create a slice. For now, an array can only
1642 ever be referenced by the name it is declared with. It is likely that
1643 a "`copy`" primitive will eventually be define which can be used to
1644 make a copy of an array with controllable recursive depth.
1646 For now we have two sorts of array, those with fixed size either because
1647 it is given as a literal number or because it is a struct member (which
1648 cannot have a runtime-changing size), and those with a size that is
1649 determined at runtime - local variables with a const size. The former
1650 have their size calculated at parse time, the latter at run time.
1652 For the latter type, the `size` field of the type is the size of a
1653 pointer, and the array is reallocated every time it comes into scope.
1655 We differentiate struct fields with a const size from local variables
1656 with a const size by whether they are prepared at parse time or not.
1658 ###### type union fields
1661 int unspec; // size is unspecified - vsize must be set.
1664 struct variable *vsize;
1665 struct type *member;
1668 ###### value union fields
1669 void *array; // used if not static_size
1671 ###### value functions
1673 static void array_prepare_type(struct parse_context *c, struct type *type,
1676 struct value *vsize;
1678 if (!type->array.vsize || type->array.static_size)
1681 vsize = var_value(c, type->array.vsize);
1683 mpz_tdiv_q(q, mpq_numref(vsize->num), mpq_denref(vsize->num));
1684 type->array.size = mpz_get_si(q);
1688 type->array.static_size = 1;
1689 type->size = type->array.size * type->array.member->size;
1690 type->align = type->array.member->align;
1694 static void array_init(struct type *type, struct value *val)
1697 void *ptr = val->ptr;
1701 if (!type->array.static_size) {
1702 val->array = calloc(type->array.size,
1703 type->array.member->size);
1706 for (i = 0; i < type->array.size; i++) {
1708 v = (void*)ptr + i * type->array.member->size;
1709 val_init(type->array.member, v);
1713 static void array_free(struct type *type, struct value *val)
1716 void *ptr = val->ptr;
1718 if (!type->array.static_size)
1720 for (i = 0; i < type->array.size; i++) {
1722 v = (void*)ptr + i * type->array.member->size;
1723 free_value(type->array.member, v);
1725 if (!type->array.static_size)
1729 static int array_compat(struct type *require, struct type *have)
1731 if (have->compat != require->compat)
1732 return 0; // UNTESTED
1733 /* Both are arrays, so we can look at details */
1734 if (!type_compat(require->array.member, have->array.member, 0))
1736 if (have->array.unspec && require->array.unspec) {
1737 if (have->array.vsize && require->array.vsize &&
1738 have->array.vsize != require->array.vsize) // UNTESTED
1739 /* sizes might not be the same */
1740 return 0; // UNTESTED
1743 if (have->array.unspec || require->array.unspec)
1744 return 1; // UNTESTED
1745 if (require->array.vsize == NULL && have->array.vsize == NULL)
1746 return require->array.size == have->array.size;
1748 return require->array.vsize == have->array.vsize; // UNTESTED
1751 static void array_print_type(struct type *type, FILE *f)
1754 if (type->array.vsize) {
1755 struct binding *b = type->array.vsize->name;
1756 fprintf(f, "%.*s%s]", b->name.len, b->name.txt,
1757 type->array.unspec ? "::" : "");
1759 fprintf(f, "%d]", type->array.size);
1760 type_print(type->array.member, f);
1763 static struct type array_prototype = {
1765 .prepare_type = array_prepare_type,
1766 .print_type = array_print_type,
1767 .compat = array_compat,
1769 .size = sizeof(void*),
1770 .align = sizeof(void*),
1773 ###### declare terminals
1778 | [ NUMBER ] Type ${ {
1781 struct text noname = { "", 0 };
1784 $0 = t = add_type(c, noname, &array_prototype);
1785 t->array.member = $<4;
1786 t->array.vsize = NULL;
1787 if (number_parse(num, tail, $2.txt) == 0)
1788 tok_err(c, "error: unrecognised number", &$2);
1790 tok_err(c, "error: unsupported number suffix", &$2);
1792 t->array.size = mpz_get_ui(mpq_numref(num));
1793 if (mpz_cmp_ui(mpq_denref(num), 1) != 0) {
1794 tok_err(c, "error: array size must be an integer",
1796 } else if (mpz_cmp_ui(mpq_numref(num), 1UL << 30) >= 0)
1797 tok_err(c, "error: array size is too large",
1801 t->array.static_size = 1;
1802 t->size = t->array.size * t->array.member->size;
1803 t->align = t->array.member->align;
1806 | [ IDENTIFIER ] Type ${ {
1807 struct variable *v = var_ref(c, $2.txt);
1808 struct text noname = { "", 0 };
1811 tok_err(c, "error: name undeclared", &$2);
1812 else if (!v->constant)
1813 tok_err(c, "error: array size must be a constant", &$2);
1815 $0 = add_type(c, noname, &array_prototype);
1816 $0->array.member = $<4;
1818 $0->array.vsize = v;
1823 OptType -> Type ${ $0 = $<1; }$
1826 ###### formal type grammar
1828 | [ IDENTIFIER :: OptType ] Type ${ {
1829 struct variable *v = var_decl(c, $ID.txt);
1830 struct text noname = { "", 0 };
1836 $0 = add_type(c, noname, &array_prototype);
1837 $0->array.member = $<6;
1839 $0->array.unspec = 1;
1840 $0->array.vsize = v;
1846 ###### variable grammar
1848 | Variable [ Expression ] ${ {
1849 struct binode *b = new(binode);
1856 ###### print binode cases
1858 print_exec(b->left, -1, bracket);
1860 print_exec(b->right, -1, bracket);
1864 ###### propagate binode cases
1866 /* left must be an array, right must be a number,
1867 * result is the member type of the array
1869 propagate_types(b->right, c, ok, Tnum, 0);
1870 t = propagate_types(b->left, c, ok, NULL, rules & Rnoconstant);
1871 if (!t || t->compat != array_compat) {
1872 type_err(c, "error: %1 cannot be indexed", prog, t, 0, NULL);
1875 if (!type_compat(type, t->array.member, rules)) {
1876 type_err(c, "error: have %1 but need %2", prog,
1877 t->array.member, rules, type);
1879 return t->array.member;
1883 ###### interp binode cases
1889 lleft = linterp_exec(c, b->left, <ype);
1890 right = interp_exec(c, b->right, &rtype);
1892 mpz_tdiv_q(q, mpq_numref(right.num), mpq_denref(right.num));
1896 if (ltype->array.static_size)
1899 ptr = *(void**)lleft;
1900 rvtype = ltype->array.member;
1901 if (i >= 0 && i < ltype->array.size)
1902 lrv = ptr + i * rvtype->size;
1904 val_init(ltype->array.member, &rv);
1911 A `struct` is a data-type that contains one or more other data-types.
1912 It differs from an array in that each member can be of a different
1913 type, and they are accessed by name rather than by number. Thus you
1914 cannot choose an element by calculation, you need to know what you
1917 The language makes no promises about how a given structure will be
1918 stored in memory - it is free to rearrange fields to suit whatever
1919 criteria seems important.
1921 Structs are declared separately from program code - they cannot be
1922 declared in-line in a variable declaration like arrays can. A struct
1923 is given a name and this name is used to identify the type - the name
1924 is not prefixed by the word `struct` as it would be in C.
1926 Structs are only treated as the same if they have the same name.
1927 Simply having the same fields in the same order is not enough. This
1928 might change once we can create structure initializers from a list of
1931 Each component datum is identified much like a variable is declared,
1932 with a name, one or two colons, and a type. The type cannot be omitted
1933 as there is no opportunity to deduce the type from usage. An initial
1934 value can be given following an equals sign, so
1936 ##### Example: a struct type
1942 would declare a type called "complex" which has two number fields,
1943 each initialised to zero.
1945 Struct will need to be declared separately from the code that uses
1946 them, so we will need to be able to print out the declaration of a
1947 struct when reprinting the whole program. So a `print_type_decl` type
1948 function will be needed.
1950 ###### type union fields
1962 ###### type functions
1963 void (*print_type_decl)(struct type *type, FILE *f);
1965 ###### value functions
1967 static void structure_init(struct type *type, struct value *val)
1971 for (i = 0; i < type->structure.nfields; i++) {
1973 v = (void*) val->ptr + type->structure.fields[i].offset;
1974 if (type->structure.fields[i].init)
1975 dup_value(type->structure.fields[i].type,
1976 type->structure.fields[i].init,
1979 val_init(type->structure.fields[i].type, v);
1983 static void structure_free(struct type *type, struct value *val)
1987 for (i = 0; i < type->structure.nfields; i++) {
1989 v = (void*)val->ptr + type->structure.fields[i].offset;
1990 free_value(type->structure.fields[i].type, v);
1994 static void structure_free_type(struct type *t)
1997 for (i = 0; i < t->structure.nfields; i++)
1998 if (t->structure.fields[i].init) {
1999 free_value(t->structure.fields[i].type,
2000 t->structure.fields[i].init);
2002 free(t->structure.fields);
2005 static struct type structure_prototype = {
2006 .init = structure_init,
2007 .free = structure_free,
2008 .free_type = structure_free_type,
2009 .print_type_decl = structure_print_type,
2023 ###### free exec cases
2025 free_exec(cast(fieldref, e)->left);
2029 ###### declare terminals
2032 ###### variable grammar
2034 | Variable . IDENTIFIER ${ {
2035 struct fieldref *fr = new_pos(fieldref, $2);
2042 ###### print exec cases
2046 struct fieldref *f = cast(fieldref, e);
2047 print_exec(f->left, -1, bracket);
2048 printf(".%.*s", f->name.len, f->name.txt);
2052 ###### ast functions
2053 static int find_struct_index(struct type *type, struct text field)
2056 for (i = 0; i < type->structure.nfields; i++)
2057 if (text_cmp(type->structure.fields[i].name, field) == 0)
2062 ###### propagate exec cases
2066 struct fieldref *f = cast(fieldref, prog);
2067 struct type *st = propagate_types(f->left, c, ok, NULL, 0);
2070 type_err(c, "error: unknown type for field access", f->left, // UNTESTED
2072 else if (st->init != structure_init)
2073 type_err(c, "error: field reference attempted on %1, not a struct",
2074 f->left, st, 0, NULL);
2075 else if (f->index == -2) {
2076 f->index = find_struct_index(st, f->name);
2078 type_err(c, "error: cannot find requested field in %1",
2079 f->left, st, 0, NULL);
2081 if (f->index >= 0) {
2082 struct type *ft = st->structure.fields[f->index].type;
2083 if (!type_compat(type, ft, rules))
2084 type_err(c, "error: have %1 but need %2", prog,
2091 ###### interp exec cases
2094 struct fieldref *f = cast(fieldref, e);
2096 struct value *lleft = linterp_exec(c, f->left, <ype);
2097 lrv = (void*)lleft->ptr + ltype->structure.fields[f->index].offset;
2098 rvtype = ltype->structure.fields[f->index].type;
2104 struct fieldlist *prev;
2108 ###### ast functions
2109 static void free_fieldlist(struct fieldlist *f)
2113 free_fieldlist(f->prev);
2115 free_value(f->f.type, f->f.init); // UNTESTED
2116 free(f->f.init); // UNTESTED
2121 ###### top level grammar
2122 DeclareStruct -> struct IDENTIFIER FieldBlock Newlines ${ {
2124 add_type(c, $2.txt, &structure_prototype);
2126 struct fieldlist *f;
2128 for (f = $3; f; f=f->prev)
2131 t->structure.nfields = cnt;
2132 t->structure.fields = calloc(cnt, sizeof(struct field));
2135 int a = f->f.type->align;
2137 t->structure.fields[cnt] = f->f;
2138 if (t->size & (a-1))
2139 t->size = (t->size | (a-1)) + 1;
2140 t->structure.fields[cnt].offset = t->size;
2141 t->size += ((f->f.type->size - 1) | (a-1)) + 1;
2150 FieldBlock -> { IN OptNL FieldLines OUT OptNL } ${ $0 = $<FL; }$
2151 | { SimpleFieldList } ${ $0 = $<SFL; }$
2152 | IN OptNL FieldLines OUT ${ $0 = $<FL; }$
2153 | SimpleFieldList EOL ${ $0 = $<SFL; }$
2155 FieldLines -> SimpleFieldList Newlines ${ $0 = $<SFL; }$
2156 | FieldLines SimpleFieldList Newlines ${
2161 SimpleFieldList -> Field ${ $0 = $<F; }$
2162 | SimpleFieldList ; Field ${
2166 | SimpleFieldList ; ${
2169 | ERROR ${ tok_err(c, "Syntax error in struct field", &$1); }$
2171 Field -> IDENTIFIER : Type = Expression ${ {
2174 $0 = calloc(1, sizeof(struct fieldlist));
2175 $0->f.name = $1.txt;
2180 propagate_types($<5, c, &ok, $3, 0);
2183 c->parse_error = 1; // UNTESTED
2185 struct value vl = interp_exec(c, $5, NULL);
2186 $0->f.init = global_alloc(c, $0->f.type, NULL, &vl);
2189 | IDENTIFIER : Type ${
2190 $0 = calloc(1, sizeof(struct fieldlist));
2191 $0->f.name = $1.txt;
2193 if ($0->f.type->prepare_type)
2194 $0->f.type->prepare_type(c, $0->f.type, 1);
2197 ###### forward decls
2198 static void structure_print_type(struct type *t, FILE *f);
2200 ###### value functions
2201 static void structure_print_type(struct type *t, FILE *f) // UNTESTED
2205 fprintf(f, "struct %.*s\n", t->name.len, t->name.txt);
2207 for (i = 0; i < t->structure.nfields; i++) {
2208 struct field *fl = t->structure.fields + i;
2209 fprintf(f, " %.*s : ", fl->name.len, fl->name.txt);
2210 type_print(fl->type, f);
2211 if (fl->type->print && fl->init) {
2213 if (fl->type == Tstr)
2214 fprintf(f, "\""); // UNTESTED
2215 print_value(fl->type, fl->init);
2216 if (fl->type == Tstr)
2217 fprintf(f, "\""); // UNTESTED
2223 ###### print type decls
2225 struct type *t; // UNTESTED
2228 while (target != 0) {
2230 for (t = context.typelist; t ; t=t->next)
2231 if (t->print_type_decl) {
2240 t->print_type_decl(t, stdout);
2248 A function is a named chunk of code which can be passed parameters and
2249 can return results. Each function has an implicit type which includes
2250 the set of parameters and the return value. As yet these types cannot
2251 be declared separate from the function itself.
2253 In fact, only one function is currently possible - `main`. `main` is
2254 passed an array of strings together with the size of the array, and
2255 doesn't return anything. The strings are command line arguments.
2257 The parameters can be specified either in parentheses as a list, such as
2259 ##### Example: function 1
2261 func main(av:[ac::number]string)
2264 or as an indented list of one parameter per line
2266 ##### Example: function 2
2269 argv:[argc::number]string
2281 MainFunction -> func main ( OpenScope Args ) Block Newlines ${
2284 $0->left = reorder_bilist($<Ar);
2286 var_block_close(c, CloseSequential);
2287 if (c->scope_stack && !c->parse_error) abort();
2289 | func main IN OpenScope OptNL Args OUT OptNL do Block Newlines ${
2292 $0->left = reorder_bilist($<Ar);
2294 var_block_close(c, CloseSequential);
2295 if (c->scope_stack && !c->parse_error) abort();
2297 | func main NEWLINE OpenScope OptNL do Block Newlines ${
2302 var_block_close(c, CloseSequential);
2303 if (c->scope_stack && !c->parse_error) abort();
2306 Args -> ${ $0 = NULL; }$
2307 | Varlist ${ $0 = $<1; }$
2308 | Varlist ; ${ $0 = $<1; }$
2309 | Varlist NEWLINE ${ $0 = $<1; }$
2311 Varlist -> Varlist ; ArgDecl ${ // UNTESTED
2325 ArgDecl -> IDENTIFIER : FormalType ${ {
2326 struct variable *v = var_decl(c, $1.txt);
2332 ## Executables: the elements of code
2334 Each code element needs to be parsed, printed, analysed,
2335 interpreted, and freed. There are several, so let's just start with
2336 the easy ones and work our way up.
2340 We have already met values as separate objects. When manifest
2341 constants appear in the program text, that must result in an executable
2342 which has a constant value. So the `val` structure embeds a value in
2355 ###### ast functions
2356 struct val *new_val(struct type *T, struct token tk)
2358 struct val *v = new_pos(val, tk);
2369 $0 = new_val(Tbool, $1);
2373 $0 = new_val(Tbool, $1);
2377 $0 = new_val(Tnum, $1);
2380 if (number_parse($0->val.num, tail, $1.txt) == 0)
2381 mpq_init($0->val.num); // UNTESTED
2383 tok_err(c, "error: unsupported number suffix",
2388 $0 = new_val(Tstr, $1);
2391 string_parse(&$1, '\\', &$0->val.str, tail);
2393 tok_err(c, "error: unsupported string suffix",
2398 $0 = new_val(Tstr, $1);
2401 string_parse(&$1, '\\', &$0->val.str, tail);
2403 tok_err(c, "error: unsupported string suffix",
2408 ###### print exec cases
2411 struct val *v = cast(val, e);
2412 if (v->vtype == Tstr)
2414 print_value(v->vtype, &v->val);
2415 if (v->vtype == Tstr)
2420 ###### propagate exec cases
2423 struct val *val = cast(val, prog);
2424 if (!type_compat(type, val->vtype, rules))
2425 type_err(c, "error: expected %1%r found %2",
2426 prog, type, rules, val->vtype);
2430 ###### interp exec cases
2432 rvtype = cast(val, e)->vtype;
2433 dup_value(rvtype, &cast(val, e)->val, &rv);
2436 ###### ast functions
2437 static void free_val(struct val *v)
2440 free_value(v->vtype, &v->val);
2444 ###### free exec cases
2445 case Xval: free_val(cast(val, e)); break;
2447 ###### ast functions
2448 // Move all nodes from 'b' to 'rv', reversing their order.
2449 // In 'b' 'left' is a list, and 'right' is the last node.
2450 // In 'rv', left' is the first node and 'right' is a list.
2451 static struct binode *reorder_bilist(struct binode *b)
2453 struct binode *rv = NULL;
2456 struct exec *t = b->right;
2460 b = cast(binode, b->left);
2470 Just as we used a `val` to wrap a value into an `exec`, we similarly
2471 need a `var` to wrap a `variable` into an exec. While each `val`
2472 contained a copy of the value, each `var` holds a link to the variable
2473 because it really is the same variable no matter where it appears.
2474 When a variable is used, we need to remember to follow the `->merged`
2475 link to find the primary instance.
2483 struct variable *var;
2491 VariableDecl -> IDENTIFIER : ${ {
2492 struct variable *v = var_decl(c, $1.txt);
2493 $0 = new_pos(var, $1);
2498 v = var_ref(c, $1.txt);
2500 type_err(c, "error: variable '%v' redeclared",
2502 type_err(c, "info: this is where '%v' was first declared",
2503 v->where_decl, NULL, 0, NULL);
2506 | IDENTIFIER :: ${ {
2507 struct variable *v = var_decl(c, $1.txt);
2508 $0 = new_pos(var, $1);
2514 v = var_ref(c, $1.txt);
2516 type_err(c, "error: variable '%v' redeclared",
2518 type_err(c, "info: this is where '%v' was first declared",
2519 v->where_decl, NULL, 0, NULL);
2522 | IDENTIFIER : Type ${ {
2523 struct variable *v = var_decl(c, $1.txt);
2524 $0 = new_pos(var, $1);
2531 v = var_ref(c, $1.txt);
2533 type_err(c, "error: variable '%v' redeclared",
2535 type_err(c, "info: this is where '%v' was first declared",
2536 v->where_decl, NULL, 0, NULL);
2539 | IDENTIFIER :: Type ${ {
2540 struct variable *v = var_decl(c, $1.txt);
2541 $0 = new_pos(var, $1);
2549 v = var_ref(c, $1.txt);
2551 type_err(c, "error: variable '%v' redeclared",
2553 type_err(c, "info: this is where '%v' was first declared",
2554 v->where_decl, NULL, 0, NULL);
2559 Variable -> IDENTIFIER ${ {
2560 struct variable *v = var_ref(c, $1.txt);
2561 $0 = new_pos(var, $1);
2563 /* This might be a label - allocate a var just in case */
2564 v = var_decl(c, $1.txt);
2571 cast(var, $0)->var = v;
2575 ###### print exec cases
2578 struct var *v = cast(var, e);
2580 struct binding *b = v->var->name;
2581 printf("%.*s", b->name.len, b->name.txt);
2588 if (loc && loc->type == Xvar) {
2589 struct var *v = cast(var, loc);
2591 struct binding *b = v->var->name;
2592 fprintf(stderr, "%.*s", b->name.len, b->name.txt);
2594 fputs("???", stderr); // NOTEST
2596 fputs("NOTVAR", stderr); // NOTEST
2599 ###### propagate exec cases
2603 struct var *var = cast(var, prog);
2604 struct variable *v = var->var;
2606 type_err(c, "%d:BUG: no variable!!", prog, NULL, 0, NULL); // NOTEST
2607 return Tnone; // NOTEST
2610 if (v->constant && (rules & Rnoconstant)) {
2611 type_err(c, "error: Cannot assign to a constant: %v",
2612 prog, NULL, 0, NULL);
2613 type_err(c, "info: name was defined as a constant here",
2614 v->where_decl, NULL, 0, NULL);
2617 if (v->type == Tnone && v->where_decl == prog)
2618 type_err(c, "error: variable used but not declared: %v",
2619 prog, NULL, 0, NULL);
2620 if (v->type == NULL) {
2621 if (type && *ok != 0) {
2623 v->where_set = prog;
2628 if (!type_compat(type, v->type, rules)) {
2629 type_err(c, "error: expected %1%r but variable '%v' is %2", prog,
2630 type, rules, v->type);
2631 type_err(c, "info: this is where '%v' was set to %1", v->where_set,
2632 v->type, rules, NULL);
2639 ###### interp exec cases
2642 struct var *var = cast(var, e);
2643 struct variable *v = var->var;
2646 lrv = var_value(c, v);
2651 ###### ast functions
2653 static void free_var(struct var *v)
2658 ###### free exec cases
2659 case Xvar: free_var(cast(var, e)); break;
2661 ### Expressions: Conditional
2663 Our first user of the `binode` will be conditional expressions, which
2664 is a bit odd as they actually have three components. That will be
2665 handled by having 2 binodes for each expression. The conditional
2666 expression is the lowest precedence operator which is why we define it
2667 first - to start the precedence list.
2669 Conditional expressions are of the form "value `if` condition `else`
2670 other_value". They associate to the right, so everything to the right
2671 of `else` is part of an else value, while only a higher-precedence to
2672 the left of `if` is the if values. Between `if` and `else` there is no
2673 room for ambiguity, so a full conditional expression is allowed in
2685 Expression -> Expression if Expression else Expression $$ifelse ${ {
2686 struct binode *b1 = new(binode);
2687 struct binode *b2 = new(binode);
2696 ## expression grammar
2698 ###### print binode cases
2701 b2 = cast(binode, b->right);
2702 if (bracket) printf("(");
2703 print_exec(b2->left, -1, bracket);
2705 print_exec(b->left, -1, bracket);
2707 print_exec(b2->right, -1, bracket);
2708 if (bracket) printf(")");
2711 ###### propagate binode cases
2714 /* cond must be Tbool, others must match */
2715 struct binode *b2 = cast(binode, b->right);
2718 propagate_types(b->left, c, ok, Tbool, 0);
2719 t = propagate_types(b2->left, c, ok, type, Rnolabel);
2720 t2 = propagate_types(b2->right, c, ok, type ?: t, Rnolabel);
2724 ###### interp binode cases
2727 struct binode *b2 = cast(binode, b->right);
2728 left = interp_exec(c, b->left, <ype);
2730 rv = interp_exec(c, b2->left, &rvtype); // UNTESTED
2732 rv = interp_exec(c, b2->right, &rvtype);
2736 ### Expressions: Boolean
2738 The next class of expressions to use the `binode` will be Boolean
2739 expressions. "`and then`" and "`or else`" are similar to `and` and `or`
2740 have same corresponding precendence. The difference is that they don't
2741 evaluate the second expression if not necessary.
2750 ###### expr precedence
2755 ###### expression grammar
2756 | Expression or Expression ${ {
2757 struct binode *b = new(binode);
2763 | Expression or else Expression ${ {
2764 struct binode *b = new(binode);
2771 | Expression and Expression ${ {
2772 struct binode *b = new(binode);
2778 | Expression and then Expression ${ {
2779 struct binode *b = new(binode);
2786 | not Expression ${ {
2787 struct binode *b = new(binode);
2793 ###### print binode cases
2795 if (bracket) printf("(");
2796 print_exec(b->left, -1, bracket);
2798 print_exec(b->right, -1, bracket);
2799 if (bracket) printf(")");
2802 if (bracket) printf("(");
2803 print_exec(b->left, -1, bracket);
2804 printf(" and then ");
2805 print_exec(b->right, -1, bracket);
2806 if (bracket) printf(")");
2809 if (bracket) printf("(");
2810 print_exec(b->left, -1, bracket);
2812 print_exec(b->right, -1, bracket);
2813 if (bracket) printf(")");
2816 if (bracket) printf("(");
2817 print_exec(b->left, -1, bracket);
2818 printf(" or else ");
2819 print_exec(b->right, -1, bracket);
2820 if (bracket) printf(")");
2823 if (bracket) printf("(");
2825 print_exec(b->right, -1, bracket);
2826 if (bracket) printf(")");
2829 ###### propagate binode cases
2835 /* both must be Tbool, result is Tbool */
2836 propagate_types(b->left, c, ok, Tbool, 0);
2837 propagate_types(b->right, c, ok, Tbool, 0);
2838 if (type && type != Tbool)
2839 type_err(c, "error: %1 operation found where %2 expected", prog,
2843 ###### interp binode cases
2845 rv = interp_exec(c, b->left, &rvtype);
2846 right = interp_exec(c, b->right, &rtype);
2847 rv.bool = rv.bool && right.bool;
2850 rv = interp_exec(c, b->left, &rvtype);
2852 rv = interp_exec(c, b->right, NULL);
2855 rv = interp_exec(c, b->left, &rvtype);
2856 right = interp_exec(c, b->right, &rtype);
2857 rv.bool = rv.bool || right.bool;
2860 rv = interp_exec(c, b->left, &rvtype);
2862 rv = interp_exec(c, b->right, NULL);
2865 rv = interp_exec(c, b->right, &rvtype);
2869 ### Expressions: Comparison
2871 Of slightly higher precedence that Boolean expressions are Comparisons.
2872 A comparison takes arguments of any comparable type, but the two types
2875 To simplify the parsing we introduce an `eop` which can record an
2876 expression operator, and the `CMPop` non-terminal will match one of them.
2883 ###### ast functions
2884 static void free_eop(struct eop *e)
2898 ###### expr precedence
2899 $LEFT < > <= >= == != CMPop
2901 ###### expression grammar
2902 | Expression CMPop Expression ${ {
2903 struct binode *b = new(binode);
2913 CMPop -> < ${ $0.op = Less; }$
2914 | > ${ $0.op = Gtr; }$
2915 | <= ${ $0.op = LessEq; }$
2916 | >= ${ $0.op = GtrEq; }$
2917 | == ${ $0.op = Eql; }$
2918 | != ${ $0.op = NEql; }$
2920 ###### print binode cases
2928 if (bracket) printf("(");
2929 print_exec(b->left, -1, bracket);
2931 case Less: printf(" < "); break;
2932 case LessEq: printf(" <= "); break;
2933 case Gtr: printf(" > "); break;
2934 case GtrEq: printf(" >= "); break;
2935 case Eql: printf(" == "); break;
2936 case NEql: printf(" != "); break;
2937 default: abort(); // NOTEST
2939 print_exec(b->right, -1, bracket);
2940 if (bracket) printf(")");
2943 ###### propagate binode cases
2950 /* Both must match but not be labels, result is Tbool */
2951 t = propagate_types(b->left, c, ok, NULL, Rnolabel);
2953 propagate_types(b->right, c, ok, t, 0);
2955 t = propagate_types(b->right, c, ok, NULL, Rnolabel); // UNTESTED
2957 t = propagate_types(b->left, c, ok, t, 0); // UNTESTED
2959 if (!type_compat(type, Tbool, 0))
2960 type_err(c, "error: Comparison returns %1 but %2 expected", prog,
2961 Tbool, rules, type);
2964 ###### interp binode cases
2973 left = interp_exec(c, b->left, <ype);
2974 right = interp_exec(c, b->right, &rtype);
2975 cmp = value_cmp(ltype, rtype, &left, &right);
2978 case Less: rv.bool = cmp < 0; break;
2979 case LessEq: rv.bool = cmp <= 0; break;
2980 case Gtr: rv.bool = cmp > 0; break;
2981 case GtrEq: rv.bool = cmp >= 0; break;
2982 case Eql: rv.bool = cmp == 0; break;
2983 case NEql: rv.bool = cmp != 0; break;
2984 default: rv.bool = 0; break; // NOTEST
2989 ### Expressions: The rest
2991 The remaining expressions with the highest precedence are arithmetic,
2992 string concatenation, and string conversion. String concatenation
2993 (`++`) has the same precedence as multiplication and division, but lower
2996 String conversion is a temporary feature until I get a better type
2997 system. `$` is a prefix operator which expects a string and returns
3000 `+` and `-` are both infix and prefix operations (where they are
3001 absolute value and negation). These have different operator names.
3003 We also have a 'Bracket' operator which records where parentheses were
3004 found. This makes it easy to reproduce these when printing. Possibly I
3005 should only insert brackets were needed for precedence.
3015 ###### expr precedence
3021 ###### expression grammar
3022 | Expression Eop Expression ${ {
3023 struct binode *b = new(binode);
3030 | Expression Top Expression ${ {
3031 struct binode *b = new(binode);
3038 | ( Expression ) ${ {
3039 struct binode *b = new_pos(binode, $1);
3044 | Uop Expression ${ {
3045 struct binode *b = new(binode);
3050 | Value ${ $0 = $<1; }$
3051 | Variable ${ $0 = $<1; }$
3054 Eop -> + ${ $0.op = Plus; }$
3055 | - ${ $0.op = Minus; }$
3057 Uop -> + ${ $0.op = Absolute; }$
3058 | - ${ $0.op = Negate; }$
3059 | $ ${ $0.op = StringConv; }$
3061 Top -> * ${ $0.op = Times; }$
3062 | / ${ $0.op = Divide; }$
3063 | % ${ $0.op = Rem; }$
3064 | ++ ${ $0.op = Concat; }$
3066 ###### print binode cases
3073 if (bracket) printf("(");
3074 print_exec(b->left, indent, bracket);
3076 case Plus: fputs(" + ", stdout); break;
3077 case Minus: fputs(" - ", stdout); break;
3078 case Times: fputs(" * ", stdout); break;
3079 case Divide: fputs(" / ", stdout); break;
3080 case Rem: fputs(" % ", stdout); break;
3081 case Concat: fputs(" ++ ", stdout); break;
3082 default: abort(); // NOTEST
3084 print_exec(b->right, indent, bracket);
3085 if (bracket) printf(")");
3090 if (bracket) printf("(");
3092 case Absolute: fputs("+", stdout); break;
3093 case Negate: fputs("-", stdout); break;
3094 case StringConv: fputs("$", stdout); break;
3095 default: abort(); // NOTEST
3097 print_exec(b->right, indent, bracket);
3098 if (bracket) printf(")");
3102 print_exec(b->right, indent, bracket);
3106 ###### propagate binode cases
3112 /* both must be numbers, result is Tnum */
3115 /* as propagate_types ignores a NULL,
3116 * unary ops fit here too */
3117 propagate_types(b->left, c, ok, Tnum, 0);
3118 propagate_types(b->right, c, ok, Tnum, 0);
3119 if (!type_compat(type, Tnum, 0))
3120 type_err(c, "error: Arithmetic returns %1 but %2 expected", prog,
3125 /* both must be Tstr, result is Tstr */
3126 propagate_types(b->left, c, ok, Tstr, 0);
3127 propagate_types(b->right, c, ok, Tstr, 0);
3128 if (!type_compat(type, Tstr, 0))
3129 type_err(c, "error: Concat returns %1 but %2 expected", prog,
3134 /* op must be string, result is number */
3135 propagate_types(b->left, c, ok, Tstr, 0);
3136 if (!type_compat(type, Tnum, 0))
3137 type_err(c, // UNTESTED
3138 "error: Can only convert string to number, not %1",
3139 prog, type, 0, NULL);
3143 return propagate_types(b->right, c, ok, type, 0);
3145 ###### interp binode cases
3148 rv = interp_exec(c, b->left, &rvtype);
3149 right = interp_exec(c, b->right, &rtype);
3150 mpq_add(rv.num, rv.num, right.num);
3153 rv = interp_exec(c, b->left, &rvtype);
3154 right = interp_exec(c, b->right, &rtype);
3155 mpq_sub(rv.num, rv.num, right.num);
3158 rv = interp_exec(c, b->left, &rvtype);
3159 right = interp_exec(c, b->right, &rtype);
3160 mpq_mul(rv.num, rv.num, right.num);
3163 rv = interp_exec(c, b->left, &rvtype);
3164 right = interp_exec(c, b->right, &rtype);
3165 mpq_div(rv.num, rv.num, right.num);
3170 left = interp_exec(c, b->left, <ype);
3171 right = interp_exec(c, b->right, &rtype);
3172 mpz_init(l); mpz_init(r); mpz_init(rem);
3173 mpz_tdiv_q(l, mpq_numref(left.num), mpq_denref(left.num));
3174 mpz_tdiv_q(r, mpq_numref(right.num), mpq_denref(right.num));
3175 mpz_tdiv_r(rem, l, r);
3176 val_init(Tnum, &rv);
3177 mpq_set_z(rv.num, rem);
3178 mpz_clear(r); mpz_clear(l); mpz_clear(rem);
3183 rv = interp_exec(c, b->right, &rvtype);
3184 mpq_neg(rv.num, rv.num);
3187 rv = interp_exec(c, b->right, &rvtype);
3188 mpq_abs(rv.num, rv.num);
3191 rv = interp_exec(c, b->right, &rvtype);
3194 left = interp_exec(c, b->left, <ype);
3195 right = interp_exec(c, b->right, &rtype);
3197 rv.str = text_join(left.str, right.str);
3200 right = interp_exec(c, b->right, &rvtype);
3204 struct text tx = right.str;
3207 if (tx.txt[0] == '-') {
3208 neg = 1; // UNTESTED
3209 tx.txt++; // UNTESTED
3210 tx.len--; // UNTESTED
3212 if (number_parse(rv.num, tail, tx) == 0)
3213 mpq_init(rv.num); // UNTESTED
3215 mpq_neg(rv.num, rv.num); // UNTESTED
3217 printf("Unsupported suffix: %.*s\n", tx.len, tx.txt); // UNTESTED
3221 ###### value functions
3223 static struct text text_join(struct text a, struct text b)
3226 rv.len = a.len + b.len;
3227 rv.txt = malloc(rv.len);
3228 memcpy(rv.txt, a.txt, a.len);
3229 memcpy(rv.txt+a.len, b.txt, b.len);
3233 ### Blocks, Statements, and Statement lists.
3235 Now that we have expressions out of the way we need to turn to
3236 statements. There are simple statements and more complex statements.
3237 Simple statements do not contain (syntactic) newlines, complex statements do.
3239 Statements often come in sequences and we have corresponding simple
3240 statement lists and complex statement lists.
3241 The former comprise only simple statements separated by semicolons.
3242 The later comprise complex statements and simple statement lists. They are
3243 separated by newlines. Thus the semicolon is only used to separate
3244 simple statements on the one line. This may be overly restrictive,
3245 but I'm not sure I ever want a complex statement to share a line with
3248 Note that a simple statement list can still use multiple lines if
3249 subsequent lines are indented, so
3251 ###### Example: wrapped simple statement list
3256 is a single simple statement list. This might allow room for
3257 confusion, so I'm not set on it yet.
3259 A simple statement list needs no extra syntax. A complex statement
3260 list has two syntactic forms. It can be enclosed in braces (much like
3261 C blocks), or it can be introduced by an indent and continue until an
3262 unindented newline (much like Python blocks). With this extra syntax
3263 it is referred to as a block.
3265 Note that a block does not have to include any newlines if it only
3266 contains simple statements. So both of:
3268 if condition: a=b; d=f
3270 if condition { a=b; print f }
3274 In either case the list is constructed from a `binode` list with
3275 `Block` as the operator. When parsing the list it is most convenient
3276 to append to the end, so a list is a list and a statement. When using
3277 the list it is more convenient to consider a list to be a statement
3278 and a list. So we need a function to re-order a list.
3279 `reorder_bilist` serves this purpose.
3281 The only stand-alone statement we introduce at this stage is `pass`
3282 which does nothing and is represented as a `NULL` pointer in a `Block`
3283 list. Other stand-alone statements will follow once the infrastructure
3294 Block -> { IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
3295 | { SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
3296 | SimpleStatements ; ${ $0 = reorder_bilist($<SS); }$
3297 | SimpleStatements EOL ${ $0 = reorder_bilist($<SS); }$
3298 | IN OptNL Statementlist OUT ${ $0 = $<Sl; }$
3300 OpenBlock -> OpenScope { IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
3301 | OpenScope { SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
3302 | OpenScope SimpleStatements ; ${ $0 = reorder_bilist($<SS); }$
3303 | OpenScope SimpleStatements EOL ${ $0 = reorder_bilist($<SS); }$
3304 | IN OpenScope OptNL Statementlist OUT ${ $0 = $<Sl; }$
3306 UseBlock -> { OpenScope IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
3307 | { OpenScope SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
3308 | IN OpenScope OptNL Statementlist OUT ${ $0 = $<Sl; }$
3310 ColonBlock -> { IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
3311 | { SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
3312 | : SimpleStatements ; ${ $0 = reorder_bilist($<SS); }$
3313 | : SimpleStatements EOL ${ $0 = reorder_bilist($<SS); }$
3314 | : IN OptNL Statementlist OUT ${ $0 = $<Sl; }$
3316 Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<CS); }$
3318 ComplexStatements -> ComplexStatements ComplexStatement ${
3328 | ComplexStatement ${
3340 ComplexStatement -> SimpleStatements Newlines ${
3341 $0 = reorder_bilist($<SS);
3343 | SimpleStatements ; Newlines ${
3344 $0 = reorder_bilist($<SS);
3346 ## ComplexStatement Grammar
3349 SimpleStatements -> SimpleStatements ; SimpleStatement ${
3355 | SimpleStatement ${
3363 SimpleStatement -> pass ${ $0 = NULL; }$
3364 | ERROR ${ tok_err(c, "Syntax error in statement", &$1); }$
3365 ## SimpleStatement Grammar
3367 ###### print binode cases
3371 if (b->left == NULL) // UNTESTED
3372 printf("pass"); // UNTESTED
3374 print_exec(b->left, indent, bracket); // UNTESTED
3375 if (b->right) { // UNTESTED
3376 printf("; "); // UNTESTED
3377 print_exec(b->right, indent, bracket); // UNTESTED
3380 // block, one per line
3381 if (b->left == NULL)
3382 do_indent(indent, "pass\n");
3384 print_exec(b->left, indent, bracket);
3386 print_exec(b->right, indent, bracket);
3390 ###### propagate binode cases
3393 /* If any statement returns something other than Tnone
3394 * or Tbool then all such must return same type.
3395 * As each statement may be Tnone or something else,
3396 * we must always pass NULL (unknown) down, otherwise an incorrect
3397 * error might occur. We never return Tnone unless it is
3402 for (e = b; e; e = cast(binode, e->right)) {
3403 t = propagate_types(e->left, c, ok, NULL, rules);
3404 if ((rules & Rboolok) && t == Tbool)
3406 if (t && t != Tnone && t != Tbool) {
3410 type_err(c, "error: expected %1%r, found %2",
3411 e->left, type, rules, t);
3417 ###### interp binode cases
3419 while (rvtype == Tnone &&
3422 rv = interp_exec(c, b->left, &rvtype);
3423 b = cast(binode, b->right);
3427 ### The Print statement
3429 `print` is a simple statement that takes a comma-separated list of
3430 expressions and prints the values separated by spaces and terminated
3431 by a newline. No control of formatting is possible.
3433 `print` faces the same list-ordering issue as blocks, and uses the
3439 ##### expr precedence
3442 ###### SimpleStatement Grammar
3444 | print ExpressionList ${
3445 $0 = reorder_bilist($<2);
3447 | print ExpressionList , ${
3452 $0 = reorder_bilist($0);
3463 ExpressionList -> ExpressionList , Expression ${
3476 ###### print binode cases
3479 do_indent(indent, "print");
3483 print_exec(b->left, -1, bracket);
3487 b = cast(binode, b->right);
3493 ###### propagate binode cases
3496 /* don't care but all must be consistent */
3497 propagate_types(b->left, c, ok, NULL, Rnolabel);
3498 propagate_types(b->right, c, ok, NULL, Rnolabel);
3501 ###### interp binode cases
3507 for ( ; b; b = cast(binode, b->right))
3511 left = interp_exec(c, b->left, <ype);
3512 print_value(ltype, &left);
3513 free_value(ltype, &left);
3524 ###### Assignment statement
3526 An assignment will assign a value to a variable, providing it hasn't
3527 been declared as a constant. The analysis phase ensures that the type
3528 will be correct so the interpreter just needs to perform the
3529 calculation. There is a form of assignment which declares a new
3530 variable as well as assigning a value. If a name is assigned before
3531 it is declared, and error will be raised as the name is created as
3532 `Tlabel` and it is illegal to assign to such names.
3538 ###### declare terminals
3541 ###### SimpleStatement Grammar
3542 | Variable = Expression ${
3548 | VariableDecl = Expression ${
3556 if ($1->var->where_set == NULL) {
3558 "Variable declared with no type or value: %v",
3568 ###### print binode cases
3571 do_indent(indent, "");
3572 print_exec(b->left, indent, bracket);
3574 print_exec(b->right, indent, bracket);
3581 struct variable *v = cast(var, b->left)->var;
3582 do_indent(indent, "");
3583 print_exec(b->left, indent, bracket);
3584 if (cast(var, b->left)->var->constant) {
3585 if (v->where_decl == v->where_set) {
3587 type_print(v->type, stdout);
3592 if (v->where_decl == v->where_set) {
3594 type_print(v->type, stdout);
3601 print_exec(b->right, indent, bracket);
3608 ###### propagate binode cases
3612 /* Both must match and not be labels,
3613 * Type must support 'dup',
3614 * For Assign, left must not be constant.
3617 t = propagate_types(b->left, c, ok, NULL,
3618 Rnolabel | (b->op == Assign ? Rnoconstant : 0));
3623 if (propagate_types(b->right, c, ok, t, 0) != t)
3624 if (b->left->type == Xvar)
3625 type_err(c, "info: variable '%v' was set as %1 here.",
3626 cast(var, b->left)->var->where_set, t, rules, NULL);
3628 t = propagate_types(b->right, c, ok, NULL, Rnolabel);
3630 propagate_types(b->left, c, ok, t,
3631 (b->op == Assign ? Rnoconstant : 0));
3633 if (t && t->dup == NULL)
3634 type_err(c, "error: cannot assign value of type %1", b, t, 0, NULL);
3639 ###### interp binode cases
3642 lleft = linterp_exec(c, b->left, <ype);
3643 right = interp_exec(c, b->right, &rtype);
3645 free_value(ltype, lleft);
3646 dup_value(ltype, &right, lleft);
3653 struct variable *v = cast(var, b->left)->var;
3656 val = var_value(c, v);
3657 free_value(v->type, val);
3658 if (v->type->prepare_type)
3659 v->type->prepare_type(c, v->type, 0);
3661 right = interp_exec(c, b->right, &rtype);
3662 memcpy(val, &right, rtype->size);
3665 val_init(v->type, val);
3670 ### The `use` statement
3672 The `use` statement is the last "simple" statement. It is needed when
3673 the condition in a conditional statement is a block. `use` works much
3674 like `return` in C, but only completes the `condition`, not the whole
3680 ###### expr precedence
3683 ###### SimpleStatement Grammar
3685 $0 = new_pos(binode, $1);
3688 if ($0->right->type == Xvar) {
3689 struct var *v = cast(var, $0->right);
3690 if (v->var->type == Tnone) {
3691 /* Convert this to a label */
3694 v->var->type = Tlabel;
3695 val = global_alloc(c, Tlabel, v->var, NULL);
3701 ###### print binode cases
3704 do_indent(indent, "use ");
3705 print_exec(b->right, -1, bracket);
3710 ###### propagate binode cases
3713 /* result matches value */
3714 return propagate_types(b->right, c, ok, type, 0);
3716 ###### interp binode cases
3719 rv = interp_exec(c, b->right, &rvtype);
3722 ### The Conditional Statement
3724 This is the biggy and currently the only complex statement. This
3725 subsumes `if`, `while`, `do/while`, `switch`, and some parts of `for`.
3726 It is comprised of a number of parts, all of which are optional though
3727 set combinations apply. Each part is (usually) a key word (`then` is
3728 sometimes optional) followed by either an expression or a code block,
3729 except the `casepart` which is a "key word and an expression" followed
3730 by a code block. The code-block option is valid for all parts and,
3731 where an expression is also allowed, the code block can use the `use`
3732 statement to report a value. If the code block does not report a value
3733 the effect is similar to reporting `True`.
3735 The `else` and `case` parts, as well as `then` when combined with
3736 `if`, can contain a `use` statement which will apply to some
3737 containing conditional statement. `for` parts, `do` parts and `then`
3738 parts used with `for` can never contain a `use`, except in some
3739 subordinate conditional statement.
3741 If there is a `forpart`, it is executed first, only once.
3742 If there is a `dopart`, then it is executed repeatedly providing
3743 always that the `condpart` or `cond`, if present, does not return a non-True
3744 value. `condpart` can fail to return any value if it simply executes
3745 to completion. This is treated the same as returning `True`.
3747 If there is a `thenpart` it will be executed whenever the `condpart`
3748 or `cond` returns True (or does not return any value), but this will happen
3749 *after* `dopart` (when present).
3751 If `elsepart` is present it will be executed at most once when the
3752 condition returns `False` or some value that isn't `True` and isn't
3753 matched by any `casepart`. If there are any `casepart`s, they will be
3754 executed when the condition returns a matching value.
3756 The particular sorts of values allowed in case parts has not yet been
3757 determined in the language design, so nothing is prohibited.
3759 The various blocks in this complex statement potentially provide scope
3760 for variables as described earlier. Each such block must include the
3761 "OpenScope" nonterminal before parsing the block, and must call
3762 `var_block_close()` when closing the block.
3764 The code following "`if`", "`switch`" and "`for`" does not get its own
3765 scope, but is in a scope covering the whole statement, so names
3766 declared there cannot be redeclared elsewhere. Similarly the
3767 condition following "`while`" is in a scope the covers the body
3768 ("`do`" part) of the loop, and which does not allow conditional scope
3769 extension. Code following "`then`" (both looping and non-looping),
3770 "`else`" and "`case`" each get their own local scope.
3772 The type requirements on the code block in a `whilepart` are quite
3773 unusal. It is allowed to return a value of some identifiable type, in
3774 which case the loop aborts and an appropriate `casepart` is run, or it
3775 can return a Boolean, in which case the loop either continues to the
3776 `dopart` (on `True`) or aborts and runs the `elsepart` (on `False`).
3777 This is different both from the `ifpart` code block which is expected to
3778 return a Boolean, or the `switchpart` code block which is expected to
3779 return the same type as the casepart values. The correct analysis of
3780 the type of the `whilepart` code block is the reason for the
3781 `Rboolok` flag which is passed to `propagate_types()`.
3783 The `cond_statement` cannot fit into a `binode` so a new `exec` is
3784 defined. As there are two scopes which cover multiple parts - one for
3785 the whole statement and one for "while" and "do" - and as we will use
3786 the 'struct exec' to track scopes, we actually need two new types of
3787 exec. One is a `binode` for the looping part, the rest is the
3788 `cond_statement`. The `cond_statement` will use an auxilliary `struct
3789 casepart` to track a list of case parts.
3800 struct exec *action;
3801 struct casepart *next;
3803 struct cond_statement {
3805 struct exec *forpart, *condpart, *thenpart, *elsepart;
3806 struct binode *looppart;
3807 struct casepart *casepart;
3810 ###### ast functions
3812 static void free_casepart(struct casepart *cp)
3816 free_exec(cp->value);
3817 free_exec(cp->action);
3824 static void free_cond_statement(struct cond_statement *s)
3828 free_exec(s->forpart);
3829 free_exec(s->condpart);
3830 free_exec(s->looppart);
3831 free_exec(s->thenpart);
3832 free_exec(s->elsepart);
3833 free_casepart(s->casepart);
3837 ###### free exec cases
3838 case Xcond_statement: free_cond_statement(cast(cond_statement, e)); break;
3840 ###### ComplexStatement Grammar
3841 | CondStatement ${ $0 = $<1; }$
3843 ###### expr precedence
3844 $TERM for then while do
3851 // A CondStatement must end with EOL, as does CondSuffix and
3853 // ForPart, ThenPart, SwitchPart, CasePart are non-empty and
3854 // may or may not end with EOL
3855 // WhilePart and IfPart include an appropriate Suffix
3857 // ForPart, SwitchPart, and IfPart open scopes, o we have to close
3858 // them. WhilePart opens and closes its own scope.
3859 CondStatement -> ForPart OptNL ThenPart OptNL WhilePart CondSuffix ${
3862 $0->thenpart = $<TP;
3863 $0->looppart = $<WP;
3864 var_block_close(c, CloseSequential);
3866 | ForPart OptNL WhilePart CondSuffix ${
3869 $0->looppart = $<WP;
3870 var_block_close(c, CloseSequential);
3872 | WhilePart CondSuffix ${
3874 $0->looppart = $<WP;
3876 | SwitchPart OptNL CasePart CondSuffix ${
3878 $0->condpart = $<SP;
3879 $CP->next = $0->casepart;
3880 $0->casepart = $<CP;
3881 var_block_close(c, CloseSequential);
3883 | SwitchPart : IN OptNL CasePart CondSuffix OUT Newlines ${
3885 $0->condpart = $<SP;
3886 $CP->next = $0->casepart;
3887 $0->casepart = $<CP;
3888 var_block_close(c, CloseSequential);
3890 | IfPart IfSuffix ${
3892 $0->condpart = $IP.condpart; $IP.condpart = NULL;
3893 $0->thenpart = $IP.thenpart; $IP.thenpart = NULL;
3894 // This is where we close an "if" statement
3895 var_block_close(c, CloseSequential);
3898 CondSuffix -> IfSuffix ${
3901 | Newlines CasePart CondSuffix ${
3903 $CP->next = $0->casepart;
3904 $0->casepart = $<CP;
3906 | CasePart CondSuffix ${
3908 $CP->next = $0->casepart;
3909 $0->casepart = $<CP;
3912 IfSuffix -> Newlines ${ $0 = new(cond_statement); }$
3913 | Newlines ElsePart ${ $0 = $<EP; }$
3914 | ElsePart ${$0 = $<EP; }$
3916 ElsePart -> else OpenBlock Newlines ${
3917 $0 = new(cond_statement);
3918 $0->elsepart = $<OB;
3919 var_block_close(c, CloseElse);
3921 | else OpenScope CondStatement ${
3922 $0 = new(cond_statement);
3923 $0->elsepart = $<CS;
3924 var_block_close(c, CloseElse);
3928 CasePart -> case Expression OpenScope ColonBlock ${
3929 $0 = calloc(1,sizeof(struct casepart));
3932 var_block_close(c, CloseParallel);
3936 // These scopes are closed in CondStatement
3937 ForPart -> for OpenBlock ${
3941 ThenPart -> then OpenBlock ${
3943 var_block_close(c, CloseSequential);
3947 // This scope is closed in CondStatement
3948 WhilePart -> while UseBlock OptNL do OpenBlock ${
3953 var_block_close(c, CloseSequential);
3954 var_block_close(c, CloseSequential);
3956 | while OpenScope Expression OpenScope ColonBlock ${
3961 var_block_close(c, CloseSequential);
3962 var_block_close(c, CloseSequential);
3966 IfPart -> if UseBlock OptNL then OpenBlock ${
3969 var_block_close(c, CloseParallel);
3971 | if OpenScope Expression OpenScope ColonBlock ${
3974 var_block_close(c, CloseParallel);
3976 | if OpenScope Expression OpenScope OptNL then Block ${
3979 var_block_close(c, CloseParallel);
3983 // This scope is closed in CondStatement
3984 SwitchPart -> switch OpenScope Expression ${
3987 | switch UseBlock ${
3991 ###### print binode cases
3993 if (b->left && b->left->type == Xbinode &&
3994 cast(binode, b->left)->op == Block) {
3996 do_indent(indent, "while {\n");
3998 do_indent(indent, "while\n");
3999 print_exec(b->left, indent+1, bracket);
4001 do_indent(indent, "} do {\n");
4003 do_indent(indent, "do\n");
4004 print_exec(b->right, indent+1, bracket);
4006 do_indent(indent, "}\n");
4008 do_indent(indent, "while ");
4009 print_exec(b->left, 0, bracket);
4014 print_exec(b->right, indent+1, bracket);
4016 do_indent(indent, "}\n");
4020 ###### print exec cases
4022 case Xcond_statement:
4024 struct cond_statement *cs = cast(cond_statement, e);
4025 struct casepart *cp;
4027 do_indent(indent, "for");
4028 if (bracket) printf(" {\n"); else printf("\n");
4029 print_exec(cs->forpart, indent+1, bracket);
4032 do_indent(indent, "} then {\n");
4034 do_indent(indent, "then\n");
4035 print_exec(cs->thenpart, indent+1, bracket);
4037 if (bracket) do_indent(indent, "}\n");
4040 print_exec(cs->looppart, indent, bracket);
4044 do_indent(indent, "switch");
4046 do_indent(indent, "if");
4047 if (cs->condpart && cs->condpart->type == Xbinode &&
4048 cast(binode, cs->condpart)->op == Block) {
4053 print_exec(cs->condpart, indent+1, bracket);
4055 do_indent(indent, "}\n");
4057 do_indent(indent, "then\n");
4058 print_exec(cs->thenpart, indent+1, bracket);
4062 print_exec(cs->condpart, 0, bracket);
4068 print_exec(cs->thenpart, indent+1, bracket);
4070 do_indent(indent, "}\n");
4075 for (cp = cs->casepart; cp; cp = cp->next) {
4076 do_indent(indent, "case ");
4077 print_exec(cp->value, -1, 0);
4082 print_exec(cp->action, indent+1, bracket);
4084 do_indent(indent, "}\n");
4087 do_indent(indent, "else");
4092 print_exec(cs->elsepart, indent+1, bracket);
4094 do_indent(indent, "}\n");
4099 ###### propagate binode cases
4101 t = propagate_types(b->right, c, ok, Tnone, 0);
4102 if (!type_compat(Tnone, t, 0))
4103 *ok = 0; // UNTESTED
4104 return propagate_types(b->left, c, ok, type, rules);
4106 ###### propagate exec cases
4107 case Xcond_statement:
4109 // forpart and looppart->right must return Tnone
4110 // thenpart must return Tnone if there is a loopart,
4111 // otherwise it is like elsepart.
4113 // be bool if there is no casepart
4114 // match casepart->values if there is a switchpart
4115 // either be bool or match casepart->value if there
4117 // elsepart and casepart->action must match the return type
4118 // expected of this statement.
4119 struct cond_statement *cs = cast(cond_statement, prog);
4120 struct casepart *cp;
4122 t = propagate_types(cs->forpart, c, ok, Tnone, 0);
4123 if (!type_compat(Tnone, t, 0))
4124 *ok = 0; // UNTESTED
4127 t = propagate_types(cs->thenpart, c, ok, Tnone, 0);
4128 if (!type_compat(Tnone, t, 0))
4129 *ok = 0; // UNTESTED
4131 if (cs->casepart == NULL) {
4132 propagate_types(cs->condpart, c, ok, Tbool, 0);
4133 propagate_types(cs->looppart, c, ok, Tbool, 0);
4135 /* Condpart must match case values, with bool permitted */
4137 for (cp = cs->casepart;
4138 cp && !t; cp = cp->next)
4139 t = propagate_types(cp->value, c, ok, NULL, 0);
4140 if (!t && cs->condpart)
4141 t = propagate_types(cs->condpart, c, ok, NULL, Rboolok); // UNTESTED
4142 if (!t && cs->looppart)
4143 t = propagate_types(cs->looppart, c, ok, NULL, Rboolok); // UNTESTED
4144 // Now we have a type (I hope) push it down
4146 for (cp = cs->casepart; cp; cp = cp->next)
4147 propagate_types(cp->value, c, ok, t, 0);
4148 propagate_types(cs->condpart, c, ok, t, Rboolok);
4149 propagate_types(cs->looppart, c, ok, t, Rboolok);
4152 // (if)then, else, and case parts must return expected type.
4153 if (!cs->looppart && !type)
4154 type = propagate_types(cs->thenpart, c, ok, NULL, rules);
4156 type = propagate_types(cs->elsepart, c, ok, NULL, rules);
4157 for (cp = cs->casepart;
4159 cp = cp->next) // UNTESTED
4160 type = propagate_types(cp->action, c, ok, NULL, rules); // UNTESTED
4163 propagate_types(cs->thenpart, c, ok, type, rules);
4164 propagate_types(cs->elsepart, c, ok, type, rules);
4165 for (cp = cs->casepart; cp ; cp = cp->next)
4166 propagate_types(cp->action, c, ok, type, rules);
4172 ###### interp binode cases
4174 // This just performs one iterration of the loop
4175 rv = interp_exec(c, b->left, &rvtype);
4176 if (rvtype == Tnone ||
4177 (rvtype == Tbool && rv.bool != 0))
4178 // cnd is Tnone or Tbool, doesn't need to be freed
4179 interp_exec(c, b->right, NULL);
4182 ###### interp exec cases
4183 case Xcond_statement:
4185 struct value v, cnd;
4186 struct type *vtype, *cndtype;
4187 struct casepart *cp;
4188 struct cond_statement *cs = cast(cond_statement, e);
4191 interp_exec(c, cs->forpart, NULL);
4193 while ((cnd = interp_exec(c, cs->looppart, &cndtype)),
4194 cndtype == Tnone || (cndtype == Tbool && cnd.bool != 0))
4195 interp_exec(c, cs->thenpart, NULL);
4197 cnd = interp_exec(c, cs->condpart, &cndtype);
4198 if ((cndtype == Tnone ||
4199 (cndtype == Tbool && cnd.bool != 0))) {
4200 // cnd is Tnone or Tbool, doesn't need to be freed
4201 rv = interp_exec(c, cs->thenpart, &rvtype);
4202 // skip else (and cases)
4206 for (cp = cs->casepart; cp; cp = cp->next) {
4207 v = interp_exec(c, cp->value, &vtype);
4208 if (value_cmp(cndtype, vtype, &v, &cnd) == 0) {
4209 free_value(vtype, &v);
4210 free_value(cndtype, &cnd);
4211 rv = interp_exec(c, cp->action, &rvtype);
4214 free_value(vtype, &v);
4216 free_value(cndtype, &cnd);
4218 rv = interp_exec(c, cs->elsepart, &rvtype);
4225 ### Top level structure
4227 All the language elements so far can be used in various places. Now
4228 it is time to clarify what those places are.
4230 At the top level of a file there will be a number of declarations.
4231 Many of the things that can be declared haven't been described yet,
4232 such as functions, procedures, imports, and probably more.
4233 For now there are two sorts of things that can appear at the top
4234 level. They are predefined constants, `struct` types, and the `main`
4235 function. While the syntax will allow the `main` function to appear
4236 multiple times, that will trigger an error if it is actually attempted.
4238 The various declarations do not return anything. They store the
4239 various declarations in the parse context.
4241 ###### Parser: grammar
4244 Ocean -> OptNL DeclarationList
4246 ## declare terminals
4253 DeclarationList -> Declaration
4254 | DeclarationList Declaration
4256 Declaration -> ERROR Newlines ${
4257 tok_err(c, // UNTESTED
4258 "error: unhandled parse error", &$1);
4264 ## top level grammar
4268 ### The `const` section
4270 As well as being defined in with the code that uses them, constants
4271 can be declared at the top level. These have full-file scope, so they
4272 are always `InScope`. The value of a top level constant can be given
4273 as an expression, and this is evaluated immediately rather than in the
4274 later interpretation stage. Once we add functions to the language, we
4275 will need rules concern which, if any, can be used to define a top
4278 Constants are defined in a section that starts with the reserved word
4279 `const` and then has a block with a list of assignment statements.
4280 For syntactic consistency, these must use the double-colon syntax to
4281 make it clear that they are constants. Type can also be given: if
4282 not, the type will be determined during analysis, as with other
4285 As the types constants are inserted at the head of a list, printing
4286 them in the same order that they were read is not straight forward.
4287 We take a quadratic approach here and count the number of constants
4288 (variables of depth 0), then count down from there, each time
4289 searching through for the Nth constant for decreasing N.
4291 ###### top level grammar
4295 DeclareConstant -> const { IN OptNL ConstList OUT OptNL } Newlines
4296 | const { SimpleConstList } Newlines
4297 | const IN OptNL ConstList OUT Newlines
4298 | const SimpleConstList Newlines
4300 ConstList -> ConstList SimpleConstLine
4302 SimpleConstList -> SimpleConstList ; Const
4305 SimpleConstLine -> SimpleConstList Newlines
4306 | ERROR Newlines ${ tok_err(c, "Syntax error in constant", &$1); }$
4309 CType -> Type ${ $0 = $<1; }$
4312 Const -> IDENTIFIER :: CType = Expression ${ {
4316 v = var_decl(c, $1.txt);
4318 struct var *var = new_pos(var, $1);
4319 v->where_decl = var;
4324 v = var_ref(c, $1.txt);
4325 tok_err(c, "error: name already declared", &$1);
4326 type_err(c, "info: this is where '%v' was first declared",
4327 v->where_decl, NULL, 0, NULL);
4331 propagate_types($5, c, &ok, $3, 0);
4336 struct value res = interp_exec(c, $5, &v->type);
4337 global_alloc(c, v->type, v, &res);
4341 ###### print const decls
4346 while (target != 0) {
4348 for (v = context.in_scope; v; v=v->in_scope)
4349 if (v->depth == 0) {
4360 struct value *val = var_value(&context, v);
4361 printf(" %.*s :: ", v->name->name.len, v->name->name.txt);
4362 type_print(v->type, stdout);
4364 if (v->type == Tstr)
4366 print_value(v->type, val);
4367 if (v->type == Tstr)
4375 ### Finally the whole `main` function.
4377 An Ocean program can currently have only one function - `main` - and
4378 that must exist. It expects an array of strings with a provided size.
4379 Following this is a `block` which is the code to execute.
4381 As this is the top level, several things are handled a bit
4383 The function is not interpreted by `interp_exec` as that isn't
4384 passed the argument list which the program requires. Similarly type
4385 analysis is a bit more interesting at this level.
4387 ###### top level grammar
4389 DeclareFunction -> MainFunction ${ {
4391 type_err(c, "\"main\" defined a second time",
4397 ###### print binode cases
4400 do_indent(indent, "func main(");
4401 for (b2 = cast(binode, b->left); b2; b2 = cast(binode, b2->right)) {
4402 struct variable *v = cast(var, b2->left)->var;
4404 print_exec(b2->left, 0, 0);
4406 type_print(v->type, stdout);
4412 print_exec(b->right, indent+1, bracket);
4414 do_indent(indent, "}\n");
4417 ###### propagate binode cases
4419 case Func: abort(); // NOTEST
4421 ###### core functions
4423 static int analyse_prog(struct exec *prog, struct parse_context *c)
4425 struct binode *bp = cast(binode, prog);
4429 struct type *argv_type;
4430 struct text argv_type_name = { " argv", 5 };
4435 argv_type = add_type(c, argv_type_name, &array_prototype);
4436 argv_type->array.member = Tstr;
4437 argv_type->array.unspec = 1;
4439 for (b = cast(binode, bp->left); b; b = cast(binode, b->right)) {
4443 propagate_types(b->left, c, &ok, argv_type, 0);
4445 default: /* invalid */ // NOTEST
4446 propagate_types(b->left, c, &ok, Tnone, 0); // NOTEST
4452 propagate_types(bp->right, c, &ok, Tnone, 0);
4457 /* Make sure everything is still consistent */
4458 propagate_types(bp->right, c, &ok, Tnone, 0);
4460 return 0; // UNTESTED
4465 static void interp_prog(struct parse_context *c, struct exec *prog,
4466 int argc, char **argv)
4468 struct binode *p = cast(binode, prog);
4476 al = cast(binode, p->left);
4478 struct var *v = cast(var, al->left);
4479 struct value *vl = var_value(c, v->var);
4489 mpq_set_ui(argcq, argc, 1);
4490 memcpy(var_value(c, t->array.vsize), &argcq, sizeof(argcq));
4491 t->prepare_type(c, t, 0);
4492 array_init(v->var->type, vl);
4493 for (i = 0; i < argc; i++) {
4494 struct value *vl2 = vl->array + i * v->var->type->array.member->size;
4497 arg.str.txt = argv[i];
4498 arg.str.len = strlen(argv[i]);
4499 free_value(Tstr, vl2);
4500 dup_value(Tstr, &arg, vl2);
4504 al = cast(binode, al->right);
4506 v = interp_exec(c, p, &vtype);
4507 free_value(vtype, &v);
4510 ###### interp binode cases
4511 case List: abort(); // NOTEST
4514 rv = interp_exec(c, b->right, &rvtype);
4517 ## And now to test it out.
4519 Having a language requires having a "hello world" program. I'll
4520 provide a little more than that: a program that prints "Hello world"
4521 finds the GCD of two numbers, prints the first few elements of
4522 Fibonacci, performs a binary search for a number, and a few other
4523 things which will likely grow as the languages grows.
4525 ###### File: oceani.mk
4528 @echo "===== DEMO ====="
4529 ./oceani --section "demo: hello" oceani.mdc 55 33
4535 four ::= 2 + 2 ; five ::= 10/2
4536 const pie ::= "I like Pie";
4537 cake ::= "The cake is"
4548 print "Hello World, what lovely oceans you have!"
4549 print "Are there", five, "?"
4550 print pi, pie, "but", cake
4552 A := $argv[1]; B := $argv[2]
4554 /* When a variable is defined in both branches of an 'if',
4555 * and used afterwards, the variables are merged.
4561 print "Is", A, "bigger than", B,"? ", bigger
4562 /* If a variable is not used after the 'if', no
4563 * merge happens, so types can be different
4566 double:string = "yes"
4567 print A, "is more than twice", B, "?", double
4570 print "double", B, "is", double
4575 if a > 0 and then b > 0:
4581 print "GCD of", A, "and", B,"is", a
4583 print a, "is not positive, cannot calculate GCD"
4585 print b, "is not positive, cannot calculate GCD"
4590 print "Fibonacci:", f1,f2,
4591 then togo = togo - 1
4599 /* Binary search... */
4604 mid := (lo + hi) / 2
4617 print "Yay, I found", target
4619 print "Closest I found was", lo
4624 // "middle square" PRNG. Not particularly good, but one my
4625 // Dad taught me - the first one I ever heard of.
4626 for i:=1; then i = i + 1; while i < size:
4627 n := list[i-1] * list[i-1]
4628 list[i] = (n / 100) % 10 000
4630 print "Before sort:",
4631 for i:=0; then i = i + 1; while i < size:
4635 for i := 1; then i=i+1; while i < size:
4636 for j:=i-1; then j=j-1; while j >= 0:
4637 if list[j] > list[j+1]:
4641 print " After sort:",
4642 for i:=0; then i = i + 1; while i < size:
4646 if 1 == 2 then print "yes"; else print "no"
4650 bob.alive = (bob.name == "Hello")
4651 print "bob", "is" if bob.alive else "isn't", "alive"