1 # Ocean Interpreter - Jamison Creek version
3 Ocean is intended to be a compiled language, so this interpreter is
4 not targeted at being the final product. It is, rather, an intermediate
5 stage and fills that role in two distinct ways.
7 Firstly, it exists as a platform to experiment with the early language
8 design. An interpreter is easy to write and easy to get working, so
9 the barrier for entry is lower if I aim to start with an interpreter.
11 Secondly, the plan for the Ocean compiler is to write it in the
12 [Ocean language](http://ocean-lang.org). To achieve this we naturally
13 need some sort of boot-strap process and this interpreter - written in
14 portable C - will fill that role. It will be used to bootstrap the
17 Two features that are not needed to fill either of these roles are
18 performance and completeness. The interpreter only needs to be fast
19 enough to run small test programs and occasionally to run the compiler
20 on itself. It only needs to be complete enough to test aspects of the
21 design which are developed before the compiler is working, and to run
22 the compiler on itself. Any features not used by the compiler when
23 compiling itself are superfluous. They may be included anyway, but
26 Nonetheless, the interpreter should end up being reasonably complete,
27 and any performance bottlenecks which appear and are easily fixed, will
32 This third version of the interpreter exists to test out some initial
33 ideas relating to types. Particularly it adds arrays (indexed from
34 zero) and simple structures. Basic control flow and variable scoping
35 are already fairly well established, as are basic numerical and
38 Some operators that have only recently been added, and so have not
39 generated all that much experience yet are "and then" and "or else" as
40 short-circuit Boolean operators, and the "if ... else" trinary
41 operator which can select between two expressions based on a third
42 (which appears syntactically in the middle).
44 The "func" clause currently only allows a "main" function to be
45 declared. That will be extended when proper function support is added.
47 An element that is present purely to make a usable language, and
48 without any expectation that they will remain, is the "print" statement
49 which performs simple output.
51 The current scalar types are "number", "Boolean", and "string".
52 Boolean will likely stay in its current form, the other two might, but
53 could just as easily be changed.
57 Versions of the interpreter which obviously do not support a complete
58 language will be named after creeks and streams. This one is Jamison
61 Once we have something reasonably resembling a complete language, the
62 names of rivers will be used.
63 Early versions of the compiler will be named after seas. Major
64 releases of the compiler will be named after oceans. Hopefully I will
65 be finished once I get to the Pacific Ocean release.
69 As well as parsing and executing a program, the interpreter can print
70 out the program from the parsed internal structure. This is useful
71 for validating the parsing.
72 So the main requirements of the interpreter are:
74 - Parse the program, possibly with tracing,
75 - Analyse the parsed program to ensure consistency,
77 - Execute the "main" function in the program, if no parsing or
78 consistency errors were found.
80 This is all performed by a single C program extracted with
83 There will be two formats for printing the program: a default and one
84 that uses bracketing. So a `--bracket` command line option is needed
85 for that. Normally the first code section found is used, however an
86 alternate section can be requested so that a file (such as this one)
87 can contain multiple programs. This is effected with the `--section`
90 This code must be compiled with `-fplan9-extensions` so that anonymous
91 structures can be used.
93 ###### File: oceani.mk
95 myCFLAGS := -Wall -g -fplan9-extensions
96 CFLAGS := $(filter-out $(myCFLAGS),$(CFLAGS)) $(myCFLAGS)
97 myLDLIBS:= libparser.o libscanner.o libmdcode.o -licuuc
98 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
100 all :: $(LDLIBS) oceani
101 oceani.c oceani.h : oceani.mdc parsergen
102 ./parsergen -o oceani --LALR --tag Parser oceani.mdc
103 oceani.mk: oceani.mdc md2c
106 oceani: oceani.o $(LDLIBS)
107 $(CC) $(CFLAGS) -o oceani oceani.o $(LDLIBS)
109 ###### Parser: header
111 struct parse_context;
113 struct parse_context {
114 struct token_config config;
123 #define container_of(ptr, type, member) ({ \
124 const typeof( ((type *)0)->member ) *__mptr = (ptr); \
125 (type *)( (char *)__mptr - offsetof(type,member) );})
127 #define config2context(_conf) container_of(_conf, struct parse_context, \
130 ###### Parser: reduce
131 struct parse_context *c = config2context(config);
139 #include <sys/mman.h>
158 static char Usage[] =
159 "Usage: oceani --trace --print --noexec --brackets --section=SectionName prog.ocn\n";
160 static const struct option long_options[] = {
161 {"trace", 0, NULL, 't'},
162 {"print", 0, NULL, 'p'},
163 {"noexec", 0, NULL, 'n'},
164 {"brackets", 0, NULL, 'b'},
165 {"section", 1, NULL, 's'},
168 const char *options = "tpnbs";
170 static void pr_err(char *msg) // NOTEST
172 fprintf(stderr, "%s\n", msg); // NOTEST
175 int main(int argc, char *argv[])
180 struct section *s, *ss;
181 char *section = NULL;
182 struct parse_context context = {
184 .ignored = (1 << TK_mark),
185 .number_chars = ".,_+- ",
190 int doprint=0, dotrace=0, doexec=1, brackets=0;
192 while ((opt = getopt_long(argc, argv, options, long_options, NULL))
195 case 't': dotrace=1; break;
196 case 'p': doprint=1; break;
197 case 'n': doexec=0; break;
198 case 'b': brackets=1; break;
199 case 's': section = optarg; break;
200 default: fprintf(stderr, Usage);
204 if (optind >= argc) {
205 fprintf(stderr, "oceani: no input file given\n");
208 fd = open(argv[optind], O_RDONLY);
210 fprintf(stderr, "oceani: cannot open %s\n", argv[optind]);
213 context.file_name = argv[optind];
214 len = lseek(fd, 0, 2);
215 file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
216 s = code_extract(file, file+len, pr_err);
218 fprintf(stderr, "oceani: could not find any code in %s\n",
223 ## context initialization
226 for (ss = s; ss; ss = ss->next) {
227 struct text sec = ss->section;
228 if (sec.len == strlen(section) &&
229 strncmp(sec.txt, section, sec.len) == 0)
233 fprintf(stderr, "oceani: cannot find section %s\n",
239 parse_oceani(ss->code, &context.config, dotrace ? stderr : NULL);
242 fprintf(stderr, "oceani: no main function found.\n");
243 context.parse_error = 1;
245 if (context.prog && doprint) {
248 print_exec(context.prog, 0, brackets);
250 if (context.prog && doexec && !context.parse_error) {
251 if (!analyse_prog(context.prog, &context)) {
252 fprintf(stderr, "oceani: type error in program - not running.\n");
255 interp_prog(&context, context.prog, argc - optind, argv+optind);
257 free_exec(context.prog);
260 struct section *t = s->next;
266 ## free context types
267 exit(context.parse_error ? 1 : 0);
272 The four requirements of parse, analyse, print, interpret apply to
273 each language element individually so that is how most of the code
276 Three of the four are fairly self explanatory. The one that requires
277 a little explanation is the analysis step.
279 The current language design does not require the types of variables to
280 be declared, but they must still have a single type. Different
281 operations impose different requirements on the variables, for example
282 addition requires both arguments to be numeric, and assignment
283 requires the variable on the left to have the same type as the
284 expression on the right.
286 Analysis involves propagating these type requirements around and
287 consequently setting the type of each variable. If any requirements
288 are violated (e.g. a string is compared with a number) or if a
289 variable needs to have two different types, then an error is raised
290 and the program will not run.
292 If the same variable is declared in both branchs of an 'if/else', or
293 in all cases of a 'switch' then the multiple instances may be merged
294 into just one variable if the variable is referenced after the
295 conditional statement. When this happens, the types must naturally be
296 consistent across all the branches. When the variable is not used
297 outside the if, the variables in the different branches are distinct
298 and can be of different types.
300 Undeclared names may only appear in "use" statements and "case" expressions.
301 These names are given a type of "label" and a unique value.
302 This allows them to fill the role of a name in an enumerated type, which
303 is useful for testing the `switch` statement.
305 As we will see, the condition part of a `while` statement can return
306 either a Boolean or some other type. This requires that the expected
307 type that gets passed around comprises a type and a flag to indicate
308 that `Tbool` is also permitted.
310 As there are, as yet, no distinct types that are compatible, there
311 isn't much subtlety in the analysis. When we have distinct number
312 types, this will become more interesting.
316 When analysis discovers an inconsistency it needs to report an error;
317 just refusing to run the code ensures that the error doesn't cascade,
318 but by itself it isn't very useful. A clear understanding of the sort
319 of error message that are useful will help guide the process of
322 At a simplistic level, the only sort of error that type analysis can
323 report is that the type of some construct doesn't match a contextual
324 requirement. For example, in `4 + "hello"` the addition provides a
325 contextual requirement for numbers, but `"hello"` is not a number. In
326 this particular example no further information is needed as the types
327 are obvious from local information. When a variable is involved that
328 isn't the case. It may be helpful to explain why the variable has a
329 particular type, by indicating the location where the type was set,
330 whether by declaration or usage.
332 Using a recursive-descent analysis we can easily detect a problem at
333 multiple locations. In "`hello:= "there"; 4 + hello`" the addition
334 will detect that one argument is not a number and the usage of `hello`
335 will detect that a number was wanted, but not provided. In this
336 (early) version of the language, we will generate error reports at
337 multiple locations, so the use of `hello` will report an error and
338 explain were the value was set, and the addition will report an error
339 and say why numbers are needed. To be able to report locations for
340 errors, each language element will need to record a file location
341 (line and column) and each variable will need to record the language
342 element where its type was set. For now we will assume that each line
343 of an error message indicates one location in the file, and up to 2
344 types. So we provide a `printf`-like function which takes a format, a
345 location (a `struct exec` which has not yet been introduced), and 2
346 types. "`%1`" reports the first type, "`%2`" reports the second. We
347 will need a function to print the location, once we know how that is
348 stored. e As will be explained later, there are sometimes extra rules for
349 type matching and they might affect error messages, we need to pass those
352 As well as type errors, we sometimes need to report problems with
353 tokens, which might be unexpected or might name a type that has not
354 been defined. For these we have `tok_err()` which reports an error
355 with a given token. Each of the error functions sets the flag in the
356 context so indicate that parsing failed.
360 static void fput_loc(struct exec *loc, FILE *f);
362 ###### core functions
364 static void type_err(struct parse_context *c,
365 char *fmt, struct exec *loc,
366 struct type *t1, int rules, struct type *t2)
368 fprintf(stderr, "%s:", c->file_name);
369 fput_loc(loc, stderr);
370 for (; *fmt ; fmt++) {
377 case '%': fputc(*fmt, stderr); break; // NOTEST
378 default: fputc('?', stderr); break; // NOTEST
380 type_print(t1, stderr);
383 type_print(t2, stderr);
392 static void tok_err(struct parse_context *c, char *fmt, struct token *t)
394 fprintf(stderr, "%s:%d:%d: %s: %.*s\n", c->file_name, t->line, t->col, fmt,
395 t->txt.len, t->txt.txt);
399 ## Entities: declared and predeclared.
401 There are various "things" that the language and/or the interpreter
402 needs to know about to parse and execute a program. These include
403 types, variables, values, and executable code. These are all lumped
404 together under the term "entities" (calling them "objects" would be
405 confusing) and introduced here. The following section will present the
406 different specific code elements which comprise or manipulate these
411 Values come in a wide range of types, with more likely to be added.
412 Each type needs to be able to print its own values (for convenience at
413 least) as well as to compare two values, at least for equality and
414 possibly for order. For now, values might need to be duplicated and
415 freed, though eventually such manipulations will be better integrated
418 Rather than requiring every numeric type to support all numeric
419 operations (add, multiple, etc), we allow types to be able to present
420 as one of a few standard types: integer, float, and fraction. The
421 existence of these conversion functions eventually enable types to
422 determine if they are compatible with other types, though such types
423 have not yet been implemented.
425 Named type are stored in a simple linked list. Objects of each type are
426 "values" which are often passed around by value.
433 ## value union fields
441 void (*init)(struct type *type, struct value *val);
442 void (*prepare_type)(struct parse_context *c, struct type *type, int parse_time);
443 void (*print)(struct type *type, struct value *val);
444 void (*print_type)(struct type *type, FILE *f);
445 int (*cmp_order)(struct type *t1, struct type *t2,
446 struct value *v1, struct value *v2);
447 int (*cmp_eq)(struct type *t1, struct type *t2,
448 struct value *v1, struct value *v2);
449 void (*dup)(struct type *type, struct value *vold, struct value *vnew);
450 void (*free)(struct type *type, struct value *val);
451 void (*free_type)(struct type *t);
452 long long (*to_int)(struct value *v);
453 double (*to_float)(struct value *v);
454 int (*to_mpq)(mpq_t *q, struct value *v);
463 struct type *typelist;
467 static struct type *find_type(struct parse_context *c, struct text s)
469 struct type *l = c->typelist;
472 text_cmp(l->name, s) != 0)
477 static struct type *add_type(struct parse_context *c, struct text s,
482 n = calloc(1, sizeof(*n));
485 n->next = c->typelist;
490 static void free_type(struct type *t)
492 /* The type is always a reference to something in the
493 * context, so we don't need to free anything.
497 static void free_value(struct type *type, struct value *v)
503 static void type_print(struct type *type, FILE *f)
506 fputs("*unknown*type*", f); // NOTEST
507 else if (type->name.len)
508 fprintf(f, "%.*s", type->name.len, type->name.txt);
509 else if (type->print_type)
510 type->print_type(type, f);
512 fputs("*invalid*type*", f); // NOTEST
515 static void val_init(struct type *type, struct value *val)
517 if (type && type->init)
518 type->init(type, val);
521 static void dup_value(struct type *type,
522 struct value *vold, struct value *vnew)
524 if (type && type->dup)
525 type->dup(type, vold, vnew);
528 static int value_cmp(struct type *tl, struct type *tr,
529 struct value *left, struct value *right)
531 if (tl && tl->cmp_order)
532 return tl->cmp_order(tl, tr, left, right);
533 if (tl && tl->cmp_eq) // NOTEST
534 return tl->cmp_eq(tl, tr, left, right); // NOTEST
538 static void print_value(struct type *type, struct value *v)
540 if (type && type->print)
541 type->print(type, v);
543 printf("*Unknown*"); // NOTEST
548 static void free_value(struct type *type, struct value *v);
549 static int type_compat(struct type *require, struct type *have, int rules);
550 static void type_print(struct type *type, FILE *f);
551 static void val_init(struct type *type, struct value *v);
552 static void dup_value(struct type *type,
553 struct value *vold, struct value *vnew);
554 static int value_cmp(struct type *tl, struct type *tr,
555 struct value *left, struct value *right);
556 static void print_value(struct type *type, struct value *v);
558 ###### free context types
560 while (context.typelist) {
561 struct type *t = context.typelist;
563 context.typelist = t->next;
569 Type can be specified for local variables, for fields in a structure,
570 for formal parameters to functions, and possibly elsewhere. Different
571 rules may apply in different contexts. As a minimum, a named type may
572 always be used. Currently the type of a formal parameter can be
573 different from types in other contexts, so we have a separate grammar
579 Type -> IDENTIFIER ${
580 $0 = find_type(c, $1.txt);
583 "error: undefined type", &$1);
590 FormalType -> Type ${ $0 = $<1; }$
591 ## formal type grammar
595 Values of the base types can be numbers, which we represent as
596 multi-precision fractions, strings, Booleans and labels. When
597 analysing the program we also need to allow for places where no value
598 is meaningful (type `Tnone`) and where we don't know what type to
599 expect yet (type is `NULL`).
601 Values are never shared, they are always copied when used, and freed
602 when no longer needed.
604 When propagating type information around the program, we need to
605 determine if two types are compatible, where type `NULL` is compatible
606 with anything. There are two special cases with type compatibility,
607 both related to the Conditional Statement which will be described
608 later. In some cases a Boolean can be accepted as well as some other
609 primary type, and in others any type is acceptable except a label (`Vlabel`).
610 A separate function encoding these cases will simplify some code later.
612 ###### type functions
614 int (*compat)(struct type *this, struct type *other);
618 static int type_compat(struct type *require, struct type *have, int rules)
620 if ((rules & Rboolok) && have == Tbool)
622 if ((rules & Rnolabel) && have == Tlabel)
624 if (!require || !have)
628 return require->compat(require, have);
630 return require == have;
635 #include "parse_string.h"
636 #include "parse_number.h"
639 myLDLIBS := libnumber.o libstring.o -lgmp
640 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
642 ###### type union fields
643 enum vtype {Vnone, Vstr, Vnum, Vbool, Vlabel} vtype;
645 ###### value union fields
652 static void _free_value(struct type *type, struct value *v)
656 switch (type->vtype) {
658 case Vstr: free(v->str.txt); break;
659 case Vnum: mpq_clear(v->num); break;
665 ###### value functions
667 static void _val_init(struct type *type, struct value *val)
669 switch(type->vtype) {
670 case Vnone: // NOTEST
673 mpq_init(val->num); break;
675 val->str.txt = malloc(1);
687 static void _dup_value(struct type *type,
688 struct value *vold, struct value *vnew)
690 switch (type->vtype) {
691 case Vnone: // NOTEST
694 vnew->label = vold->label;
697 vnew->bool = vold->bool;
701 mpq_set(vnew->num, vold->num);
704 vnew->str.len = vold->str.len;
705 vnew->str.txt = malloc(vnew->str.len);
706 memcpy(vnew->str.txt, vold->str.txt, vnew->str.len);
711 static int _value_cmp(struct type *tl, struct type *tr,
712 struct value *left, struct value *right)
716 return tl - tr; // NOTEST
718 case Vlabel: cmp = left->label == right->label ? 0 : 1; break;
719 case Vnum: cmp = mpq_cmp(left->num, right->num); break;
720 case Vstr: cmp = text_cmp(left->str, right->str); break;
721 case Vbool: cmp = left->bool - right->bool; break;
722 case Vnone: cmp = 0; // NOTEST
727 static void _print_value(struct type *type, struct value *v)
729 switch (type->vtype) {
730 case Vnone: // NOTEST
731 printf("*no-value*"); break; // NOTEST
732 case Vlabel: // NOTEST
733 printf("*label-%p*", v->label); break; // NOTEST
735 printf("%.*s", v->str.len, v->str.txt); break;
737 printf("%s", v->bool ? "True":"False"); break;
742 mpf_set_q(fl, v->num);
743 gmp_printf("%Fg", fl);
750 static void _free_value(struct type *type, struct value *v);
752 static struct type base_prototype = {
754 .print = _print_value,
755 .cmp_order = _value_cmp,
756 .cmp_eq = _value_cmp,
761 static struct type *Tbool, *Tstr, *Tnum, *Tnone, *Tlabel;
764 static struct type *add_base_type(struct parse_context *c, char *n,
765 enum vtype vt, int size)
767 struct text txt = { n, strlen(n) };
770 t = add_type(c, txt, &base_prototype);
773 t->align = size > sizeof(void*) ? sizeof(void*) : size;
774 if (t->size & (t->align - 1))
775 t->size = (t->size | (t->align - 1)) + 1;
779 ###### context initialization
781 Tbool = add_base_type(&context, "Boolean", Vbool, sizeof(char));
782 Tstr = add_base_type(&context, "string", Vstr, sizeof(struct text));
783 Tnum = add_base_type(&context, "number", Vnum, sizeof(mpq_t));
784 Tnone = add_base_type(&context, "none", Vnone, 0);
785 Tlabel = add_base_type(&context, "label", Vlabel, sizeof(void*));
789 Variables are scoped named values. We store the names in a linked list
790 of "bindings" sorted in lexical order, and use sequential search and
797 struct binding *next; // in lexical order
801 This linked list is stored in the parse context so that "reduce"
802 functions can find or add variables, and so the analysis phase can
803 ensure that every variable gets a type.
807 struct binding *varlist; // In lexical order
811 static struct binding *find_binding(struct parse_context *c, struct text s)
813 struct binding **l = &c->varlist;
818 (cmp = text_cmp((*l)->name, s)) < 0)
822 n = calloc(1, sizeof(*n));
829 Each name can be linked to multiple variables defined in different
830 scopes. Each scope starts where the name is declared and continues
831 until the end of the containing code block. Scopes of a given name
832 cannot nest, so a declaration while a name is in-scope is an error.
834 ###### binding fields
835 struct variable *var;
839 struct variable *previous;
841 struct binding *name;
842 struct exec *where_decl;// where name was declared
843 struct exec *where_set; // where type was set
847 While the naming seems strange, we include local constants in the
848 definition of variables. A name declared `var := value` can
849 subsequently be changed, but a name declared `var ::= value` cannot -
852 ###### variable fields
855 Scopes in parallel branches can be partially merged. More
856 specifically, if a given name is declared in both branches of an
857 if/else then its scope is a candidate for merging. Similarly if
858 every branch of an exhaustive switch (e.g. has an "else" clause)
859 declares a given name, then the scopes from the branches are
860 candidates for merging.
862 Note that names declared inside a loop (which is only parallel to
863 itself) are never visible after the loop. Similarly names defined in
864 scopes which are not parallel, such as those started by `for` and
865 `switch`, are never visible after the scope. Only variables defined in
866 both `then` and `else` (including the implicit then after an `if`, and
867 excluding `then` used with `for`) and in all `case`s and `else` of a
868 `switch` or `while` can be visible beyond the `if`/`switch`/`while`.
870 Labels, which are a bit like variables, follow different rules.
871 Labels are not explicitly declared, but if an undeclared name appears
872 in a context where a label is legal, that effectively declares the
873 name as a label. The declaration remains in force (or in scope) at
874 least to the end of the immediately containing block and conditionally
875 in any larger containing block which does not declare the name in some
876 other way. Importantly, the conditional scope extension happens even
877 if the label is only used in one parallel branch of a conditional --
878 when used in one branch it is treated as having been declared in all
881 Merge candidates are tentatively visible beyond the end of the
882 branching statement which creates them. If the name is used, the
883 merge is affirmed and they become a single variable visible at the
884 outer layer. If not - if it is redeclared first - the merge lapses.
886 To track scopes we have an extra stack, implemented as a linked list,
887 which roughly parallels the parse stack and which is used exclusively
888 for scoping. When a new scope is opened, a new frame is pushed and
889 the child-count of the parent frame is incremented. This child-count
890 is used to distinguish between the first of a set of parallel scopes,
891 in which declared variables must not be in scope, and subsequent
892 branches, whether they may already be conditionally scoped.
894 To push a new frame *before* any code in the frame is parsed, we need a
895 grammar reduction. This is most easily achieved with a grammar
896 element which derives the empty string, and creates the new scope when
897 it is recognised. This can be placed, for example, between a keyword
898 like "if" and the code following it.
902 struct scope *parent;
908 struct scope *scope_stack;
911 static void scope_pop(struct parse_context *c)
913 struct scope *s = c->scope_stack;
915 c->scope_stack = s->parent;
920 static void scope_push(struct parse_context *c)
922 struct scope *s = calloc(1, sizeof(*s));
924 c->scope_stack->child_count += 1;
925 s->parent = c->scope_stack;
933 OpenScope -> ${ scope_push(c); }$
934 ClosePara -> ${ var_block_close(c, CloseParallel); }$
936 Each variable records a scope depth and is in one of four states:
938 - "in scope". This is the case between the declaration of the
939 variable and the end of the containing block, and also between
940 the usage with affirms a merge and the end of that block.
942 The scope depth is not greater than the current parse context scope
943 nest depth. When the block of that depth closes, the state will
944 change. To achieve this, all "in scope" variables are linked
945 together as a stack in nesting order.
947 - "pending". The "in scope" block has closed, but other parallel
948 scopes are still being processed. So far, every parallel block at
949 the same level that has closed has declared the name.
951 The scope depth is the depth of the last parallel block that
952 enclosed the declaration, and that has closed.
954 - "conditionally in scope". The "in scope" block and all parallel
955 scopes have closed, and no further mention of the name has been
956 seen. This state includes a secondary nest depth which records the
957 outermost scope seen since the variable became conditionally in
958 scope. If a use of the name is found, the variable becomes "in
959 scope" and that secondary depth becomes the recorded scope depth.
960 If the name is declared as a new variable, the old variable becomes
961 "out of scope" and the recorded scope depth stays unchanged.
963 - "out of scope". The variable is neither in scope nor conditionally
964 in scope. It is permanently out of scope now and can be removed from
965 the "in scope" stack.
967 ###### variable fields
968 int depth, min_depth;
969 enum { OutScope, PendingScope, CondScope, InScope } scope;
970 struct variable *in_scope;
974 struct variable *in_scope;
976 All variables with the same name are linked together using the
977 'previous' link. Those variable that have been affirmatively merged all
978 have a 'merged' pointer that points to one primary variable - the most
979 recently declared instance. When merging variables, we need to also
980 adjust the 'merged' pointer on any other variables that had previously
981 been merged with the one that will no longer be primary.
983 A variable that is no longer the most recent instance of a name may
984 still have "pending" scope, if it might still be merged with most
985 recent instance. These variables don't really belong in the
986 "in_scope" list, but are not immediately removed when a new instance
987 is found. Instead, they are detected and ignored when considering the
988 list of in_scope names.
990 The storage of the value of a variable will be described later. For now
991 we just need to know that when a variable goes out of scope, it might
992 need to be freed. For this we need to be able to find it, so assume that
993 `var_value()` will provide that.
995 ###### variable fields
996 struct variable *merged;
1000 static void variable_merge(struct variable *primary, struct variable *secondary)
1004 if (primary->merged)
1006 primary = primary->merged; // NOTEST
1008 for (v = primary->previous; v; v=v->previous)
1009 if (v == secondary || v == secondary->merged ||
1010 v->merged == secondary ||
1011 (v->merged && v->merged == secondary->merged)) {
1012 v->scope = OutScope;
1013 v->merged = primary;
1017 ###### forward decls
1018 static struct value *var_value(struct parse_context *c, struct variable *v);
1020 ###### free context vars
1022 while (context.varlist) {
1023 struct binding *b = context.varlist;
1024 struct variable *v = b->var;
1025 context.varlist = b->next;
1028 struct variable *t = v;
1031 free_value(t->type, var_value(&context, t));
1033 // This is a global constant
1034 free_exec(t->where_decl);
1039 #### Manipulating Bindings
1041 When a name is conditionally visible, a new declaration discards the
1042 old binding - the condition lapses. Conversely a usage of the name
1043 affirms the visibility and extends it to the end of the containing
1044 block - i.e. the block that contains both the original declaration and
1045 the latest usage. This is determined from `min_depth`. When a
1046 conditionally visible variable gets affirmed like this, it is also
1047 merged with other conditionally visible variables with the same name.
1049 When we parse a variable declaration we either report an error if the
1050 name is currently bound, or create a new variable at the current nest
1051 depth if the name is unbound or bound to a conditionally scoped or
1052 pending-scope variable. If the previous variable was conditionally
1053 scoped, it and its homonyms becomes out-of-scope.
1055 When we parse a variable reference (including non-declarative assignment
1056 "foo = bar") we report an error if the name is not bound or is bound to
1057 a pending-scope variable; update the scope if the name is bound to a
1058 conditionally scoped variable; or just proceed normally if the named
1059 variable is in scope.
1061 When we exit a scope, any variables bound at this level are either
1062 marked out of scope or pending-scoped, depending on whether the scope
1063 was sequential or parallel. Here a "parallel" scope means the "then"
1064 or "else" part of a conditional, or any "case" or "else" branch of a
1065 switch. Other scopes are "sequential".
1067 When exiting a parallel scope we check if there are any variables that
1068 were previously pending and are still visible. If there are, then
1069 there weren't redeclared in the most recent scope, so they cannot be
1070 merged and must become out-of-scope. If it is not the first of
1071 parallel scopes (based on `child_count`), we check that there was a
1072 previous binding that is still pending-scope. If there isn't, the new
1073 variable must now be out-of-scope.
1075 When exiting a sequential scope that immediately enclosed parallel
1076 scopes, we need to resolve any pending-scope variables. If there was
1077 no `else` clause, and we cannot determine that the `switch` was exhaustive,
1078 we need to mark all pending-scope variable as out-of-scope. Otherwise
1079 all pending-scope variables become conditionally scoped.
1082 enum closetype { CloseSequential, CloseParallel, CloseElse };
1084 ###### ast functions
1086 static struct variable *var_decl(struct parse_context *c, struct text s)
1088 struct binding *b = find_binding(c, s);
1089 struct variable *v = b->var;
1091 switch (v ? v->scope : OutScope) {
1093 /* Caller will report the error */
1097 v && v->scope == CondScope;
1099 v->scope = OutScope;
1103 v = calloc(1, sizeof(*v));
1104 v->previous = b->var;
1107 v->min_depth = v->depth = c->scope_depth;
1109 v->in_scope = c->in_scope;
1114 static struct variable *var_ref(struct parse_context *c, struct text s)
1116 struct binding *b = find_binding(c, s);
1117 struct variable *v = b->var;
1118 struct variable *v2;
1120 switch (v ? v->scope : OutScope) {
1123 /* Caller will report the error */
1126 /* All CondScope variables of this name need to be merged
1127 * and become InScope
1129 v->depth = v->min_depth;
1131 for (v2 = v->previous;
1132 v2 && v2->scope == CondScope;
1134 variable_merge(v, v2);
1142 static void var_block_close(struct parse_context *c, enum closetype ct)
1144 /* Close off all variables that are in_scope */
1145 struct variable *v, **vp, *v2;
1148 for (vp = &c->in_scope;
1149 v = *vp, v && v->depth > c->scope_depth && v->min_depth > c->scope_depth;
1151 if (v->name->var == v) switch (ct) {
1153 case CloseParallel: /* handle PendingScope */
1157 if (c->scope_stack->child_count == 1)
1158 v->scope = PendingScope;
1159 else if (v->previous &&
1160 v->previous->scope == PendingScope)
1161 v->scope = PendingScope;
1162 else if (v->type == Tlabel)
1163 v->scope = PendingScope;
1164 else if (v->name->var == v)
1165 v->scope = OutScope;
1166 if (ct == CloseElse) {
1167 /* All Pending variables with this name
1168 * are now Conditional */
1170 v2 && v2->scope == PendingScope;
1172 v2->scope = CondScope;
1177 v2 && v2->scope == PendingScope;
1179 if (v2->type != Tlabel)
1180 v2->scope = OutScope;
1182 case OutScope: break;
1185 case CloseSequential:
1186 if (v->type == Tlabel)
1187 v->scope = PendingScope;
1190 v->scope = OutScope;
1193 /* There was no 'else', so we can only become
1194 * conditional if we know the cases were exhaustive,
1195 * and that doesn't mean anything yet.
1196 * So only labels become conditional..
1199 v2 && v2->scope == PendingScope;
1201 if (v2->type == Tlabel) {
1202 v2->scope = CondScope;
1203 v2->min_depth = c->scope_depth;
1205 v2->scope = OutScope;
1208 case OutScope: break;
1212 if (v->scope == OutScope || v->name->var != v)
1221 The value of a variable is store separately from the variable, on an
1222 analogue of a stack frame. There are (currently) two frames that can be
1223 active. A global frame which currently only stores constants, and a
1224 stacked frame which stores local variables. Each variable knows if it
1225 is global or not, and what its index into the frame is.
1227 Values in the global frame are known immediately they are relevant, so
1228 the frame needs to be reallocated as it grows so it can store those
1229 values. The local frame doesn't get values until the interpreted phase
1230 is started, so there is no need to allocate until the size is known.
1232 ###### variable fields
1236 ###### parse context
1238 short global_size, global_alloc;
1240 void *global, *local;
1242 ###### ast functions
1244 static struct value *var_value(struct parse_context *c, struct variable *v)
1247 if (!c->local || !v->type)
1249 if (v->frame_pos + v->type->size > c->local_size) {
1250 printf("INVALID frame_pos\n"); // NOTEST
1253 return c->local + v->frame_pos;
1255 if (c->global_size > c->global_alloc) {
1256 int old = c->global_alloc;
1257 c->global_alloc = (c->global_size | 1023) + 1024;
1258 c->global = realloc(c->global, c->global_alloc);
1259 memset(c->global + old, 0, c->global_alloc - old);
1261 return c->global + v->frame_pos;
1264 static struct value *global_alloc(struct parse_context *c, struct type *t,
1265 struct variable *v, struct value *init)
1268 struct variable scratch;
1270 if (t->prepare_type)
1271 t->prepare_type(c, t, 1); // NOTEST
1273 if (c->global_size & (t->align - 1))
1274 c->global_size = (c->global_size + t->align) & ~(t->align-1);
1279 v->frame_pos = c->global_size;
1281 c->global_size += v->type->size;
1282 ret = var_value(c, v);
1284 memcpy(ret, init, t->size);
1290 As global values are found -- struct field initializers, labels etc --
1291 `global_alloc()` is called to record the value in the global frame.
1293 When the program is fully parsed, we need to walk the list of variables
1294 to find any that weren't merged away and that aren't global, and to
1295 calculate the frame size and assign a frame position for each variable.
1296 For this we have `scope_finalize()`.
1298 ###### ast functions
1300 static void scope_finalize(struct parse_context *c)
1304 for (b = c->varlist; b; b = b->next) {
1306 for (v = b->var; v; v = v->previous) {
1307 struct type *t = v->type;
1308 if (v->merged && v->merged != v)
1312 if (c->local_size & (t->align - 1))
1313 c->local_size = (c->local_size + t->align) & ~(t->align-1);
1314 v->frame_pos = c->local_size;
1315 c->local_size += v->type->size;
1318 c->local = calloc(1, c->local_size);
1321 ###### free context vars
1322 free(context.global);
1323 free(context.local);
1327 Executables can be lots of different things. In many cases an
1328 executable is just an operation combined with one or two other
1329 executables. This allows for expressions and lists etc. Other times an
1330 executable is something quite specific like a constant or variable name.
1331 So we define a `struct exec` to be a general executable with a type, and
1332 a `struct binode` which is a subclass of `exec`, forms a node in a
1333 binary tree, and holds an operation. There will be other subclasses,
1334 and to access these we need to be able to `cast` the `exec` into the
1335 various other types. The first field in any `struct exec` is the type
1336 from the `exec_types` enum.
1339 #define cast(structname, pointer) ({ \
1340 const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \
1341 if (__mptr && *__mptr != X##structname) abort(); \
1342 (struct structname *)( (char *)__mptr);})
1344 #define new(structname) ({ \
1345 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1346 __ptr->type = X##structname; \
1347 __ptr->line = -1; __ptr->column = -1; \
1350 #define new_pos(structname, token) ({ \
1351 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1352 __ptr->type = X##structname; \
1353 __ptr->line = token.line; __ptr->column = token.col; \
1362 enum exec_types type;
1370 struct exec *left, *right;
1373 ###### ast functions
1375 static int __fput_loc(struct exec *loc, FILE *f)
1379 if (loc->line >= 0) {
1380 fprintf(f, "%d:%d: ", loc->line, loc->column);
1383 if (loc->type == Xbinode)
1384 return __fput_loc(cast(binode,loc)->left, f) ||
1385 __fput_loc(cast(binode,loc)->right, f); // NOTEST
1388 static void fput_loc(struct exec *loc, FILE *f)
1390 if (!__fput_loc(loc, f))
1391 fprintf(f, "??:??: "); // NOTEST
1394 Each different type of `exec` node needs a number of functions defined,
1395 a bit like methods. We must be able to free it, print it, analyse it
1396 and execute it. Once we have specific `exec` types we will need to
1397 parse them too. Let's take this a bit more slowly.
1401 The parser generator requires a `free_foo` function for each struct
1402 that stores attributes and they will often be `exec`s and subtypes
1403 there-of. So we need `free_exec` which can handle all the subtypes,
1404 and we need `free_binode`.
1406 ###### ast functions
1408 static void free_binode(struct binode *b)
1413 free_exec(b->right);
1417 ###### core functions
1418 static void free_exec(struct exec *e)
1427 ###### forward decls
1429 static void free_exec(struct exec *e);
1431 ###### free exec cases
1432 case Xbinode: free_binode(cast(binode, e)); break;
1436 Printing an `exec` requires that we know the current indent level for
1437 printing line-oriented components. As will become clear later, we
1438 also want to know what sort of bracketing to use.
1440 ###### ast functions
1442 static void do_indent(int i, char *str)
1449 ###### core functions
1450 static void print_binode(struct binode *b, int indent, int bracket)
1454 ## print binode cases
1458 static void print_exec(struct exec *e, int indent, int bracket)
1464 print_binode(cast(binode, e), indent, bracket); break;
1469 ###### forward decls
1471 static void print_exec(struct exec *e, int indent, int bracket);
1475 As discussed, analysis involves propagating type requirements around the
1476 program and looking for errors.
1478 So `propagate_types` is passed an expected type (being a `struct type`
1479 pointer together with some `val_rules` flags) that the `exec` is
1480 expected to return, and returns the type that it does return, either
1481 of which can be `NULL` signifying "unknown". An `ok` flag is passed
1482 by reference. It is set to `0` when an error is found, and `2` when
1483 any change is made. If it remains unchanged at `1`, then no more
1484 propagation is needed.
1488 enum val_rules {Rnolabel = 1<<0, Rboolok = 1<<1, Rnoconstant = 2<<1};
1492 if (rules & Rnolabel)
1493 fputs(" (labels not permitted)", stderr);
1496 ###### core functions
1498 static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1499 struct type *type, int rules);
1500 static struct type *__propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1501 struct type *type, int rules)
1508 switch (prog->type) {
1511 struct binode *b = cast(binode, prog);
1513 ## propagate binode cases
1517 ## propagate exec cases
1522 static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1523 struct type *type, int rules)
1525 struct type *ret = __propagate_types(prog, c, ok, type, rules);
1534 Interpreting an `exec` doesn't require anything but the `exec`. State
1535 is stored in variables and each variable will be directly linked from
1536 within the `exec` tree. The exception to this is the `main` function
1537 which needs to look at command line arguments. This function will be
1538 interpreted separately.
1540 Each `exec` can return a value combined with a type in `struct lrval`.
1541 The type may be `Tnone` but must be non-NULL. Some `exec`s will return
1542 the location of a value, which can be updated, in `lval`. Others will
1543 set `lval` to NULL indicating that there is a value of appropriate type
1546 ###### core functions
1550 struct value rval, *lval;
1553 static struct lrval _interp_exec(struct parse_context *c, struct exec *e);
1555 static struct value interp_exec(struct parse_context *c, struct exec *e,
1556 struct type **typeret)
1558 struct lrval ret = _interp_exec(c, e);
1560 if (!ret.type) abort();
1562 *typeret = ret.type;
1564 dup_value(ret.type, ret.lval, &ret.rval);
1568 static struct value *linterp_exec(struct parse_context *c, struct exec *e,
1569 struct type **typeret)
1571 struct lrval ret = _interp_exec(c, e);
1574 *typeret = ret.type;
1576 free_value(ret.type, &ret.rval);
1580 static struct lrval _interp_exec(struct parse_context *c, struct exec *e)
1583 struct value rv = {}, *lrv = NULL;
1584 struct type *rvtype;
1586 rvtype = ret.type = Tnone;
1596 struct binode *b = cast(binode, e);
1597 struct value left, right, *lleft;
1598 struct type *ltype, *rtype;
1599 ltype = rtype = Tnone;
1601 ## interp binode cases
1603 free_value(ltype, &left);
1604 free_value(rtype, &right);
1607 ## interp exec cases
1617 Now that we have the shape of the interpreter in place we can add some
1618 complex types and connected them in to the data structures and the
1619 different phases of parse, analyse, print, interpret.
1621 Thus far we have arrays and structs.
1625 Arrays can be declared by giving a size and a type, as `[size]type' so
1626 `freq:[26]number` declares `freq` to be an array of 26 numbers. The
1627 size can be either a literal number, or a named constant. Some day an
1628 arbitrary expression will be supported.
1630 As a formal parameter to a function, the array can be declared with a
1631 new variable as the size: `name:[size::number]string`. The `size`
1632 variable is set to the size of the array and must be a constant. As
1633 `number` is the only supported type, it can be left out:
1634 `name:[size::]string`.
1636 Arrays cannot be assigned. When pointers are introduced we will also
1637 introduce array slices which can refer to part or all of an array -
1638 the assignment syntax will create a slice. For now, an array can only
1639 ever be referenced by the name it is declared with. It is likely that
1640 a "`copy`" primitive will eventually be define which can be used to
1641 make a copy of an array with controllable recursive depth.
1643 For now we have two sorts of array, those with fixed size either because
1644 it is given as a literal number or because it is a struct member (which
1645 cannot have a runtime-changing size), and those with a size that is
1646 determined at runtime - local variables with a const size. The former
1647 have their size calculated at parse time, the latter at run time.
1649 For the latter type, the `size` field of the type is the size of a
1650 pointer, and the array is reallocated every time it comes into scope.
1652 We differentiate struct fields with a const size from local variables
1653 with a const size by whether they are prepared at parse time or not.
1655 ###### type union fields
1658 int unspec; // size is unspecified - vsize must be set.
1661 struct variable *vsize;
1662 struct type *member;
1665 ###### value union fields
1666 void *array; // used if not static_size
1668 ###### value functions
1670 static void array_prepare_type(struct parse_context *c, struct type *type,
1673 struct value *vsize;
1675 if (!type->array.vsize || type->array.static_size)
1678 vsize = var_value(c, type->array.vsize);
1680 mpz_tdiv_q(q, mpq_numref(vsize->num), mpq_denref(vsize->num));
1681 type->array.size = mpz_get_si(q);
1685 type->array.static_size = 1;
1686 type->size = type->array.size * type->array.member->size;
1687 type->align = type->array.member->align;
1691 static void array_init(struct type *type, struct value *val)
1694 void *ptr = val->ptr;
1698 if (!type->array.static_size) {
1699 val->array = calloc(type->array.size,
1700 type->array.member->size);
1703 for (i = 0; i < type->array.size; i++) {
1705 v = (void*)ptr + i * type->array.member->size;
1706 val_init(type->array.member, v);
1710 static void array_free(struct type *type, struct value *val)
1713 void *ptr = val->ptr;
1715 if (!type->array.static_size)
1717 for (i = 0; i < type->array.size; i++) {
1719 v = (void*)ptr + i * type->array.member->size;
1720 free_value(type->array.member, v);
1722 if (!type->array.static_size)
1726 static int array_compat(struct type *require, struct type *have)
1728 if (have->compat != require->compat)
1730 /* Both are arrays, so we can look at details */
1731 if (!type_compat(require->array.member, have->array.member, 0))
1733 if (have->array.unspec && require->array.unspec) {
1734 if (have->array.vsize && require->array.vsize &&
1735 have->array.vsize != require->array.vsize)
1736 /* sizes might not be the same */
1740 if (have->array.unspec || require->array.unspec)
1742 if (require->array.vsize == NULL && have->array.vsize == NULL)
1743 return require->array.size == have->array.size;
1745 return require->array.vsize == have->array.vsize;
1748 static void array_print_type(struct type *type, FILE *f)
1751 if (type->array.vsize) {
1752 struct binding *b = type->array.vsize->name;
1753 fprintf(f, "%.*s%s]", b->name.len, b->name.txt,
1754 type->array.unspec ? "::" : "");
1756 fprintf(f, "%d]", type->array.size);
1757 type_print(type->array.member, f);
1760 static struct type array_prototype = {
1762 .prepare_type = array_prepare_type,
1763 .print_type = array_print_type,
1764 .compat = array_compat,
1766 .size = sizeof(void*),
1767 .align = sizeof(void*),
1770 ###### declare terminals
1775 | [ NUMBER ] Type ${ {
1778 struct text noname = { "", 0 };
1781 $0 = t = add_type(c, noname, &array_prototype);
1782 t->array.member = $<4;
1783 t->array.vsize = NULL;
1784 if (number_parse(num, tail, $2.txt) == 0)
1785 tok_err(c, "error: unrecognised number", &$2);
1787 tok_err(c, "error: unsupported number suffix", &$2);
1789 t->array.size = mpz_get_ui(mpq_numref(num));
1790 if (mpz_cmp_ui(mpq_denref(num), 1) != 0) {
1791 tok_err(c, "error: array size must be an integer",
1793 } else if (mpz_cmp_ui(mpq_numref(num), 1UL << 30) >= 0)
1794 tok_err(c, "error: array size is too large",
1798 t->array.static_size = 1;
1799 t->size = t->array.size * t->array.member->size;
1800 t->align = t->array.member->align;
1803 | [ IDENTIFIER ] Type ${ {
1804 struct variable *v = var_ref(c, $2.txt);
1805 struct text noname = { "", 0 };
1808 tok_err(c, "error: name undeclared", &$2);
1809 else if (!v->constant)
1810 tok_err(c, "error: array size must be a constant", &$2);
1812 $0 = add_type(c, noname, &array_prototype);
1813 $0->array.member = $<4;
1815 $0->array.vsize = v;
1820 OptType -> Type ${ $0 = $<1; }$
1823 ###### formal type grammar
1825 | [ IDENTIFIER :: OptType ] Type ${ {
1826 struct variable *v = var_decl(c, $ID.txt);
1827 struct text noname = { "", 0 };
1833 $0 = add_type(c, noname, &array_prototype);
1834 $0->array.member = $<6;
1836 $0->array.unspec = 1;
1837 $0->array.vsize = v;
1843 ###### variable grammar
1845 | Variable [ Expression ] ${ {
1846 struct binode *b = new(binode);
1853 ###### print binode cases
1855 print_exec(b->left, -1, bracket);
1857 print_exec(b->right, -1, bracket);
1861 ###### propagate binode cases
1863 /* left must be an array, right must be a number,
1864 * result is the member type of the array
1866 propagate_types(b->right, c, ok, Tnum, 0);
1867 t = propagate_types(b->left, c, ok, NULL, rules & Rnoconstant);
1868 if (!t || t->compat != array_compat) {
1869 type_err(c, "error: %1 cannot be indexed", prog, t, 0, NULL);
1872 if (!type_compat(type, t->array.member, rules)) {
1873 type_err(c, "error: have %1 but need %2", prog,
1874 t->array.member, rules, type);
1876 return t->array.member;
1880 ###### interp binode cases
1886 lleft = linterp_exec(c, b->left, <ype);
1887 right = interp_exec(c, b->right, &rtype);
1889 mpz_tdiv_q(q, mpq_numref(right.num), mpq_denref(right.num));
1893 if (ltype->array.static_size)
1896 ptr = *(void**)lleft;
1897 rvtype = ltype->array.member;
1898 if (i >= 0 && i < ltype->array.size)
1899 lrv = ptr + i * rvtype->size;
1901 val_init(ltype->array.member, &rv);
1908 A `struct` is a data-type that contains one or more other data-types.
1909 It differs from an array in that each member can be of a different
1910 type, and they are accessed by name rather than by number. Thus you
1911 cannot choose an element by calculation, you need to know what you
1914 The language makes no promises about how a given structure will be
1915 stored in memory - it is free to rearrange fields to suit whatever
1916 criteria seems important.
1918 Structs are declared separately from program code - they cannot be
1919 declared in-line in a variable declaration like arrays can. A struct
1920 is given a name and this name is used to identify the type - the name
1921 is not prefixed by the word `struct` as it would be in C.
1923 Structs are only treated as the same if they have the same name.
1924 Simply having the same fields in the same order is not enough. This
1925 might change once we can create structure initializers from a list of
1928 Each component datum is identified much like a variable is declared,
1929 with a name, one or two colons, and a type. The type cannot be omitted
1930 as there is no opportunity to deduce the type from usage. An initial
1931 value can be given following an equals sign, so
1933 ##### Example: a struct type
1939 would declare a type called "complex" which has two number fields,
1940 each initialised to zero.
1942 Struct will need to be declared separately from the code that uses
1943 them, so we will need to be able to print out the declaration of a
1944 struct when reprinting the whole program. So a `print_type_decl` type
1945 function will be needed.
1947 ###### type union fields
1959 ###### type functions
1960 void (*print_type_decl)(struct type *type, FILE *f);
1962 ###### value functions
1964 static void structure_init(struct type *type, struct value *val)
1968 for (i = 0; i < type->structure.nfields; i++) {
1970 v = (void*) val->ptr + type->structure.fields[i].offset;
1971 if (type->structure.fields[i].init)
1972 dup_value(type->structure.fields[i].type,
1973 type->structure.fields[i].init,
1976 val_init(type->structure.fields[i].type, v);
1980 static void structure_free(struct type *type, struct value *val)
1984 for (i = 0; i < type->structure.nfields; i++) {
1986 v = (void*)val->ptr + type->structure.fields[i].offset;
1987 free_value(type->structure.fields[i].type, v);
1991 static void structure_free_type(struct type *t)
1994 for (i = 0; i < t->structure.nfields; i++)
1995 if (t->structure.fields[i].init) {
1996 free_value(t->structure.fields[i].type,
1997 t->structure.fields[i].init);
1999 free(t->structure.fields);
2002 static struct type structure_prototype = {
2003 .init = structure_init,
2004 .free = structure_free,
2005 .free_type = structure_free_type,
2006 .print_type_decl = structure_print_type,
2020 ###### free exec cases
2022 free_exec(cast(fieldref, e)->left);
2026 ###### declare terminals
2029 ###### variable grammar
2031 | Variable . IDENTIFIER ${ {
2032 struct fieldref *fr = new_pos(fieldref, $2);
2039 ###### print exec cases
2043 struct fieldref *f = cast(fieldref, e);
2044 print_exec(f->left, -1, bracket);
2045 printf(".%.*s", f->name.len, f->name.txt);
2049 ###### ast functions
2050 static int find_struct_index(struct type *type, struct text field)
2053 for (i = 0; i < type->structure.nfields; i++)
2054 if (text_cmp(type->structure.fields[i].name, field) == 0)
2059 ###### propagate exec cases
2063 struct fieldref *f = cast(fieldref, prog);
2064 struct type *st = propagate_types(f->left, c, ok, NULL, 0);
2067 type_err(c, "error: unknown type for field access", f->left,
2069 else if (st->init != structure_init)
2070 type_err(c, "error: field reference attempted on %1, not a struct",
2071 f->left, st, 0, NULL);
2072 else if (f->index == -2) {
2073 f->index = find_struct_index(st, f->name);
2075 type_err(c, "error: cannot find requested field in %1",
2076 f->left, st, 0, NULL);
2078 if (f->index >= 0) {
2079 struct type *ft = st->structure.fields[f->index].type;
2080 if (!type_compat(type, ft, rules))
2081 type_err(c, "error: have %1 but need %2", prog,
2088 ###### interp exec cases
2091 struct fieldref *f = cast(fieldref, e);
2093 struct value *lleft = linterp_exec(c, f->left, <ype);
2094 lrv = (void*)lleft->ptr + ltype->structure.fields[f->index].offset;
2095 rvtype = ltype->structure.fields[f->index].type;
2101 struct fieldlist *prev;
2105 ###### ast functions
2106 static void free_fieldlist(struct fieldlist *f)
2110 free_fieldlist(f->prev);
2112 free_value(f->f.type, f->f.init);
2118 ###### top level grammar
2119 DeclareStruct -> struct IDENTIFIER FieldBlock Newlines ${ {
2121 add_type(c, $2.txt, &structure_prototype);
2123 struct fieldlist *f;
2125 for (f = $3; f; f=f->prev)
2128 t->structure.nfields = cnt;
2129 t->structure.fields = calloc(cnt, sizeof(struct field));
2132 int a = f->f.type->align;
2134 t->structure.fields[cnt] = f->f;
2135 if (t->size & (a-1))
2136 t->size = (t->size | (a-1)) + 1;
2137 t->structure.fields[cnt].offset = t->size;
2138 t->size += ((f->f.type->size - 1) | (a-1)) + 1;
2147 FieldBlock -> { IN OptNL FieldLines OUT OptNL } ${ $0 = $<FL; }$
2148 | { SimpleFieldList } ${ $0 = $<SFL; }$
2149 | IN OptNL FieldLines OUT ${ $0 = $<FL; }$
2150 | SimpleFieldList EOL ${ $0 = $<SFL; }$
2152 FieldLines -> SimpleFieldList Newlines ${ $0 = $<SFL; }$
2153 | FieldLines SimpleFieldList Newlines ${
2158 SimpleFieldList -> Field ${ $0 = $<F; }$
2159 | SimpleFieldList ; Field ${
2163 | SimpleFieldList ; ${
2166 | ERROR ${ tok_err(c, "Syntax error in struct field", &$1); }$
2168 Field -> IDENTIFIER : Type = Expression ${ {
2171 $0 = calloc(1, sizeof(struct fieldlist));
2172 $0->f.name = $1.txt;
2177 propagate_types($<5, c, &ok, $3, 0);
2182 struct value vl = interp_exec(c, $5, NULL);
2183 $0->f.init = global_alloc(c, $0->f.type, NULL, &vl);
2186 | IDENTIFIER : Type ${
2187 $0 = calloc(1, sizeof(struct fieldlist));
2188 $0->f.name = $1.txt;
2190 if ($0->f.type->prepare_type)
2191 $0->f.type->prepare_type(c, $0->f.type, 1);
2194 ###### forward decls
2195 static void structure_print_type(struct type *t, FILE *f);
2197 ###### value functions
2198 static void structure_print_type(struct type *t, FILE *f)
2202 fprintf(f, "struct %.*s\n", t->name.len, t->name.txt);
2204 for (i = 0; i < t->structure.nfields; i++) {
2205 struct field *fl = t->structure.fields + i;
2206 fprintf(f, " %.*s : ", fl->name.len, fl->name.txt);
2207 type_print(fl->type, f);
2208 if (fl->type->print && fl->init) {
2210 if (fl->type == Tstr)
2212 print_value(fl->type, fl->init);
2213 if (fl->type == Tstr)
2220 ###### print type decls
2225 while (target != 0) {
2227 for (t = context.typelist; t ; t=t->next)
2228 if (t->print_type_decl) {
2237 t->print_type_decl(t, stdout);
2245 A function is a named chunk of code which can be passed parameters and
2246 can return results. Each function has an implicit type which includes
2247 the set of parameters and the return value. As yet these types cannot
2248 be declared separate from the function itself.
2250 In fact, only one function is currently possible - `main`. `main` is
2251 passed an array of strings together with the size of the array, and
2252 doesn't return anything. The strings are command line arguments.
2254 The parameters can be specified either in parentheses as a list, such as
2256 ##### Example: function 1
2258 func main(av:[ac::number]string)
2261 or as an indented list of one parameter per line
2263 ##### Example: function 2
2266 argv:[argc::number]string
2278 MainFunction -> func main ( OpenScope Args ) Block Newlines ${
2281 $0->left = reorder_bilist($<Ar);
2283 var_block_close(c, CloseSequential);
2284 if (c->scope_stack && !c->parse_error) abort();
2286 | func main IN OpenScope OptNL Args OUT OptNL do Block Newlines ${
2289 $0->left = reorder_bilist($<Ar);
2291 var_block_close(c, CloseSequential);
2292 if (c->scope_stack && !c->parse_error) abort();
2294 | func main NEWLINE OpenScope OptNL do Block Newlines ${
2299 var_block_close(c, CloseSequential);
2300 if (c->scope_stack && !c->parse_error) abort();
2303 Args -> ${ $0 = NULL; }$
2304 | Varlist ${ $0 = $<1; }$
2305 | Varlist ; ${ $0 = $<1; }$
2306 | Varlist NEWLINE ${ $0 = $<1; }$
2308 Varlist -> Varlist ; ArgDecl ${
2322 ArgDecl -> IDENTIFIER : FormalType ${ {
2323 struct variable *v = var_decl(c, $1.txt);
2329 ## Executables: the elements of code
2331 Each code element needs to be parsed, printed, analysed,
2332 interpreted, and freed. There are several, so let's just start with
2333 the easy ones and work our way up.
2337 We have already met values as separate objects. When manifest
2338 constants appear in the program text, that must result in an executable
2339 which has a constant value. So the `val` structure embeds a value in
2352 ###### ast functions
2353 struct val *new_val(struct type *T, struct token tk)
2355 struct val *v = new_pos(val, tk);
2366 $0 = new_val(Tbool, $1);
2370 $0 = new_val(Tbool, $1);
2374 $0 = new_val(Tnum, $1);
2377 if (number_parse($0->val.num, tail, $1.txt) == 0)
2378 mpq_init($0->val.num);
2380 tok_err(c, "error: unsupported number suffix",
2385 $0 = new_val(Tstr, $1);
2388 string_parse(&$1, '\\', &$0->val.str, tail);
2390 tok_err(c, "error: unsupported string suffix",
2395 $0 = new_val(Tstr, $1);
2398 string_parse(&$1, '\\', &$0->val.str, tail);
2400 tok_err(c, "error: unsupported string suffix",
2405 ###### print exec cases
2408 struct val *v = cast(val, e);
2409 if (v->vtype == Tstr)
2411 print_value(v->vtype, &v->val);
2412 if (v->vtype == Tstr)
2417 ###### propagate exec cases
2420 struct val *val = cast(val, prog);
2421 if (!type_compat(type, val->vtype, rules))
2422 type_err(c, "error: expected %1%r found %2",
2423 prog, type, rules, val->vtype);
2427 ###### interp exec cases
2429 rvtype = cast(val, e)->vtype;
2430 dup_value(rvtype, &cast(val, e)->val, &rv);
2433 ###### ast functions
2434 static void free_val(struct val *v)
2437 free_value(v->vtype, &v->val);
2441 ###### free exec cases
2442 case Xval: free_val(cast(val, e)); break;
2444 ###### ast functions
2445 // Move all nodes from 'b' to 'rv', reversing their order.
2446 // In 'b' 'left' is a list, and 'right' is the last node.
2447 // In 'rv', left' is the first node and 'right' is a list.
2448 static struct binode *reorder_bilist(struct binode *b)
2450 struct binode *rv = NULL;
2453 struct exec *t = b->right;
2457 b = cast(binode, b->left);
2467 Just as we used a `val` to wrap a value into an `exec`, we similarly
2468 need a `var` to wrap a `variable` into an exec. While each `val`
2469 contained a copy of the value, each `var` holds a link to the variable
2470 because it really is the same variable no matter where it appears.
2471 When a variable is used, we need to remember to follow the `->merged`
2472 link to find the primary instance.
2480 struct variable *var;
2488 VariableDecl -> IDENTIFIER : ${ {
2489 struct variable *v = var_decl(c, $1.txt);
2490 $0 = new_pos(var, $1);
2495 v = var_ref(c, $1.txt);
2497 type_err(c, "error: variable '%v' redeclared",
2499 type_err(c, "info: this is where '%v' was first declared",
2500 v->where_decl, NULL, 0, NULL);
2503 | IDENTIFIER :: ${ {
2504 struct variable *v = var_decl(c, $1.txt);
2505 $0 = new_pos(var, $1);
2511 v = var_ref(c, $1.txt);
2513 type_err(c, "error: variable '%v' redeclared",
2515 type_err(c, "info: this is where '%v' was first declared",
2516 v->where_decl, NULL, 0, NULL);
2519 | IDENTIFIER : Type ${ {
2520 struct variable *v = var_decl(c, $1.txt);
2521 $0 = new_pos(var, $1);
2528 v = var_ref(c, $1.txt);
2530 type_err(c, "error: variable '%v' redeclared",
2532 type_err(c, "info: this is where '%v' was first declared",
2533 v->where_decl, NULL, 0, NULL);
2536 | IDENTIFIER :: Type ${ {
2537 struct variable *v = var_decl(c, $1.txt);
2538 $0 = new_pos(var, $1);
2546 v = var_ref(c, $1.txt);
2548 type_err(c, "error: variable '%v' redeclared",
2550 type_err(c, "info: this is where '%v' was first declared",
2551 v->where_decl, NULL, 0, NULL);
2556 Variable -> IDENTIFIER ${ {
2557 struct variable *v = var_ref(c, $1.txt);
2558 $0 = new_pos(var, $1);
2560 /* This might be a label - allocate a var just in case */
2561 v = var_decl(c, $1.txt);
2568 cast(var, $0)->var = v;
2572 ###### print exec cases
2575 struct var *v = cast(var, e);
2577 struct binding *b = v->var->name;
2578 printf("%.*s", b->name.len, b->name.txt);
2585 if (loc && loc->type == Xvar) {
2586 struct var *v = cast(var, loc);
2588 struct binding *b = v->var->name;
2589 fprintf(stderr, "%.*s", b->name.len, b->name.txt);
2591 fputs("???", stderr); // NOTEST
2593 fputs("NOTVAR", stderr); // NOTEST
2596 ###### propagate exec cases
2600 struct var *var = cast(var, prog);
2601 struct variable *v = var->var;
2603 type_err(c, "%d:BUG: no variable!!", prog, NULL, 0, NULL); // NOTEST
2604 return Tnone; // NOTEST
2608 if (v->constant && (rules & Rnoconstant)) {
2609 type_err(c, "error: Cannot assign to a constant: %v",
2610 prog, NULL, 0, NULL);
2611 type_err(c, "info: name was defined as a constant here",
2612 v->where_decl, NULL, 0, NULL);
2615 if (v->type == Tnone && v->where_decl == prog)
2616 type_err(c, "error: variable used but not declared: %v",
2617 prog, NULL, 0, NULL);
2618 if (v->type == NULL) {
2619 if (type && *ok != 0) {
2621 v->where_set = prog;
2626 if (!type_compat(type, v->type, rules)) {
2627 type_err(c, "error: expected %1%r but variable '%v' is %2", prog,
2628 type, rules, v->type);
2629 type_err(c, "info: this is where '%v' was set to %1", v->where_set,
2630 v->type, rules, NULL);
2637 ###### interp exec cases
2640 struct var *var = cast(var, e);
2641 struct variable *v = var->var;
2645 lrv = var_value(c, v);
2650 ###### ast functions
2652 static void free_var(struct var *v)
2657 ###### free exec cases
2658 case Xvar: free_var(cast(var, e)); break;
2660 ### Expressions: Conditional
2662 Our first user of the `binode` will be conditional expressions, which
2663 is a bit odd as they actually have three components. That will be
2664 handled by having 2 binodes for each expression. The conditional
2665 expression is the lowest precedence operator which is why we define it
2666 first - to start the precedence list.
2668 Conditional expressions are of the form "value `if` condition `else`
2669 other_value". They associate to the right, so everything to the right
2670 of `else` is part of an else value, while only a higher-precedence to
2671 the left of `if` is the if values. Between `if` and `else` there is no
2672 room for ambiguity, so a full conditional expression is allowed in
2684 Expression -> Expression if Expression else Expression $$ifelse ${ {
2685 struct binode *b1 = new(binode);
2686 struct binode *b2 = new(binode);
2695 ## expression grammar
2697 ###### print binode cases
2700 b2 = cast(binode, b->right);
2701 if (bracket) printf("(");
2702 print_exec(b2->left, -1, bracket);
2704 print_exec(b->left, -1, bracket);
2706 print_exec(b2->right, -1, bracket);
2707 if (bracket) printf(")");
2710 ###### propagate binode cases
2713 /* cond must be Tbool, others must match */
2714 struct binode *b2 = cast(binode, b->right);
2717 propagate_types(b->left, c, ok, Tbool, 0);
2718 t = propagate_types(b2->left, c, ok, type, Rnolabel);
2719 t2 = propagate_types(b2->right, c, ok, type ?: t, Rnolabel);
2723 ###### interp binode cases
2726 struct binode *b2 = cast(binode, b->right);
2727 left = interp_exec(c, b->left, <ype);
2729 rv = interp_exec(c, b2->left, &rvtype);
2731 rv = interp_exec(c, b2->right, &rvtype);
2735 ### Expressions: Boolean
2737 The next class of expressions to use the `binode` will be Boolean
2738 expressions. "`and then`" and "`or else`" are similar to `and` and `or`
2739 have same corresponding precendence. The difference is that they don't
2740 evaluate the second expression if not necessary.
2749 ###### expr precedence
2754 ###### expression grammar
2755 | Expression or Expression ${ {
2756 struct binode *b = new(binode);
2762 | Expression or else Expression ${ {
2763 struct binode *b = new(binode);
2770 | Expression and Expression ${ {
2771 struct binode *b = new(binode);
2777 | Expression and then Expression ${ {
2778 struct binode *b = new(binode);
2785 | not Expression ${ {
2786 struct binode *b = new(binode);
2792 ###### print binode cases
2794 if (bracket) printf("(");
2795 print_exec(b->left, -1, bracket);
2797 print_exec(b->right, -1, bracket);
2798 if (bracket) printf(")");
2801 if (bracket) printf("(");
2802 print_exec(b->left, -1, bracket);
2803 printf(" and then ");
2804 print_exec(b->right, -1, bracket);
2805 if (bracket) printf(")");
2808 if (bracket) printf("(");
2809 print_exec(b->left, -1, bracket);
2811 print_exec(b->right, -1, bracket);
2812 if (bracket) printf(")");
2815 if (bracket) printf("(");
2816 print_exec(b->left, -1, bracket);
2817 printf(" or else ");
2818 print_exec(b->right, -1, bracket);
2819 if (bracket) printf(")");
2822 if (bracket) printf("(");
2824 print_exec(b->right, -1, bracket);
2825 if (bracket) printf(")");
2828 ###### propagate binode cases
2834 /* both must be Tbool, result is Tbool */
2835 propagate_types(b->left, c, ok, Tbool, 0);
2836 propagate_types(b->right, c, ok, Tbool, 0);
2837 if (type && type != Tbool)
2838 type_err(c, "error: %1 operation found where %2 expected", prog,
2842 ###### interp binode cases
2844 rv = interp_exec(c, b->left, &rvtype);
2845 right = interp_exec(c, b->right, &rtype);
2846 rv.bool = rv.bool && right.bool;
2849 rv = interp_exec(c, b->left, &rvtype);
2851 rv = interp_exec(c, b->right, NULL);
2854 rv = interp_exec(c, b->left, &rvtype);
2855 right = interp_exec(c, b->right, &rtype);
2856 rv.bool = rv.bool || right.bool;
2859 rv = interp_exec(c, b->left, &rvtype);
2861 rv = interp_exec(c, b->right, NULL);
2864 rv = interp_exec(c, b->right, &rvtype);
2868 ### Expressions: Comparison
2870 Of slightly higher precedence that Boolean expressions are Comparisons.
2871 A comparison takes arguments of any comparable type, but the two types
2874 To simplify the parsing we introduce an `eop` which can record an
2875 expression operator, and the `CMPop` non-terminal will match one of them.
2882 ###### ast functions
2883 static void free_eop(struct eop *e)
2897 ###### expr precedence
2898 $LEFT < > <= >= == != CMPop
2900 ###### expression grammar
2901 | Expression CMPop Expression ${ {
2902 struct binode *b = new(binode);
2912 CMPop -> < ${ $0.op = Less; }$
2913 | > ${ $0.op = Gtr; }$
2914 | <= ${ $0.op = LessEq; }$
2915 | >= ${ $0.op = GtrEq; }$
2916 | == ${ $0.op = Eql; }$
2917 | != ${ $0.op = NEql; }$
2919 ###### print binode cases
2927 if (bracket) printf("(");
2928 print_exec(b->left, -1, bracket);
2930 case Less: printf(" < "); break;
2931 case LessEq: printf(" <= "); break;
2932 case Gtr: printf(" > "); break;
2933 case GtrEq: printf(" >= "); break;
2934 case Eql: printf(" == "); break;
2935 case NEql: printf(" != "); break;
2936 default: abort(); // NOTEST
2938 print_exec(b->right, -1, bracket);
2939 if (bracket) printf(")");
2942 ###### propagate binode cases
2949 /* Both must match but not be labels, result is Tbool */
2950 t = propagate_types(b->left, c, ok, NULL, Rnolabel);
2952 propagate_types(b->right, c, ok, t, 0);
2954 t = propagate_types(b->right, c, ok, NULL, Rnolabel);
2956 t = propagate_types(b->left, c, ok, t, 0);
2958 if (!type_compat(type, Tbool, 0))
2959 type_err(c, "error: Comparison returns %1 but %2 expected", prog,
2960 Tbool, rules, type);
2963 ###### interp binode cases
2972 left = interp_exec(c, b->left, <ype);
2973 right = interp_exec(c, b->right, &rtype);
2974 cmp = value_cmp(ltype, rtype, &left, &right);
2977 case Less: rv.bool = cmp < 0; break;
2978 case LessEq: rv.bool = cmp <= 0; break;
2979 case Gtr: rv.bool = cmp > 0; break;
2980 case GtrEq: rv.bool = cmp >= 0; break;
2981 case Eql: rv.bool = cmp == 0; break;
2982 case NEql: rv.bool = cmp != 0; break;
2983 default: rv.bool = 0; break; // NOTEST
2988 ### Expressions: The rest
2990 The remaining expressions with the highest precedence are arithmetic,
2991 string concatenation, and string conversion. String concatenation
2992 (`++`) has the same precedence as multiplication and division, but lower
2995 String conversion is a temporary feature until I get a better type
2996 system. `$` is a prefix operator which expects a string and returns
2999 `+` and `-` are both infix and prefix operations (where they are
3000 absolute value and negation). These have different operator names.
3002 We also have a 'Bracket' operator which records where parentheses were
3003 found. This makes it easy to reproduce these when printing. Possibly I
3004 should only insert brackets were needed for precedence.
3014 ###### expr precedence
3020 ###### expression grammar
3021 | Expression Eop Expression ${ {
3022 struct binode *b = new(binode);
3029 | Expression Top Expression ${ {
3030 struct binode *b = new(binode);
3037 | ( Expression ) ${ {
3038 struct binode *b = new_pos(binode, $1);
3043 | Uop Expression ${ {
3044 struct binode *b = new(binode);
3049 | Value ${ $0 = $<1; }$
3050 | Variable ${ $0 = $<1; }$
3053 Eop -> + ${ $0.op = Plus; }$
3054 | - ${ $0.op = Minus; }$
3056 Uop -> + ${ $0.op = Absolute; }$
3057 | - ${ $0.op = Negate; }$
3058 | $ ${ $0.op = StringConv; }$
3060 Top -> * ${ $0.op = Times; }$
3061 | / ${ $0.op = Divide; }$
3062 | % ${ $0.op = Rem; }$
3063 | ++ ${ $0.op = Concat; }$
3065 ###### print binode cases
3072 if (bracket) printf("(");
3073 print_exec(b->left, indent, bracket);
3075 case Plus: fputs(" + ", stdout); break;
3076 case Minus: fputs(" - ", stdout); break;
3077 case Times: fputs(" * ", stdout); break;
3078 case Divide: fputs(" / ", stdout); break;
3079 case Rem: fputs(" % ", stdout); break;
3080 case Concat: fputs(" ++ ", stdout); break;
3081 default: abort(); // NOTEST
3083 print_exec(b->right, indent, bracket);
3084 if (bracket) printf(")");
3089 if (bracket) printf("(");
3091 case Absolute: fputs("+", stdout); break;
3092 case Negate: fputs("-", stdout); break;
3093 case StringConv: fputs("$", stdout); break;
3094 default: abort(); // NOTEST
3096 print_exec(b->right, indent, bracket);
3097 if (bracket) printf(")");
3101 print_exec(b->right, indent, bracket);
3105 ###### propagate binode cases
3111 /* both must be numbers, result is Tnum */
3114 /* as propagate_types ignores a NULL,
3115 * unary ops fit here too */
3116 propagate_types(b->left, c, ok, Tnum, 0);
3117 propagate_types(b->right, c, ok, Tnum, 0);
3118 if (!type_compat(type, Tnum, 0))
3119 type_err(c, "error: Arithmetic returns %1 but %2 expected", prog,
3124 /* both must be Tstr, result is Tstr */
3125 propagate_types(b->left, c, ok, Tstr, 0);
3126 propagate_types(b->right, c, ok, Tstr, 0);
3127 if (!type_compat(type, Tstr, 0))
3128 type_err(c, "error: Concat returns %1 but %2 expected", prog,
3133 /* op must be string, result is number */
3134 propagate_types(b->left, c, ok, Tstr, 0);
3135 if (!type_compat(type, Tnum, 0))
3137 "error: Can only convert string to number, not %1",
3138 prog, type, 0, NULL);
3142 return propagate_types(b->right, c, ok, type, 0);
3144 ###### interp binode cases
3147 rv = interp_exec(c, b->left, &rvtype);
3148 right = interp_exec(c, b->right, &rtype);
3149 mpq_add(rv.num, rv.num, right.num);
3152 rv = interp_exec(c, b->left, &rvtype);
3153 right = interp_exec(c, b->right, &rtype);
3154 mpq_sub(rv.num, rv.num, right.num);
3157 rv = interp_exec(c, b->left, &rvtype);
3158 right = interp_exec(c, b->right, &rtype);
3159 mpq_mul(rv.num, rv.num, right.num);
3162 rv = interp_exec(c, b->left, &rvtype);
3163 right = interp_exec(c, b->right, &rtype);
3164 mpq_div(rv.num, rv.num, right.num);
3169 left = interp_exec(c, b->left, <ype);
3170 right = interp_exec(c, b->right, &rtype);
3171 mpz_init(l); mpz_init(r); mpz_init(rem);
3172 mpz_tdiv_q(l, mpq_numref(left.num), mpq_denref(left.num));
3173 mpz_tdiv_q(r, mpq_numref(right.num), mpq_denref(right.num));
3174 mpz_tdiv_r(rem, l, r);
3175 val_init(Tnum, &rv);
3176 mpq_set_z(rv.num, rem);
3177 mpz_clear(r); mpz_clear(l); mpz_clear(rem);
3182 rv = interp_exec(c, b->right, &rvtype);
3183 mpq_neg(rv.num, rv.num);
3186 rv = interp_exec(c, b->right, &rvtype);
3187 mpq_abs(rv.num, rv.num);
3190 rv = interp_exec(c, b->right, &rvtype);
3193 left = interp_exec(c, b->left, <ype);
3194 right = interp_exec(c, b->right, &rtype);
3196 rv.str = text_join(left.str, right.str);
3199 right = interp_exec(c, b->right, &rvtype);
3203 struct text tx = right.str;
3206 if (tx.txt[0] == '-') {
3211 if (number_parse(rv.num, tail, tx) == 0)
3214 mpq_neg(rv.num, rv.num);
3216 printf("Unsupported suffix: %.*s\n", tx.len, tx.txt);
3220 ###### value functions
3222 static struct text text_join(struct text a, struct text b)
3225 rv.len = a.len + b.len;
3226 rv.txt = malloc(rv.len);
3227 memcpy(rv.txt, a.txt, a.len);
3228 memcpy(rv.txt+a.len, b.txt, b.len);
3232 ### Blocks, Statements, and Statement lists.
3234 Now that we have expressions out of the way we need to turn to
3235 statements. There are simple statements and more complex statements.
3236 Simple statements do not contain (syntactic) newlines, complex statements do.
3238 Statements often come in sequences and we have corresponding simple
3239 statement lists and complex statement lists.
3240 The former comprise only simple statements separated by semicolons.
3241 The later comprise complex statements and simple statement lists. They are
3242 separated by newlines. Thus the semicolon is only used to separate
3243 simple statements on the one line. This may be overly restrictive,
3244 but I'm not sure I ever want a complex statement to share a line with
3247 Note that a simple statement list can still use multiple lines if
3248 subsequent lines are indented, so
3250 ###### Example: wrapped simple statement list
3255 is a single simple statement list. This might allow room for
3256 confusion, so I'm not set on it yet.
3258 A simple statement list needs no extra syntax. A complex statement
3259 list has two syntactic forms. It can be enclosed in braces (much like
3260 C blocks), or it can be introduced by an indent and continue until an
3261 unindented newline (much like Python blocks). With this extra syntax
3262 it is referred to as a block.
3264 Note that a block does not have to include any newlines if it only
3265 contains simple statements. So both of:
3267 if condition: a=b; d=f
3269 if condition { a=b; print f }
3273 In either case the list is constructed from a `binode` list with
3274 `Block` as the operator. When parsing the list it is most convenient
3275 to append to the end, so a list is a list and a statement. When using
3276 the list it is more convenient to consider a list to be a statement
3277 and a list. So we need a function to re-order a list.
3278 `reorder_bilist` serves this purpose.
3280 The only stand-alone statement we introduce at this stage is `pass`
3281 which does nothing and is represented as a `NULL` pointer in a `Block`
3282 list. Other stand-alone statements will follow once the infrastructure
3293 Block -> { IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
3294 | { SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
3295 | SimpleStatements ; ${ $0 = reorder_bilist($<SS); }$
3296 | SimpleStatements EOL ${ $0 = reorder_bilist($<SS); }$
3297 | IN OptNL Statementlist OUT ${ $0 = $<Sl; }$
3299 OpenBlock -> OpenScope { IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
3300 | OpenScope { SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
3301 | OpenScope SimpleStatements ; ${ $0 = reorder_bilist($<SS); }$
3302 | OpenScope SimpleStatements EOL ${ $0 = reorder_bilist($<SS); }$
3303 | IN OpenScope OptNL Statementlist OUT ${ $0 = $<Sl; }$
3305 UseBlock -> { OpenScope IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
3306 | { OpenScope SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
3307 | IN OpenScope OptNL Statementlist OUT ${ $0 = $<Sl; }$
3309 ColonBlock -> { IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
3310 | { SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
3311 | : SimpleStatements ; ${ $0 = reorder_bilist($<SS); }$
3312 | : SimpleStatements EOL ${ $0 = reorder_bilist($<SS); }$
3313 | : IN OptNL Statementlist OUT ${ $0 = $<Sl; }$
3315 Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<CS); }$
3317 ComplexStatements -> ComplexStatements ComplexStatement ${
3327 | ComplexStatement ${
3339 ComplexStatement -> SimpleStatements Newlines ${
3340 $0 = reorder_bilist($<SS);
3342 | SimpleStatements ; Newlines ${
3343 $0 = reorder_bilist($<SS);
3345 ## ComplexStatement Grammar
3348 SimpleStatements -> SimpleStatements ; SimpleStatement ${
3354 | SimpleStatement ${
3362 SimpleStatement -> pass ${ $0 = NULL; }$
3363 | ERROR ${ tok_err(c, "Syntax error in statement", &$1); }$
3364 ## SimpleStatement Grammar
3366 ###### print binode cases
3370 if (b->left == NULL)
3373 print_exec(b->left, indent, bracket);
3376 print_exec(b->right, indent, bracket);
3379 // block, one per line
3380 if (b->left == NULL)
3381 do_indent(indent, "pass\n");
3383 print_exec(b->left, indent, bracket);
3385 print_exec(b->right, indent, bracket);
3389 ###### propagate binode cases
3392 /* If any statement returns something other than Tnone
3393 * or Tbool then all such must return same type.
3394 * As each statement may be Tnone or something else,
3395 * we must always pass NULL (unknown) down, otherwise an incorrect
3396 * error might occur. We never return Tnone unless it is
3401 for (e = b; e; e = cast(binode, e->right)) {
3402 t = propagate_types(e->left, c, ok, NULL, rules);
3403 if ((rules & Rboolok) && t == Tbool)
3405 if (t && t != Tnone && t != Tbool) {
3409 type_err(c, "error: expected %1%r, found %2",
3410 e->left, type, rules, t);
3416 ###### interp binode cases
3418 while (rvtype == Tnone &&
3421 rv = interp_exec(c, b->left, &rvtype);
3422 b = cast(binode, b->right);
3426 ### The Print statement
3428 `print` is a simple statement that takes a comma-separated list of
3429 expressions and prints the values separated by spaces and terminated
3430 by a newline. No control of formatting is possible.
3432 `print` faces the same list-ordering issue as blocks, and uses the
3438 ##### expr precedence
3441 ###### SimpleStatement Grammar
3443 | print ExpressionList ${
3444 $0 = reorder_bilist($<2);
3446 | print ExpressionList , ${
3451 $0 = reorder_bilist($0);
3462 ExpressionList -> ExpressionList , Expression ${
3475 ###### print binode cases
3478 do_indent(indent, "print");
3482 print_exec(b->left, -1, bracket);
3486 b = cast(binode, b->right);
3492 ###### propagate binode cases
3495 /* don't care but all must be consistent */
3496 propagate_types(b->left, c, ok, NULL, Rnolabel);
3497 propagate_types(b->right, c, ok, NULL, Rnolabel);
3500 ###### interp binode cases
3506 for ( ; b; b = cast(binode, b->right))
3510 left = interp_exec(c, b->left, <ype);
3511 print_value(ltype, &left);
3512 free_value(ltype, &left);
3523 ###### Assignment statement
3525 An assignment will assign a value to a variable, providing it hasn't
3526 been declared as a constant. The analysis phase ensures that the type
3527 will be correct so the interpreter just needs to perform the
3528 calculation. There is a form of assignment which declares a new
3529 variable as well as assigning a value. If a name is assigned before
3530 it is declared, and error will be raised as the name is created as
3531 `Tlabel` and it is illegal to assign to such names.
3537 ###### declare terminals
3540 ###### SimpleStatement Grammar
3541 | Variable = Expression ${
3547 | VariableDecl = Expression ${
3555 if ($1->var->where_set == NULL) {
3557 "Variable declared with no type or value: %v",
3567 ###### print binode cases
3570 do_indent(indent, "");
3571 print_exec(b->left, indent, bracket);
3573 print_exec(b->right, indent, bracket);
3580 struct variable *v = cast(var, b->left)->var;
3581 do_indent(indent, "");
3582 print_exec(b->left, indent, bracket);
3583 if (cast(var, b->left)->var->constant) {
3584 if (v->where_decl == v->where_set) {
3586 type_print(v->type, stdout);
3591 if (v->where_decl == v->where_set) {
3593 type_print(v->type, stdout);
3600 print_exec(b->right, indent, bracket);
3607 ###### propagate binode cases
3611 /* Both must match and not be labels,
3612 * Type must support 'dup',
3613 * For Assign, left must not be constant.
3616 t = propagate_types(b->left, c, ok, NULL,
3617 Rnolabel | (b->op == Assign ? Rnoconstant : 0));
3622 if (propagate_types(b->right, c, ok, t, 0) != t)
3623 if (b->left->type == Xvar)
3624 type_err(c, "info: variable '%v' was set as %1 here.",
3625 cast(var, b->left)->var->where_set, t, rules, NULL);
3627 t = propagate_types(b->right, c, ok, NULL, Rnolabel);
3629 propagate_types(b->left, c, ok, t,
3630 (b->op == Assign ? Rnoconstant : 0));
3632 if (t && t->dup == NULL)
3633 type_err(c, "error: cannot assign value of type %1", b, t, 0, NULL);
3638 ###### interp binode cases
3641 lleft = linterp_exec(c, b->left, <ype);
3642 right = interp_exec(c, b->right, &rtype);
3644 free_value(ltype, lleft);
3645 dup_value(ltype, &right, lleft);
3652 struct variable *v = cast(var, b->left)->var;
3656 val = var_value(c, v);
3657 free_value(v->type, val);
3658 if (v->type->prepare_type)
3659 v->type->prepare_type(c, v->type, 0);
3661 right = interp_exec(c, b->right, &rtype);
3662 memcpy(val, &right, rtype->size);
3665 val_init(v->type, val);
3670 ### The `use` statement
3672 The `use` statement is the last "simple" statement. It is needed when
3673 the condition in a conditional statement is a block. `use` works much
3674 like `return` in C, but only completes the `condition`, not the whole
3680 ###### expr precedence
3683 ###### SimpleStatement Grammar
3685 $0 = new_pos(binode, $1);
3688 if ($0->right->type == Xvar) {
3689 struct var *v = cast(var, $0->right);
3690 if (v->var->type == Tnone) {
3691 /* Convert this to a label */
3694 v->var->type = Tlabel;
3695 val = global_alloc(c, Tlabel, v->var, NULL);
3701 ###### print binode cases
3704 do_indent(indent, "use ");
3705 print_exec(b->right, -1, bracket);
3710 ###### propagate binode cases
3713 /* result matches value */
3714 return propagate_types(b->right, c, ok, type, 0);
3716 ###### interp binode cases
3719 rv = interp_exec(c, b->right, &rvtype);
3722 ### The Conditional Statement
3724 This is the biggy and currently the only complex statement. This
3725 subsumes `if`, `while`, `do/while`, `switch`, and some parts of `for`.
3726 It is comprised of a number of parts, all of which are optional though
3727 set combinations apply. Each part is (usually) a key word (`then` is
3728 sometimes optional) followed by either an expression or a code block,
3729 except the `casepart` which is a "key word and an expression" followed
3730 by a code block. The code-block option is valid for all parts and,
3731 where an expression is also allowed, the code block can use the `use`
3732 statement to report a value. If the code block does not report a value
3733 the effect is similar to reporting `True`.
3735 The `else` and `case` parts, as well as `then` when combined with
3736 `if`, can contain a `use` statement which will apply to some
3737 containing conditional statement. `for` parts, `do` parts and `then`
3738 parts used with `for` can never contain a `use`, except in some
3739 subordinate conditional statement.
3741 If there is a `forpart`, it is executed first, only once.
3742 If there is a `dopart`, then it is executed repeatedly providing
3743 always that the `condpart` or `cond`, if present, does not return a non-True
3744 value. `condpart` can fail to return any value if it simply executes
3745 to completion. This is treated the same as returning `True`.
3747 If there is a `thenpart` it will be executed whenever the `condpart`
3748 or `cond` returns True (or does not return any value), but this will happen
3749 *after* `dopart` (when present).
3751 If `elsepart` is present it will be executed at most once when the
3752 condition returns `False` or some value that isn't `True` and isn't
3753 matched by any `casepart`. If there are any `casepart`s, they will be
3754 executed when the condition returns a matching value.
3756 The particular sorts of values allowed in case parts has not yet been
3757 determined in the language design, so nothing is prohibited.
3759 The various blocks in this complex statement potentially provide scope
3760 for variables as described earlier. Each such block must include the
3761 "OpenScope" nonterminal before parsing the block, and must call
3762 `var_block_close()` when closing the block.
3764 The code following "`if`", "`switch`" and "`for`" does not get its own
3765 scope, but is in a scope covering the whole statement, so names
3766 declared there cannot be redeclared elsewhere. Similarly the
3767 condition following "`while`" is in a scope the covers the body
3768 ("`do`" part) of the loop, and which does not allow conditional scope
3769 extension. Code following "`then`" (both looping and non-looping),
3770 "`else`" and "`case`" each get their own local scope.
3772 The type requirements on the code block in a `whilepart` are quite
3773 unusal. It is allowed to return a value of some identifiable type, in
3774 which case the loop aborts and an appropriate `casepart` is run, or it
3775 can return a Boolean, in which case the loop either continues to the
3776 `dopart` (on `True`) or aborts and runs the `elsepart` (on `False`).
3777 This is different both from the `ifpart` code block which is expected to
3778 return a Boolean, or the `switchpart` code block which is expected to
3779 return the same type as the casepart values. The correct analysis of
3780 the type of the `whilepart` code block is the reason for the
3781 `Rboolok` flag which is passed to `propagate_types()`.
3783 The `cond_statement` cannot fit into a `binode` so a new `exec` is
3792 struct exec *action;
3793 struct casepart *next;
3795 struct cond_statement {
3797 struct exec *forpart, *condpart, *dopart, *thenpart, *elsepart;
3798 struct casepart *casepart;
3801 ###### ast functions
3803 static void free_casepart(struct casepart *cp)
3807 free_exec(cp->value);
3808 free_exec(cp->action);
3815 static void free_cond_statement(struct cond_statement *s)
3819 free_exec(s->forpart);
3820 free_exec(s->condpart);
3821 free_exec(s->dopart);
3822 free_exec(s->thenpart);
3823 free_exec(s->elsepart);
3824 free_casepart(s->casepart);
3828 ###### free exec cases
3829 case Xcond_statement: free_cond_statement(cast(cond_statement, e)); break;
3831 ###### ComplexStatement Grammar
3832 | CondStatement ${ $0 = $<1; }$
3834 ###### expr precedence
3835 $TERM for then while do
3842 // A CondStatement must end with EOL, as does CondSuffix and
3844 // ForPart, ThenPart, SwitchPart, CasePart are non-empty and
3845 // may or may not end with EOL
3846 // WhilePart and IfPart include an appropriate Suffix
3848 // Both ForPart and Whilepart open scopes, and CondSuffix only
3849 // closes one - so in the first branch here we have another to close.
3850 CondStatement -> ForPart OptNL ThenPart OptNL WhilePart CondSuffix ${
3853 $0->thenpart = $<TP;
3854 $0->condpart = $WP.condpart; $WP.condpart = NULL;
3855 $0->dopart = $WP.dopart; $WP.dopart = NULL;
3856 var_block_close(c, CloseSequential);
3858 | ForPart OptNL WhilePart CondSuffix ${
3861 $0->condpart = $WP.condpart; $WP.condpart = NULL;
3862 $0->dopart = $WP.dopart; $WP.dopart = NULL;
3863 var_block_close(c, CloseSequential);
3865 | WhilePart CondSuffix ${
3867 $0->condpart = $WP.condpart; $WP.condpart = NULL;
3868 $0->dopart = $WP.dopart; $WP.dopart = NULL;
3870 | SwitchPart OptNL CasePart CondSuffix ${
3872 $0->condpart = $<SP;
3873 $CP->next = $0->casepart;
3874 $0->casepart = $<CP;
3876 | SwitchPart : IN OptNL CasePart CondSuffix OUT Newlines ${
3878 $0->condpart = $<SP;
3879 $CP->next = $0->casepart;
3880 $0->casepart = $<CP;
3882 | IfPart IfSuffix ${
3884 $0->condpart = $IP.condpart; $IP.condpart = NULL;
3885 $0->thenpart = $IP.thenpart; $IP.thenpart = NULL;
3886 // This is where we close an "if" statement
3887 var_block_close(c, CloseSequential);
3890 CondSuffix -> IfSuffix ${
3892 // This is where we close scope of the whole
3893 // "for" or "while" statement
3894 var_block_close(c, CloseSequential);
3896 | Newlines CasePart CondSuffix ${
3898 $CP->next = $0->casepart;
3899 $0->casepart = $<CP;
3901 | CasePart CondSuffix ${
3903 $CP->next = $0->casepart;
3904 $0->casepart = $<CP;
3907 IfSuffix -> Newlines ${ $0 = new(cond_statement); }$
3908 | Newlines ElsePart ${ $0 = $<EP; }$
3909 | ElsePart ${$0 = $<EP; }$
3911 ElsePart -> else OpenBlock Newlines ${
3912 $0 = new(cond_statement);
3913 $0->elsepart = $<OB;
3914 var_block_close(c, CloseElse);
3916 | else OpenScope CondStatement ${
3917 $0 = new(cond_statement);
3918 $0->elsepart = $<CS;
3919 var_block_close(c, CloseElse);
3923 CasePart -> case Expression OpenScope ColonBlock ${
3924 $0 = calloc(1,sizeof(struct casepart));
3927 var_block_close(c, CloseParallel);
3931 // These scopes are closed in CondSuffix
3932 ForPart -> for OpenBlock ${
3936 ThenPart -> then OpenBlock ${
3938 var_block_close(c, CloseSequential);
3942 // This scope is closed in CondSuffix
3943 WhilePart -> while UseBlock OptNL do Block ${
3947 | while OpenScope Expression ColonBlock ${
3948 $0.condpart = $<Exp;
3952 IfPart -> if UseBlock OptNL then OpenBlock ClosePara ${
3956 | if OpenScope Expression OpenScope ColonBlock ClosePara ${
3960 | if OpenScope Expression OpenScope OptNL then Block ClosePara ${
3966 // This scope is closed in CondSuffix
3967 SwitchPart -> switch OpenScope Expression ${
3970 | switch UseBlock ${
3974 ###### print exec cases
3976 case Xcond_statement:
3978 struct cond_statement *cs = cast(cond_statement, e);
3979 struct casepart *cp;
3981 do_indent(indent, "for");
3982 if (bracket) printf(" {\n"); else printf("\n");
3983 print_exec(cs->forpart, indent+1, bracket);
3986 do_indent(indent, "} then {\n");
3988 do_indent(indent, "then\n");
3989 print_exec(cs->thenpart, indent+1, bracket);
3991 if (bracket) do_indent(indent, "}\n");
3995 if (cs->condpart && cs->condpart->type == Xbinode &&
3996 cast(binode, cs->condpart)->op == Block) {
3998 do_indent(indent, "while {\n");
4000 do_indent(indent, "while\n");
4001 print_exec(cs->condpart, indent+1, bracket);
4003 do_indent(indent, "} do {\n");
4005 do_indent(indent, "do\n");
4006 print_exec(cs->dopart, indent+1, bracket);
4008 do_indent(indent, "}\n");
4010 do_indent(indent, "while ");
4011 print_exec(cs->condpart, 0, bracket);
4016 print_exec(cs->dopart, indent+1, bracket);
4018 do_indent(indent, "}\n");
4023 do_indent(indent, "switch");
4025 do_indent(indent, "if");
4026 if (cs->condpart && cs->condpart->type == Xbinode &&
4027 cast(binode, cs->condpart)->op == Block) {
4032 print_exec(cs->condpart, indent+1, bracket);
4034 do_indent(indent, "}\n");
4036 do_indent(indent, "then:\n");
4037 print_exec(cs->thenpart, indent+1, bracket);
4041 print_exec(cs->condpart, 0, bracket);
4047 print_exec(cs->thenpart, indent+1, bracket);
4049 do_indent(indent, "}\n");
4054 for (cp = cs->casepart; cp; cp = cp->next) {
4055 do_indent(indent, "case ");
4056 print_exec(cp->value, -1, 0);
4061 print_exec(cp->action, indent+1, bracket);
4063 do_indent(indent, "}\n");
4066 do_indent(indent, "else");
4071 print_exec(cs->elsepart, indent+1, bracket);
4073 do_indent(indent, "}\n");
4078 ###### propagate exec cases
4079 case Xcond_statement:
4081 // forpart and dopart must return Tnone
4082 // thenpart must return Tnone if there is a dopart,
4083 // otherwise it is like elsepart.
4085 // be bool if there is no casepart
4086 // match casepart->values if there is a switchpart
4087 // either be bool or match casepart->value if there
4089 // elsepart and casepart->action must match the return type
4090 // expected of this statement.
4091 struct cond_statement *cs = cast(cond_statement, prog);
4092 struct casepart *cp;
4094 t = propagate_types(cs->forpart, c, ok, Tnone, 0);
4095 if (!type_compat(Tnone, t, 0))
4097 t = propagate_types(cs->dopart, c, ok, Tnone, 0);
4098 if (!type_compat(Tnone, t, 0))
4101 t = propagate_types(cs->thenpart, c, ok, Tnone, 0);
4102 if (!type_compat(Tnone, t, 0))
4105 if (cs->casepart == NULL)
4106 propagate_types(cs->condpart, c, ok, Tbool, 0);
4108 /* Condpart must match case values, with bool permitted */
4110 for (cp = cs->casepart;
4111 cp && !t; cp = cp->next)
4112 t = propagate_types(cp->value, c, ok, NULL, 0);
4113 if (!t && cs->condpart)
4114 t = propagate_types(cs->condpart, c, ok, NULL, Rboolok);
4115 // Now we have a type (I hope) push it down
4117 for (cp = cs->casepart; cp; cp = cp->next)
4118 propagate_types(cp->value, c, ok, t, 0);
4119 propagate_types(cs->condpart, c, ok, t, Rboolok);
4122 // (if)then, else, and case parts must return expected type.
4123 if (!cs->dopart && !type)
4124 type = propagate_types(cs->thenpart, c, ok, NULL, rules);
4126 type = propagate_types(cs->elsepart, c, ok, NULL, rules);
4127 for (cp = cs->casepart;
4130 type = propagate_types(cp->action, c, ok, NULL, rules);
4133 propagate_types(cs->thenpart, c, ok, type, rules);
4134 propagate_types(cs->elsepart, c, ok, type, rules);
4135 for (cp = cs->casepart; cp ; cp = cp->next)
4136 propagate_types(cp->action, c, ok, type, rules);
4142 ###### interp exec cases
4143 case Xcond_statement:
4145 struct value v, cnd;
4146 struct type *vtype, *cndtype;
4147 struct casepart *cp;
4148 struct cond_statement *cs = cast(cond_statement, e);
4151 interp_exec(c, cs->forpart, NULL);
4154 cnd = interp_exec(c, cs->condpart, &cndtype);
4157 if (!(cndtype == Tnone ||
4158 (cndtype == Tbool && cnd.bool != 0)))
4160 // cnd is Tnone or Tbool, doesn't need to be freed
4162 interp_exec(c, cs->dopart, NULL);
4165 rv = interp_exec(c, cs->thenpart, &rvtype);
4166 if (rvtype != Tnone || !cs->dopart)
4168 free_value(rvtype, &rv);
4171 } while (cs->dopart);
4173 for (cp = cs->casepart; cp; cp = cp->next) {
4174 v = interp_exec(c, cp->value, &vtype);
4175 if (value_cmp(cndtype, vtype, &v, &cnd) == 0) {
4176 free_value(vtype, &v);
4177 free_value(cndtype, &cnd);
4178 rv = interp_exec(c, cp->action, &rvtype);
4181 free_value(vtype, &v);
4183 free_value(cndtype, &cnd);
4185 rv = interp_exec(c, cs->elsepart, &rvtype);
4192 ### Top level structure
4194 All the language elements so far can be used in various places. Now
4195 it is time to clarify what those places are.
4197 At the top level of a file there will be a number of declarations.
4198 Many of the things that can be declared haven't been described yet,
4199 such as functions, procedures, imports, and probably more.
4200 For now there are two sorts of things that can appear at the top
4201 level. They are predefined constants, `struct` types, and the `main`
4202 function. While the syntax will allow the `main` function to appear
4203 multiple times, that will trigger an error if it is actually attempted.
4205 The various declarations do not return anything. They store the
4206 various declarations in the parse context.
4208 ###### Parser: grammar
4211 Ocean -> OptNL DeclarationList
4213 ## declare terminals
4220 DeclarationList -> Declaration
4221 | DeclarationList Declaration
4223 Declaration -> ERROR Newlines ${
4225 "error: unhandled parse error", &$1);
4231 ## top level grammar
4235 ### The `const` section
4237 As well as being defined in with the code that uses them, constants
4238 can be declared at the top level. These have full-file scope, so they
4239 are always `InScope`. The value of a top level constant can be given
4240 as an expression, and this is evaluated immediately rather than in the
4241 later interpretation stage. Once we add functions to the language, we
4242 will need rules concern which, if any, can be used to define a top
4245 Constants are defined in a section that starts with the reserved word
4246 `const` and then has a block with a list of assignment statements.
4247 For syntactic consistency, these must use the double-colon syntax to
4248 make it clear that they are constants. Type can also be given: if
4249 not, the type will be determined during analysis, as with other
4252 As the types constants are inserted at the head of a list, printing
4253 them in the same order that they were read is not straight forward.
4254 We take a quadratic approach here and count the number of constants
4255 (variables of depth 0), then count down from there, each time
4256 searching through for the Nth constant for decreasing N.
4258 ###### top level grammar
4262 DeclareConstant -> const { IN OptNL ConstList OUT OptNL } Newlines
4263 | const { SimpleConstList } Newlines
4264 | const IN OptNL ConstList OUT Newlines
4265 | const SimpleConstList Newlines
4267 ConstList -> ConstList SimpleConstLine
4269 SimpleConstList -> SimpleConstList ; Const
4272 SimpleConstLine -> SimpleConstList Newlines
4273 | ERROR Newlines ${ tok_err(c, "Syntax error in constant", &$1); }$
4276 CType -> Type ${ $0 = $<1; }$
4279 Const -> IDENTIFIER :: CType = Expression ${ {
4283 v = var_decl(c, $1.txt);
4285 struct var *var = new_pos(var, $1);
4286 v->where_decl = var;
4291 v = var_ref(c, $1.txt);
4292 tok_err(c, "error: name already declared", &$1);
4293 type_err(c, "info: this is where '%v' was first declared",
4294 v->where_decl, NULL, 0, NULL);
4298 propagate_types($5, c, &ok, $3, 0);
4303 struct value res = interp_exec(c, $5, &v->type);
4304 global_alloc(c, v->type, v, &res);
4308 ###### print const decls
4313 while (target != 0) {
4315 for (v = context.in_scope; v; v=v->in_scope)
4316 if (v->depth == 0) {
4327 struct value *val = var_value(&context, v);
4328 printf(" %.*s :: ", v->name->name.len, v->name->name.txt);
4329 type_print(v->type, stdout);
4331 if (v->type == Tstr)
4333 print_value(v->type, val);
4334 if (v->type == Tstr)
4342 ### Finally the whole `main` function.
4344 An Ocean program can currently have only one function - `main` - and
4345 that must exist. It expects an array of strings with a provided size.
4346 Following this is a `block` which is the code to execute.
4348 As this is the top level, several things are handled a bit
4350 The function is not interpreted by `interp_exec` as that isn't
4351 passed the argument list which the program requires. Similarly type
4352 analysis is a bit more interesting at this level.
4354 ###### top level grammar
4356 DeclareFunction -> MainFunction ${ {
4358 type_err(c, "\"main\" defined a second time",
4364 ###### print binode cases
4367 do_indent(indent, "func main(");
4368 for (b2 = cast(binode, b->left); b2; b2 = cast(binode, b2->right)) {
4369 struct variable *v = cast(var, b2->left)->var;
4371 print_exec(b2->left, 0, 0);
4373 type_print(v->type, stdout);
4379 print_exec(b->right, indent+1, bracket);
4381 do_indent(indent, "}\n");
4384 ###### propagate binode cases
4386 case Func: abort(); // NOTEST
4388 ###### core functions
4390 static int analyse_prog(struct exec *prog, struct parse_context *c)
4392 struct binode *bp = cast(binode, prog);
4396 struct type *argv_type;
4397 struct text argv_type_name = { " argv", 5 };
4402 argv_type = add_type(c, argv_type_name, &array_prototype);
4403 argv_type->array.member = Tstr;
4404 argv_type->array.unspec = 1;
4406 for (b = cast(binode, bp->left); b; b = cast(binode, b->right)) {
4410 propagate_types(b->left, c, &ok, argv_type, 0);
4412 default: /* invalid */ // NOTEST
4413 propagate_types(b->left, c, &ok, Tnone, 0); // NOTEST
4419 propagate_types(bp->right, c, &ok, Tnone, 0);
4424 /* Make sure everything is still consistent */
4425 propagate_types(bp->right, c, &ok, Tnone, 0);
4432 static void interp_prog(struct parse_context *c, struct exec *prog,
4433 int argc, char **argv)
4435 struct binode *p = cast(binode, prog);
4443 al = cast(binode, p->left);
4445 struct var *v = cast(var, al->left);
4446 struct value *vl = var_value(c, v->var);
4456 mpq_set_ui(argcq, argc, 1);
4457 memcpy(var_value(c, t->array.vsize), &argcq, sizeof(argcq));
4458 t->prepare_type(c, t, 0);
4459 array_init(v->var->type, vl);
4460 for (i = 0; i < argc; i++) {
4461 struct value *vl2 = vl->array + i * v->var->type->array.member->size;
4464 arg.str.txt = argv[i];
4465 arg.str.len = strlen(argv[i]);
4466 free_value(Tstr, vl2);
4467 dup_value(Tstr, &arg, vl2);
4471 al = cast(binode, al->right);
4473 v = interp_exec(c, p->right, &vtype);
4474 free_value(vtype, &v);
4477 ###### interp binode cases
4479 case Func: abort(); // NOTEST
4481 ## And now to test it out.
4483 Having a language requires having a "hello world" program. I'll
4484 provide a little more than that: a program that prints "Hello world"
4485 finds the GCD of two numbers, prints the first few elements of
4486 Fibonacci, performs a binary search for a number, and a few other
4487 things which will likely grow as the languages grows.
4489 ###### File: oceani.mk
4492 @echo "===== DEMO ====="
4493 ./oceani --section "demo: hello" oceani.mdc 55 33
4499 four ::= 2 + 2 ; five ::= 10/2
4500 const pie ::= "I like Pie";
4501 cake ::= "The cake is"
4512 print "Hello World, what lovely oceans you have!"
4513 print "Are there", five, "?"
4514 print pi, pie, "but", cake
4516 A := $argv[1]; B := $argv[2]
4518 /* When a variable is defined in both branches of an 'if',
4519 * and used afterwards, the variables are merged.
4525 print "Is", A, "bigger than", B,"? ", bigger
4526 /* If a variable is not used after the 'if', no
4527 * merge happens, so types can be different
4530 double:string = "yes"
4531 print A, "is more than twice", B, "?", double
4534 print "double", B, "is", double
4539 if a > 0 and then b > 0:
4545 print "GCD of", A, "and", B,"is", a
4547 print a, "is not positive, cannot calculate GCD"
4549 print b, "is not positive, cannot calculate GCD"
4554 print "Fibonacci:", f1,f2,
4555 then togo = togo - 1
4563 /* Binary search... */
4568 mid := (lo + hi) / 2
4580 print "Yay, I found", target
4582 print "Closest I found was", mid
4587 // "middle square" PRNG. Not particularly good, but one my
4588 // Dad taught me - the first one I ever heard of.
4589 for i:=1; then i = i + 1; while i < size:
4590 n := list[i-1] * list[i-1]
4591 list[i] = (n / 100) % 10 000
4593 print "Before sort:",
4594 for i:=0; then i = i + 1; while i < size:
4598 for i := 1; then i=i+1; while i < size:
4599 for j:=i-1; then j=j-1; while j >= 0:
4600 if list[j] > list[j+1]:
4604 print " After sort:",
4605 for i:=0; then i = i + 1; while i < size:
4609 if 1 == 2 then print "yes"; else print "no"
4613 bob.alive = (bob.name == "Hello")
4614 print "bob", "is" if bob.alive else "isn't", "alive"