1 # Ocean Interpreter - Jamison Creek version
3 Ocean is intended to be a compiled language, so this interpreter is
4 not targeted at being the final product. It is, rather, an intermediate
5 stage and fills that role in two distinct ways.
7 Firstly, it exists as a platform to experiment with the early language
8 design. An interpreter is easy to write and easy to get working, so
9 the barrier for entry is lower if I aim to start with an interpreter.
11 Secondly, the plan for the Ocean compiler is to write it in the
12 [Ocean language](http://ocean-lang.org). To achieve this we naturally
13 need some sort of boot-strap process and this interpreter - written in
14 portable C - will fill that role. It will be used to bootstrap the
17 Two features that are not needed to fill either of these roles are
18 performance and completeness. The interpreter only needs to be fast
19 enough to run small test programs and occasionally to run the compiler
20 on itself. It only needs to be complete enough to test aspects of the
21 design which are developed before the compiler is working, and to run
22 the compiler on itself. Any features not used by the compiler when
23 compiling itself are superfluous. They may be included anyway, but
26 Nonetheless, the interpreter should end up being reasonably complete,
27 and any performance bottlenecks which appear and are easily fixed, will
32 This third version of the interpreter exists to test out some initial
33 ideas relating to types. Particularly it adds arrays (indexed from
34 zero) and simple structures. Basic control flow and variable scoping
35 are already fairly well established, as are basic numerical and
38 Some operators that have only recently been added, and so have not
39 generated all that much experience yet are "and then" and "or else" as
40 short-circuit Boolean operators, and the "if ... else" trinary
41 operator which can select between two expressions based on a third
42 (which appears syntactically in the middle).
44 The "func" clause currently only allows a "main" function to be
45 declared. That will be extended when proper function support is added.
47 An element that is present purely to make a usable language, and
48 without any expectation that they will remain, is the "print" statement
49 which performs simple output.
51 The current scalar types are "number", "Boolean", and "string".
52 Boolean will likely stay in its current form, the other two might, but
53 could just as easily be changed.
57 Versions of the interpreter which obviously do not support a complete
58 language will be named after creeks and streams. This one is Jamison
61 Once we have something reasonably resembling a complete language, the
62 names of rivers will be used.
63 Early versions of the compiler will be named after seas. Major
64 releases of the compiler will be named after oceans. Hopefully I will
65 be finished once I get to the Pacific Ocean release.
69 As well as parsing and executing a program, the interpreter can print
70 out the program from the parsed internal structure. This is useful
71 for validating the parsing.
72 So the main requirements of the interpreter are:
74 - Parse the program, possibly with tracing,
75 - Analyse the parsed program to ensure consistency,
77 - Execute the "main" function in the program, if no parsing or
78 consistency errors were found.
80 This is all performed by a single C program extracted with
83 There will be two formats for printing the program: a default and one
84 that uses bracketing. So a `--bracket` command line option is needed
85 for that. Normally the first code section found is used, however an
86 alternate section can be requested so that a file (such as this one)
87 can contain multiple programs. This is effected with the `--section`
90 This code must be compiled with `-fplan9-extensions` so that anonymous
91 structures can be used.
93 ###### File: oceani.mk
95 myCFLAGS := -Wall -g -fplan9-extensions
96 CFLAGS := $(filter-out $(myCFLAGS),$(CFLAGS)) $(myCFLAGS)
97 myLDLIBS:= libparser.o libscanner.o libmdcode.o -licuuc
98 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
100 all :: $(LDLIBS) oceani
101 oceani.c oceani.h : oceani.mdc parsergen
102 ./parsergen -o oceani --LALR --tag Parser oceani.mdc
103 oceani.mk: oceani.mdc md2c
106 oceani: oceani.o $(LDLIBS)
107 $(CC) $(CFLAGS) -o oceani oceani.o $(LDLIBS)
109 ###### Parser: header
111 struct parse_context;
113 struct parse_context {
114 struct token_config config;
123 #define container_of(ptr, type, member) ({ \
124 const typeof( ((type *)0)->member ) *__mptr = (ptr); \
125 (type *)( (char *)__mptr - offsetof(type,member) );})
127 #define config2context(_conf) container_of(_conf, struct parse_context, \
130 ###### Parser: reduce
131 struct parse_context *c = config2context(config);
139 #include <sys/mman.h>
158 static char Usage[] =
159 "Usage: oceani --trace --print --noexec --brackets --section=SectionName prog.ocn\n";
160 static const struct option long_options[] = {
161 {"trace", 0, NULL, 't'},
162 {"print", 0, NULL, 'p'},
163 {"noexec", 0, NULL, 'n'},
164 {"brackets", 0, NULL, 'b'},
165 {"section", 1, NULL, 's'},
168 const char *options = "tpnbs";
170 static void pr_err(char *msg) // NOTEST
172 fprintf(stderr, "%s\n", msg); // NOTEST
175 int main(int argc, char *argv[])
180 struct section *s, *ss;
181 char *section = NULL;
182 struct parse_context context = {
184 .ignored = (1 << TK_mark),
185 .number_chars = ".,_+- ",
190 int doprint=0, dotrace=0, doexec=1, brackets=0;
192 while ((opt = getopt_long(argc, argv, options, long_options, NULL))
195 case 't': dotrace=1; break;
196 case 'p': doprint=1; break;
197 case 'n': doexec=0; break;
198 case 'b': brackets=1; break;
199 case 's': section = optarg; break;
200 default: fprintf(stderr, Usage);
204 if (optind >= argc) {
205 fprintf(stderr, "oceani: no input file given\n");
208 fd = open(argv[optind], O_RDONLY);
210 fprintf(stderr, "oceani: cannot open %s\n", argv[optind]);
213 context.file_name = argv[optind];
214 len = lseek(fd, 0, 2);
215 file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
216 s = code_extract(file, file+len, pr_err);
218 fprintf(stderr, "oceani: could not find any code in %s\n",
223 ## context initialization
226 for (ss = s; ss; ss = ss->next) {
227 struct text sec = ss->section;
228 if (sec.len == strlen(section) &&
229 strncmp(sec.txt, section, sec.len) == 0)
233 fprintf(stderr, "oceani: cannot find section %s\n",
239 parse_oceani(ss->code, &context.config, dotrace ? stderr : NULL);
242 fprintf(stderr, "oceani: no main function found.\n");
243 context.parse_error = 1;
245 if (context.prog && !context.parse_error) {
246 if (!analyse_prog(context.prog, &context)) {
247 fprintf(stderr, "oceani: type error in program - not running.\n");
248 context.parse_error = 1;
251 if (context.prog && doprint) {
254 print_exec(context.prog, 0, brackets);
256 if (context.prog && doexec && !context.parse_error)
257 interp_prog(&context, context.prog, argc - optind, argv+optind);
258 free_exec(context.prog);
261 struct section *t = s->next;
267 ## free context types
268 ## free context storage
269 exit(context.parse_error ? 1 : 0);
274 The four requirements of parse, analyse, print, interpret apply to
275 each language element individually so that is how most of the code
278 Three of the four are fairly self explanatory. The one that requires
279 a little explanation is the analysis step.
281 The current language design does not require the types of variables to
282 be declared, but they must still have a single type. Different
283 operations impose different requirements on the variables, for example
284 addition requires both arguments to be numeric, and assignment
285 requires the variable on the left to have the same type as the
286 expression on the right.
288 Analysis involves propagating these type requirements around and
289 consequently setting the type of each variable. If any requirements
290 are violated (e.g. a string is compared with a number) or if a
291 variable needs to have two different types, then an error is raised
292 and the program will not run.
294 If the same variable is declared in both branchs of an 'if/else', or
295 in all cases of a 'switch' then the multiple instances may be merged
296 into just one variable if the variable is referenced after the
297 conditional statement. When this happens, the types must naturally be
298 consistent across all the branches. When the variable is not used
299 outside the if, the variables in the different branches are distinct
300 and can be of different types.
302 Undeclared names may only appear in "use" statements and "case" expressions.
303 These names are given a type of "label" and a unique value.
304 This allows them to fill the role of a name in an enumerated type, which
305 is useful for testing the `switch` statement.
307 As we will see, the condition part of a `while` statement can return
308 either a Boolean or some other type. This requires that the expected
309 type that gets passed around comprises a type and a flag to indicate
310 that `Tbool` is also permitted.
312 As there are, as yet, no distinct types that are compatible, there
313 isn't much subtlety in the analysis. When we have distinct number
314 types, this will become more interesting.
318 When analysis discovers an inconsistency it needs to report an error;
319 just refusing to run the code ensures that the error doesn't cascade,
320 but by itself it isn't very useful. A clear understanding of the sort
321 of error message that are useful will help guide the process of
324 At a simplistic level, the only sort of error that type analysis can
325 report is that the type of some construct doesn't match a contextual
326 requirement. For example, in `4 + "hello"` the addition provides a
327 contextual requirement for numbers, but `"hello"` is not a number. In
328 this particular example no further information is needed as the types
329 are obvious from local information. When a variable is involved that
330 isn't the case. It may be helpful to explain why the variable has a
331 particular type, by indicating the location where the type was set,
332 whether by declaration or usage.
334 Using a recursive-descent analysis we can easily detect a problem at
335 multiple locations. In "`hello:= "there"; 4 + hello`" the addition
336 will detect that one argument is not a number and the usage of `hello`
337 will detect that a number was wanted, but not provided. In this
338 (early) version of the language, we will generate error reports at
339 multiple locations, so the use of `hello` will report an error and
340 explain were the value was set, and the addition will report an error
341 and say why numbers are needed. To be able to report locations for
342 errors, each language element will need to record a file location
343 (line and column) and each variable will need to record the language
344 element where its type was set. For now we will assume that each line
345 of an error message indicates one location in the file, and up to 2
346 types. So we provide a `printf`-like function which takes a format, a
347 location (a `struct exec` which has not yet been introduced), and 2
348 types. "`%1`" reports the first type, "`%2`" reports the second. We
349 will need a function to print the location, once we know how that is
350 stored. e As will be explained later, there are sometimes extra rules for
351 type matching and they might affect error messages, we need to pass those
354 As well as type errors, we sometimes need to report problems with
355 tokens, which might be unexpected or might name a type that has not
356 been defined. For these we have `tok_err()` which reports an error
357 with a given token. Each of the error functions sets the flag in the
358 context so indicate that parsing failed.
362 static void fput_loc(struct exec *loc, FILE *f);
364 ###### core functions
366 static void type_err(struct parse_context *c,
367 char *fmt, struct exec *loc,
368 struct type *t1, int rules, struct type *t2)
370 fprintf(stderr, "%s:", c->file_name);
371 fput_loc(loc, stderr);
372 for (; *fmt ; fmt++) {
379 case '%': fputc(*fmt, stderr); break; // NOTEST
380 default: fputc('?', stderr); break; // NOTEST
382 type_print(t1, stderr);
385 type_print(t2, stderr);
394 static void tok_err(struct parse_context *c, char *fmt, struct token *t)
396 fprintf(stderr, "%s:%d:%d: %s: %.*s\n", c->file_name, t->line, t->col, fmt,
397 t->txt.len, t->txt.txt);
401 ## Entities: declared and predeclared.
403 There are various "things" that the language and/or the interpreter
404 needs to know about to parse and execute a program. These include
405 types, variables, values, and executable code. These are all lumped
406 together under the term "entities" (calling them "objects" would be
407 confusing) and introduced here. The following section will present the
408 different specific code elements which comprise or manipulate these
413 Values come in a wide range of types, with more likely to be added.
414 Each type needs to be able to print its own values (for convenience at
415 least) as well as to compare two values, at least for equality and
416 possibly for order. For now, values might need to be duplicated and
417 freed, though eventually such manipulations will be better integrated
420 Rather than requiring every numeric type to support all numeric
421 operations (add, multiple, etc), we allow types to be able to present
422 as one of a few standard types: integer, float, and fraction. The
423 existence of these conversion functions eventually enable types to
424 determine if they are compatible with other types, though such types
425 have not yet been implemented.
427 Named type are stored in a simple linked list. Objects of each type are
428 "values" which are often passed around by value.
435 ## value union fields
443 void (*init)(struct type *type, struct value *val);
444 void (*prepare_type)(struct parse_context *c, struct type *type, int parse_time);
445 void (*print)(struct type *type, struct value *val);
446 void (*print_type)(struct type *type, FILE *f);
447 int (*cmp_order)(struct type *t1, struct type *t2,
448 struct value *v1, struct value *v2);
449 int (*cmp_eq)(struct type *t1, struct type *t2,
450 struct value *v1, struct value *v2);
451 void (*dup)(struct type *type, struct value *vold, struct value *vnew);
452 void (*free)(struct type *type, struct value *val);
453 void (*free_type)(struct type *t);
454 long long (*to_int)(struct value *v);
455 double (*to_float)(struct value *v);
456 int (*to_mpq)(mpq_t *q, struct value *v);
465 struct type *typelist;
469 static struct type *find_type(struct parse_context *c, struct text s)
471 struct type *l = c->typelist;
474 text_cmp(l->name, s) != 0)
479 static struct type *add_type(struct parse_context *c, struct text s,
484 n = calloc(1, sizeof(*n));
487 n->next = c->typelist;
492 static void free_type(struct type *t)
494 /* The type is always a reference to something in the
495 * context, so we don't need to free anything.
499 static void free_value(struct type *type, struct value *v)
503 memset(v, 0x5a, type->size);
507 static void type_print(struct type *type, FILE *f)
510 fputs("*unknown*type*", f); // NOTEST
511 else if (type->name.len)
512 fprintf(f, "%.*s", type->name.len, type->name.txt);
513 else if (type->print_type)
514 type->print_type(type, f);
516 fputs("*invalid*type*", f); // NOTEST
519 static void val_init(struct type *type, struct value *val)
521 if (type && type->init)
522 type->init(type, val);
525 static void dup_value(struct type *type,
526 struct value *vold, struct value *vnew)
528 if (type && type->dup)
529 type->dup(type, vold, vnew);
532 static int value_cmp(struct type *tl, struct type *tr,
533 struct value *left, struct value *right)
535 if (tl && tl->cmp_order)
536 return tl->cmp_order(tl, tr, left, right);
537 if (tl && tl->cmp_eq) // NOTEST
538 return tl->cmp_eq(tl, tr, left, right); // NOTEST
542 static void print_value(struct type *type, struct value *v)
544 if (type && type->print)
545 type->print(type, v);
547 printf("*Unknown*"); // NOTEST
552 static void free_value(struct type *type, struct value *v);
553 static int type_compat(struct type *require, struct type *have, int rules);
554 static void type_print(struct type *type, FILE *f);
555 static void val_init(struct type *type, struct value *v);
556 static void dup_value(struct type *type,
557 struct value *vold, struct value *vnew);
558 static int value_cmp(struct type *tl, struct type *tr,
559 struct value *left, struct value *right);
560 static void print_value(struct type *type, struct value *v);
562 ###### free context types
564 while (context.typelist) {
565 struct type *t = context.typelist;
567 context.typelist = t->next;
573 Type can be specified for local variables, for fields in a structure,
574 for formal parameters to functions, and possibly elsewhere. Different
575 rules may apply in different contexts. As a minimum, a named type may
576 always be used. Currently the type of a formal parameter can be
577 different from types in other contexts, so we have a separate grammar
583 Type -> IDENTIFIER ${
584 $0 = find_type(c, $1.txt);
587 "error: undefined type", &$1);
594 FormalType -> Type ${ $0 = $<1; }$
595 ## formal type grammar
599 Values of the base types can be numbers, which we represent as
600 multi-precision fractions, strings, Booleans and labels. When
601 analysing the program we also need to allow for places where no value
602 is meaningful (type `Tnone`) and where we don't know what type to
603 expect yet (type is `NULL`).
605 Values are never shared, they are always copied when used, and freed
606 when no longer needed.
608 When propagating type information around the program, we need to
609 determine if two types are compatible, where type `NULL` is compatible
610 with anything. There are two special cases with type compatibility,
611 both related to the Conditional Statement which will be described
612 later. In some cases a Boolean can be accepted as well as some other
613 primary type, and in others any type is acceptable except a label (`Vlabel`).
614 A separate function encoding these cases will simplify some code later.
616 ###### type functions
618 int (*compat)(struct type *this, struct type *other);
622 static int type_compat(struct type *require, struct type *have, int rules)
624 if ((rules & Rboolok) && have == Tbool)
626 if ((rules & Rnolabel) && have == Tlabel)
628 if (!require || !have)
632 return require->compat(require, have);
634 return require == have;
639 #include "parse_string.h"
640 #include "parse_number.h"
643 myLDLIBS := libnumber.o libstring.o -lgmp
644 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
646 ###### type union fields
647 enum vtype {Vnone, Vstr, Vnum, Vbool, Vlabel} vtype;
649 ###### value union fields
656 static void _free_value(struct type *type, struct value *v)
660 switch (type->vtype) {
662 case Vstr: free(v->str.txt); break;
663 case Vnum: mpq_clear(v->num); break;
669 ###### value functions
671 static void _val_init(struct type *type, struct value *val)
673 switch(type->vtype) {
674 case Vnone: // NOTEST
677 mpq_init(val->num); break;
679 val->str.txt = malloc(1);
691 static void _dup_value(struct type *type,
692 struct value *vold, struct value *vnew)
694 switch (type->vtype) {
695 case Vnone: // NOTEST
698 vnew->label = vold->label;
701 vnew->bool = vold->bool;
705 mpq_set(vnew->num, vold->num);
708 vnew->str.len = vold->str.len;
709 vnew->str.txt = malloc(vnew->str.len);
710 memcpy(vnew->str.txt, vold->str.txt, vnew->str.len);
715 static int _value_cmp(struct type *tl, struct type *tr,
716 struct value *left, struct value *right)
720 return tl - tr; // NOTEST
722 case Vlabel: cmp = left->label == right->label ? 0 : 1; break;
723 case Vnum: cmp = mpq_cmp(left->num, right->num); break;
724 case Vstr: cmp = text_cmp(left->str, right->str); break;
725 case Vbool: cmp = left->bool - right->bool; break;
726 case Vnone: cmp = 0; // NOTEST
731 static void _print_value(struct type *type, struct value *v)
733 switch (type->vtype) {
734 case Vnone: // NOTEST
735 printf("*no-value*"); break; // NOTEST
736 case Vlabel: // NOTEST
737 printf("*label-%p*", v->label); break; // NOTEST
739 printf("%.*s", v->str.len, v->str.txt); break;
741 printf("%s", v->bool ? "True":"False"); break;
746 mpf_set_q(fl, v->num);
747 gmp_printf("%Fg", fl);
754 static void _free_value(struct type *type, struct value *v);
756 static struct type base_prototype = {
758 .print = _print_value,
759 .cmp_order = _value_cmp,
760 .cmp_eq = _value_cmp,
765 static struct type *Tbool, *Tstr, *Tnum, *Tnone, *Tlabel;
768 static struct type *add_base_type(struct parse_context *c, char *n,
769 enum vtype vt, int size)
771 struct text txt = { n, strlen(n) };
774 t = add_type(c, txt, &base_prototype);
777 t->align = size > sizeof(void*) ? sizeof(void*) : size;
778 if (t->size & (t->align - 1))
779 t->size = (t->size | (t->align - 1)) + 1; // NOTEST
783 ###### context initialization
785 Tbool = add_base_type(&context, "Boolean", Vbool, sizeof(char));
786 Tstr = add_base_type(&context, "string", Vstr, sizeof(struct text));
787 Tnum = add_base_type(&context, "number", Vnum, sizeof(mpq_t));
788 Tnone = add_base_type(&context, "none", Vnone, 0);
789 Tlabel = add_base_type(&context, "label", Vlabel, sizeof(void*));
793 Variables are scoped named values. We store the names in a linked list
794 of "bindings" sorted in lexical order, and use sequential search and
801 struct binding *next; // in lexical order
805 This linked list is stored in the parse context so that "reduce"
806 functions can find or add variables, and so the analysis phase can
807 ensure that every variable gets a type.
811 struct binding *varlist; // In lexical order
815 static struct binding *find_binding(struct parse_context *c, struct text s)
817 struct binding **l = &c->varlist;
822 (cmp = text_cmp((*l)->name, s)) < 0)
826 n = calloc(1, sizeof(*n));
833 Each name can be linked to multiple variables defined in different
834 scopes. Each scope starts where the name is declared and continues
835 until the end of the containing code block. Scopes of a given name
836 cannot nest, so a declaration while a name is in-scope is an error.
838 ###### binding fields
839 struct variable *var;
843 struct variable *previous;
845 struct binding *name;
846 struct exec *where_decl;// where name was declared
847 struct exec *where_set; // where type was set
851 When a scope closes, the values of the variables might need to be freed.
852 This happens in the context of some `struct exec` and each `exec` will
853 need to know which variables need to be freed when it completes.
856 struct variable *to_free;
858 ####### variable fields
859 struct exec *cleanup_exec;
860 struct variable *next_free;
862 ####### interp exec cleanup
865 for (v = e->to_free; v; v = v->next_free) {
866 struct value *val = var_value(c, v);
867 free_value(v->type, val);
872 static void variable_unlink_exec(struct variable *v)
874 struct variable **vp;
875 if (!v->cleanup_exec)
877 for (vp = &v->cleanup_exec->to_free;
878 *vp; vp = &(*vp)->next_free) {
882 v->cleanup_exec = NULL;
887 While the naming seems strange, we include local constants in the
888 definition of variables. A name declared `var := value` can
889 subsequently be changed, but a name declared `var ::= value` cannot -
892 ###### variable fields
895 Scopes in parallel branches can be partially merged. More
896 specifically, if a given name is declared in both branches of an
897 if/else then its scope is a candidate for merging. Similarly if
898 every branch of an exhaustive switch (e.g. has an "else" clause)
899 declares a given name, then the scopes from the branches are
900 candidates for merging.
902 Note that names declared inside a loop (which is only parallel to
903 itself) are never visible after the loop. Similarly names defined in
904 scopes which are not parallel, such as those started by `for` and
905 `switch`, are never visible after the scope. Only variables defined in
906 both `then` and `else` (including the implicit then after an `if`, and
907 excluding `then` used with `for`) and in all `case`s and `else` of a
908 `switch` or `while` can be visible beyond the `if`/`switch`/`while`.
910 Labels, which are a bit like variables, follow different rules.
911 Labels are not explicitly declared, but if an undeclared name appears
912 in a context where a label is legal, that effectively declares the
913 name as a label. The declaration remains in force (or in scope) at
914 least to the end of the immediately containing block and conditionally
915 in any larger containing block which does not declare the name in some
916 other way. Importantly, the conditional scope extension happens even
917 if the label is only used in one parallel branch of a conditional --
918 when used in one branch it is treated as having been declared in all
921 Merge candidates are tentatively visible beyond the end of the
922 branching statement which creates them. If the name is used, the
923 merge is affirmed and they become a single variable visible at the
924 outer layer. If not - if it is redeclared first - the merge lapses.
926 To track scopes we have an extra stack, implemented as a linked list,
927 which roughly parallels the parse stack and which is used exclusively
928 for scoping. When a new scope is opened, a new frame is pushed and
929 the child-count of the parent frame is incremented. This child-count
930 is used to distinguish between the first of a set of parallel scopes,
931 in which declared variables must not be in scope, and subsequent
932 branches, whether they may already be conditionally scoped.
934 To push a new frame *before* any code in the frame is parsed, we need a
935 grammar reduction. This is most easily achieved with a grammar
936 element which derives the empty string, and creates the new scope when
937 it is recognised. This can be placed, for example, between a keyword
938 like "if" and the code following it.
942 struct scope *parent;
948 struct scope *scope_stack;
951 static void scope_pop(struct parse_context *c)
953 struct scope *s = c->scope_stack;
955 c->scope_stack = s->parent;
960 static void scope_push(struct parse_context *c)
962 struct scope *s = calloc(1, sizeof(*s));
964 c->scope_stack->child_count += 1;
965 s->parent = c->scope_stack;
973 OpenScope -> ${ scope_push(c); }$
975 Each variable records a scope depth and is in one of four states:
977 - "in scope". This is the case between the declaration of the
978 variable and the end of the containing block, and also between
979 the usage with affirms a merge and the end of that block.
981 The scope depth is not greater than the current parse context scope
982 nest depth. When the block of that depth closes, the state will
983 change. To achieve this, all "in scope" variables are linked
984 together as a stack in nesting order.
986 - "pending". The "in scope" block has closed, but other parallel
987 scopes are still being processed. So far, every parallel block at
988 the same level that has closed has declared the name.
990 The scope depth is the depth of the last parallel block that
991 enclosed the declaration, and that has closed.
993 - "conditionally in scope". The "in scope" block and all parallel
994 scopes have closed, and no further mention of the name has been seen.
995 This state includes a secondary nest depth (`min_depth`) which records
996 the outermost scope seen since the variable became conditionally in
997 scope. If a use of the name is found, the variable becomes "in scope"
998 and that secondary depth becomes the recorded scope depth. If the
999 name is declared as a new variable, the old variable becomes "out of
1000 scope" and the recorded scope depth stays unchanged.
1002 - "out of scope". The variable is neither in scope nor conditionally
1003 in scope. It is permanently out of scope now and can be removed from
1004 the "in scope" stack.
1006 ###### variable fields
1007 int depth, min_depth;
1008 enum { OutScope, PendingScope, CondScope, InScope } scope;
1009 struct variable *in_scope;
1011 ###### parse context
1013 struct variable *in_scope;
1015 All variables with the same name are linked together using the
1016 'previous' link. Those variable that have been affirmatively merged all
1017 have a 'merged' pointer that points to one primary variable - the most
1018 recently declared instance. When merging variables, we need to also
1019 adjust the 'merged' pointer on any other variables that had previously
1020 been merged with the one that will no longer be primary.
1022 A variable that is no longer the most recent instance of a name may
1023 still have "pending" scope, if it might still be merged with most
1024 recent instance. These variables don't really belong in the
1025 "in_scope" list, but are not immediately removed when a new instance
1026 is found. Instead, they are detected and ignored when considering the
1027 list of in_scope names.
1029 The storage of the value of a variable will be described later. For now
1030 we just need to know that when a variable goes out of scope, it might
1031 need to be freed. For this we need to be able to find it, so assume that
1032 `var_value()` will provide that.
1034 ###### variable fields
1035 struct variable *merged;
1037 ###### ast functions
1039 static void variable_merge(struct variable *primary, struct variable *secondary)
1043 primary = primary->merged;
1045 for (v = primary->previous; v; v=v->previous)
1046 if (v == secondary || v == secondary->merged ||
1047 v->merged == secondary ||
1048 v->merged == secondary->merged) {
1049 v->scope = OutScope;
1050 v->merged = primary;
1051 variable_unlink_exec(v);
1055 ###### forward decls
1056 static struct value *var_value(struct parse_context *c, struct variable *v);
1058 ###### free global vars
1060 while (context.varlist) {
1061 struct binding *b = context.varlist;
1062 struct variable *v = b->var;
1063 context.varlist = b->next;
1066 struct variable *t = v;
1070 free_value(t->type, var_value(&context, t));
1072 free_exec(t->where_decl);
1078 #### Manipulating Bindings
1080 When a name is conditionally visible, a new declaration discards the
1081 old binding - the condition lapses. Conversely a usage of the name
1082 affirms the visibility and extends it to the end of the containing
1083 block - i.e. the block that contains both the original declaration and
1084 the latest usage. This is determined from `min_depth`. When a
1085 conditionally visible variable gets affirmed like this, it is also
1086 merged with other conditionally visible variables with the same name.
1088 When we parse a variable declaration we either report an error if the
1089 name is currently bound, or create a new variable at the current nest
1090 depth if the name is unbound or bound to a conditionally scoped or
1091 pending-scope variable. If the previous variable was conditionally
1092 scoped, it and its homonyms becomes out-of-scope.
1094 When we parse a variable reference (including non-declarative assignment
1095 "foo = bar") we report an error if the name is not bound or is bound to
1096 a pending-scope variable; update the scope if the name is bound to a
1097 conditionally scoped variable; or just proceed normally if the named
1098 variable is in scope.
1100 When we exit a scope, any variables bound at this level are either
1101 marked out of scope or pending-scoped, depending on whether the scope
1102 was sequential or parallel. Here a "parallel" scope means the "then"
1103 or "else" part of a conditional, or any "case" or "else" branch of a
1104 switch. Other scopes are "sequential".
1106 When exiting a parallel scope we check if there are any variables that
1107 were previously pending and are still visible. If there are, then
1108 they weren't redeclared in the most recent scope, so they cannot be
1109 merged and must become out-of-scope. If it is not the first of
1110 parallel scopes (based on `child_count`), we check that there was a
1111 previous binding that is still pending-scope. If there isn't, the new
1112 variable must now be out-of-scope.
1114 When exiting a sequential scope that immediately enclosed parallel
1115 scopes, we need to resolve any pending-scope variables. If there was
1116 no `else` clause, and we cannot determine that the `switch` was exhaustive,
1117 we need to mark all pending-scope variable as out-of-scope. Otherwise
1118 all pending-scope variables become conditionally scoped.
1121 enum closetype { CloseSequential, CloseParallel, CloseElse };
1123 ###### ast functions
1125 static struct variable *var_decl(struct parse_context *c, struct text s)
1127 struct binding *b = find_binding(c, s);
1128 struct variable *v = b->var;
1130 switch (v ? v->scope : OutScope) {
1132 /* Caller will report the error */
1136 v && v->scope == CondScope;
1138 v->scope = OutScope;
1142 v = calloc(1, sizeof(*v));
1143 v->previous = b->var;
1147 v->min_depth = v->depth = c->scope_depth;
1149 v->in_scope = c->in_scope;
1154 static struct variable *var_ref(struct parse_context *c, struct text s)
1156 struct binding *b = find_binding(c, s);
1157 struct variable *v = b->var;
1158 struct variable *v2;
1160 switch (v ? v->scope : OutScope) {
1163 /* Caller will report the error */
1166 /* All CondScope variables of this name need to be merged
1167 * and become InScope
1169 v->depth = v->min_depth;
1171 for (v2 = v->previous;
1172 v2 && v2->scope == CondScope;
1174 variable_merge(v, v2);
1182 static void var_block_close(struct parse_context *c, enum closetype ct,
1185 /* Close off all variables that are in_scope.
1186 * Some variables in c->scope may already be not-in-scope,
1187 * such as when a PendingScope variable is hidden by a new
1188 * variable with the same name.
1189 * So we check for v->name->var != v and drop them.
1190 * If we choose to make a variable OutScope, we drop it
1193 struct variable *v, **vp, *v2;
1196 for (vp = &c->in_scope;
1197 (v = *vp) && v->min_depth > c->scope_depth;
1198 (v->scope == OutScope || v->name->var != v)
1199 ? (*vp = v->in_scope, 0)
1200 : ( vp = &v->in_scope, 0)) {
1201 v->min_depth = c->scope_depth;
1202 if (v->name->var != v)
1203 /* This is still in scope, but we haven't just
1207 v->min_depth = c->scope_depth;
1208 if (v->scope == InScope) {
1209 /* This variable gets cleaned up when 'e' finishes */
1210 variable_unlink_exec(v);
1211 v->cleanup_exec = e;
1212 v->next_free = e->to_free;
1217 case CloseParallel: /* handle PendingScope */
1221 if (c->scope_stack->child_count == 1)
1222 /* first among parallel branches */
1223 v->scope = PendingScope;
1224 else if (v->previous &&
1225 v->previous->scope == PendingScope)
1226 /* all previous branches used name */
1227 v->scope = PendingScope;
1228 else if (v->type == Tlabel)
1229 /* Labels remain pending even when not used */
1230 v->scope = PendingScope; // UNTESTED
1232 v->scope = OutScope;
1233 if (ct == CloseElse) {
1234 /* All Pending variables with this name
1235 * are now Conditional */
1237 v2 && v2->scope == PendingScope;
1239 v2->scope = CondScope;
1243 /* Not possible as it would require
1244 * parallel scope to be nested immediately
1245 * in a parallel scope, and that never
1249 /* Not possible as we already tested for
1255 case CloseSequential:
1256 if (v->type == Tlabel)
1257 v->scope = PendingScope;
1260 v->scope = OutScope;
1263 /* There was no 'else', so we can only become
1264 * conditional if we know the cases were exhaustive,
1265 * and that doesn't mean anything yet.
1266 * So only labels become conditional..
1269 v2 && v2->scope == PendingScope;
1271 if (v2->type == Tlabel)
1272 v2->scope = CondScope;
1274 v2->scope = OutScope;
1277 case OutScope: break;
1286 The value of a variable is store separately from the variable, on an
1287 analogue of a stack frame. There are (currently) two frames that can be
1288 active. A global frame which currently only stores constants, and a
1289 stacked frame which stores local variables. Each variable knows if it
1290 is global or not, and what its index into the frame is.
1292 Values in the global frame are known immediately they are relevant, so
1293 the frame needs to be reallocated as it grows so it can store those
1294 values. The local frame doesn't get values until the interpreted phase
1295 is started, so there is no need to allocate until the size is known.
1297 ###### variable fields
1301 ###### parse context
1303 short global_size, global_alloc;
1305 void *global, *local;
1307 ###### ast functions
1309 static struct value *var_value(struct parse_context *c, struct variable *v)
1312 if (!c->local || !v->type)
1313 return NULL; // NOTEST
1314 if (v->frame_pos + v->type->size > c->local_size) {
1315 printf("INVALID frame_pos\n"); // NOTEST
1318 return c->local + v->frame_pos;
1320 if (c->global_size > c->global_alloc) {
1321 int old = c->global_alloc;
1322 c->global_alloc = (c->global_size | 1023) + 1024;
1323 c->global = realloc(c->global, c->global_alloc);
1324 memset(c->global + old, 0, c->global_alloc - old);
1326 return c->global + v->frame_pos;
1329 static struct value *global_alloc(struct parse_context *c, struct type *t,
1330 struct variable *v, struct value *init)
1333 struct variable scratch;
1335 if (t->prepare_type)
1336 t->prepare_type(c, t, 1); // NOTEST
1338 if (c->global_size & (t->align - 1))
1339 c->global_size = (c->global_size + t->align) & ~(t->align-1); // UNTESTED
1344 v->frame_pos = c->global_size;
1346 c->global_size += v->type->size;
1347 ret = var_value(c, v);
1349 memcpy(ret, init, t->size);
1355 As global values are found -- struct field initializers, labels etc --
1356 `global_alloc()` is called to record the value in the global frame.
1358 When the program is fully parsed, we need to walk the list of variables
1359 to find any that weren't merged away and that aren't global, and to
1360 calculate the frame size and assign a frame position for each variable.
1361 For this we have `scope_finalize()`.
1363 ###### ast functions
1365 static void scope_finalize(struct parse_context *c)
1369 for (b = c->varlist; b; b = b->next) {
1371 for (v = b->var; v; v = v->previous) {
1372 struct type *t = v->type;
1377 if (c->local_size & (t->align - 1))
1378 c->local_size = (c->local_size + t->align) & ~(t->align-1);
1379 v->frame_pos = c->local_size;
1380 c->local_size += v->type->size;
1383 c->local = calloc(1, c->local_size);
1386 ###### free context storage
1387 free(context.global);
1388 free(context.local);
1392 Executables can be lots of different things. In many cases an
1393 executable is just an operation combined with one or two other
1394 executables. This allows for expressions and lists etc. Other times an
1395 executable is something quite specific like a constant or variable name.
1396 So we define a `struct exec` to be a general executable with a type, and
1397 a `struct binode` which is a subclass of `exec`, forms a node in a
1398 binary tree, and holds an operation. There will be other subclasses,
1399 and to access these we need to be able to `cast` the `exec` into the
1400 various other types. The first field in any `struct exec` is the type
1401 from the `exec_types` enum.
1404 #define cast(structname, pointer) ({ \
1405 const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \
1406 if (__mptr && *__mptr != X##structname) abort(); \
1407 (struct structname *)( (char *)__mptr);})
1409 #define new(structname) ({ \
1410 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1411 __ptr->type = X##structname; \
1412 __ptr->line = -1; __ptr->column = -1; \
1415 #define new_pos(structname, token) ({ \
1416 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1417 __ptr->type = X##structname; \
1418 __ptr->line = token.line; __ptr->column = token.col; \
1427 enum exec_types type;
1436 struct exec *left, *right;
1439 ###### ast functions
1441 static int __fput_loc(struct exec *loc, FILE *f)
1445 if (loc->line >= 0) {
1446 fprintf(f, "%d:%d: ", loc->line, loc->column);
1449 if (loc->type == Xbinode)
1450 return __fput_loc(cast(binode,loc)->left, f) ||
1451 __fput_loc(cast(binode,loc)->right, f); // NOTEST
1454 static void fput_loc(struct exec *loc, FILE *f)
1456 if (!__fput_loc(loc, f))
1457 fprintf(f, "??:??: "); // NOTEST
1460 Each different type of `exec` node needs a number of functions defined,
1461 a bit like methods. We must be able to free it, print it, analyse it
1462 and execute it. Once we have specific `exec` types we will need to
1463 parse them too. Let's take this a bit more slowly.
1467 The parser generator requires a `free_foo` function for each struct
1468 that stores attributes and they will often be `exec`s and subtypes
1469 there-of. So we need `free_exec` which can handle all the subtypes,
1470 and we need `free_binode`.
1472 ###### ast functions
1474 static void free_binode(struct binode *b)
1479 free_exec(b->right);
1483 ###### core functions
1484 static void free_exec(struct exec *e)
1493 ###### forward decls
1495 static void free_exec(struct exec *e);
1497 ###### free exec cases
1498 case Xbinode: free_binode(cast(binode, e)); break;
1502 Printing an `exec` requires that we know the current indent level for
1503 printing line-oriented components. As will become clear later, we
1504 also want to know what sort of bracketing to use.
1506 ###### ast functions
1508 static void do_indent(int i, char *str)
1515 ###### core functions
1516 static void print_binode(struct binode *b, int indent, int bracket)
1520 ## print binode cases
1524 static void print_exec(struct exec *e, int indent, int bracket)
1530 print_binode(cast(binode, e), indent, bracket); break;
1535 do_indent(indent, "/* FREE");
1536 for (v = e->to_free; v; v = v->next_free)
1537 printf(" %.*s(%c%d+%d)", v->name->name.len, v->name->name.txt,
1538 v->global ? 'G':'L',
1539 v->frame_pos, v->type ? v->type->size:0);
1544 ###### forward decls
1546 static void print_exec(struct exec *e, int indent, int bracket);
1550 As discussed, analysis involves propagating type requirements around the
1551 program and looking for errors.
1553 So `propagate_types` is passed an expected type (being a `struct type`
1554 pointer together with some `val_rules` flags) that the `exec` is
1555 expected to return, and returns the type that it does return, either
1556 of which can be `NULL` signifying "unknown". An `ok` flag is passed
1557 by reference. It is set to `0` when an error is found, and `2` when
1558 any change is made. If it remains unchanged at `1`, then no more
1559 propagation is needed.
1563 enum val_rules {Rnolabel = 1<<0, Rboolok = 1<<1, Rnoconstant = 2<<1};
1567 if (rules & Rnolabel)
1568 fputs(" (labels not permitted)", stderr);
1571 ###### core functions
1573 static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1574 struct type *type, int rules);
1575 static struct type *__propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1576 struct type *type, int rules)
1583 switch (prog->type) {
1586 struct binode *b = cast(binode, prog);
1588 ## propagate binode cases
1592 ## propagate exec cases
1597 static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1598 struct type *type, int rules)
1600 struct type *ret = __propagate_types(prog, c, ok, type, rules);
1609 Interpreting an `exec` doesn't require anything but the `exec`. State
1610 is stored in variables and each variable will be directly linked from
1611 within the `exec` tree. The exception to this is the `main` function
1612 which needs to look at command line arguments. This function will be
1613 interpreted separately.
1615 Each `exec` can return a value combined with a type in `struct lrval`.
1616 The type may be `Tnone` but must be non-NULL. Some `exec`s will return
1617 the location of a value, which can be updated, in `lval`. Others will
1618 set `lval` to NULL indicating that there is a value of appropriate type
1621 ###### core functions
1625 struct value rval, *lval;
1628 static struct lrval _interp_exec(struct parse_context *c, struct exec *e);
1630 static struct value interp_exec(struct parse_context *c, struct exec *e,
1631 struct type **typeret)
1633 struct lrval ret = _interp_exec(c, e);
1635 if (!ret.type) abort();
1637 *typeret = ret.type;
1639 dup_value(ret.type, ret.lval, &ret.rval);
1643 static struct value *linterp_exec(struct parse_context *c, struct exec *e,
1644 struct type **typeret)
1646 struct lrval ret = _interp_exec(c, e);
1649 *typeret = ret.type;
1651 free_value(ret.type, &ret.rval);
1655 static struct lrval _interp_exec(struct parse_context *c, struct exec *e)
1658 struct value rv = {}, *lrv = NULL;
1659 struct type *rvtype;
1661 rvtype = ret.type = Tnone;
1671 struct binode *b = cast(binode, e);
1672 struct value left, right, *lleft;
1673 struct type *ltype, *rtype;
1674 ltype = rtype = Tnone;
1676 ## interp binode cases
1678 free_value(ltype, &left);
1679 free_value(rtype, &right);
1682 ## interp exec cases
1687 ## interp exec cleanup
1693 Now that we have the shape of the interpreter in place we can add some
1694 complex types and connected them in to the data structures and the
1695 different phases of parse, analyse, print, interpret.
1697 Thus far we have arrays and structs.
1701 Arrays can be declared by giving a size and a type, as `[size]type' so
1702 `freq:[26]number` declares `freq` to be an array of 26 numbers. The
1703 size can be either a literal number, or a named constant. Some day an
1704 arbitrary expression will be supported.
1706 As a formal parameter to a function, the array can be declared with a
1707 new variable as the size: `name:[size::number]string`. The `size`
1708 variable is set to the size of the array and must be a constant. As
1709 `number` is the only supported type, it can be left out:
1710 `name:[size::]string`.
1712 Arrays cannot be assigned. When pointers are introduced we will also
1713 introduce array slices which can refer to part or all of an array -
1714 the assignment syntax will create a slice. For now, an array can only
1715 ever be referenced by the name it is declared with. It is likely that
1716 a "`copy`" primitive will eventually be define which can be used to
1717 make a copy of an array with controllable recursive depth.
1719 For now we have two sorts of array, those with fixed size either because
1720 it is given as a literal number or because it is a struct member (which
1721 cannot have a runtime-changing size), and those with a size that is
1722 determined at runtime - local variables with a const size. The former
1723 have their size calculated at parse time, the latter at run time.
1725 For the latter type, the `size` field of the type is the size of a
1726 pointer, and the array is reallocated every time it comes into scope.
1728 We differentiate struct fields with a const size from local variables
1729 with a const size by whether they are prepared at parse time or not.
1731 ###### type union fields
1734 int unspec; // size is unspecified - vsize must be set.
1737 struct variable *vsize;
1738 struct type *member;
1741 ###### value union fields
1742 void *array; // used if not static_size
1744 ###### value functions
1746 static void array_prepare_type(struct parse_context *c, struct type *type,
1749 struct value *vsize;
1751 if (!type->array.vsize || type->array.static_size)
1754 vsize = var_value(c, type->array.vsize);
1756 mpz_tdiv_q(q, mpq_numref(vsize->num), mpq_denref(vsize->num));
1757 type->array.size = mpz_get_si(q);
1761 type->array.static_size = 1;
1762 type->size = type->array.size * type->array.member->size;
1763 type->align = type->array.member->align;
1767 static void array_init(struct type *type, struct value *val)
1770 void *ptr = val->ptr;
1774 if (!type->array.static_size) {
1775 val->array = calloc(type->array.size,
1776 type->array.member->size);
1779 for (i = 0; i < type->array.size; i++) {
1781 v = (void*)ptr + i * type->array.member->size;
1782 val_init(type->array.member, v);
1786 static void array_free(struct type *type, struct value *val)
1789 void *ptr = val->ptr;
1791 if (!type->array.static_size)
1793 for (i = 0; i < type->array.size; i++) {
1795 v = (void*)ptr + i * type->array.member->size;
1796 free_value(type->array.member, v);
1798 if (!type->array.static_size)
1802 static int array_compat(struct type *require, struct type *have)
1804 if (have->compat != require->compat)
1805 return 0; // UNTESTED
1806 /* Both are arrays, so we can look at details */
1807 if (!type_compat(require->array.member, have->array.member, 0))
1809 if (have->array.unspec && require->array.unspec) {
1810 if (have->array.vsize && require->array.vsize &&
1811 have->array.vsize != require->array.vsize) // UNTESTED
1812 /* sizes might not be the same */
1813 return 0; // UNTESTED
1816 if (have->array.unspec || require->array.unspec)
1817 return 1; // UNTESTED
1818 if (require->array.vsize == NULL && have->array.vsize == NULL)
1819 return require->array.size == have->array.size;
1821 return require->array.vsize == have->array.vsize; // UNTESTED
1824 static void array_print_type(struct type *type, FILE *f)
1827 if (type->array.vsize) {
1828 struct binding *b = type->array.vsize->name;
1829 fprintf(f, "%.*s%s]", b->name.len, b->name.txt,
1830 type->array.unspec ? "::" : "");
1832 fprintf(f, "%d]", type->array.size);
1833 type_print(type->array.member, f);
1836 static struct type array_prototype = {
1838 .prepare_type = array_prepare_type,
1839 .print_type = array_print_type,
1840 .compat = array_compat,
1842 .size = sizeof(void*),
1843 .align = sizeof(void*),
1846 ###### declare terminals
1851 | [ NUMBER ] Type ${ {
1854 struct text noname = { "", 0 };
1857 $0 = t = add_type(c, noname, &array_prototype);
1858 t->array.member = $<4;
1859 t->array.vsize = NULL;
1860 if (number_parse(num, tail, $2.txt) == 0)
1861 tok_err(c, "error: unrecognised number", &$2);
1863 tok_err(c, "error: unsupported number suffix", &$2);
1865 t->array.size = mpz_get_ui(mpq_numref(num));
1866 if (mpz_cmp_ui(mpq_denref(num), 1) != 0) {
1867 tok_err(c, "error: array size must be an integer",
1869 } else if (mpz_cmp_ui(mpq_numref(num), 1UL << 30) >= 0)
1870 tok_err(c, "error: array size is too large",
1874 t->array.static_size = 1;
1875 t->size = t->array.size * t->array.member->size;
1876 t->align = t->array.member->align;
1879 | [ IDENTIFIER ] Type ${ {
1880 struct variable *v = var_ref(c, $2.txt);
1881 struct text noname = { "", 0 };
1884 tok_err(c, "error: name undeclared", &$2);
1885 else if (!v->constant)
1886 tok_err(c, "error: array size must be a constant", &$2);
1888 $0 = add_type(c, noname, &array_prototype);
1889 $0->array.member = $<4;
1891 $0->array.vsize = v;
1896 OptType -> Type ${ $0 = $<1; }$
1899 ###### formal type grammar
1901 | [ IDENTIFIER :: OptType ] Type ${ {
1902 struct variable *v = var_decl(c, $ID.txt);
1903 struct text noname = { "", 0 };
1909 $0 = add_type(c, noname, &array_prototype);
1910 $0->array.member = $<6;
1912 $0->array.unspec = 1;
1913 $0->array.vsize = v;
1919 ###### variable grammar
1921 | Variable [ Expression ] ${ {
1922 struct binode *b = new(binode);
1929 ###### print binode cases
1931 print_exec(b->left, -1, bracket);
1933 print_exec(b->right, -1, bracket);
1937 ###### propagate binode cases
1939 /* left must be an array, right must be a number,
1940 * result is the member type of the array
1942 propagate_types(b->right, c, ok, Tnum, 0);
1943 t = propagate_types(b->left, c, ok, NULL, rules & Rnoconstant);
1944 if (!t || t->compat != array_compat) {
1945 type_err(c, "error: %1 cannot be indexed", prog, t, 0, NULL);
1948 if (!type_compat(type, t->array.member, rules)) {
1949 type_err(c, "error: have %1 but need %2", prog,
1950 t->array.member, rules, type);
1952 return t->array.member;
1956 ###### interp binode cases
1962 lleft = linterp_exec(c, b->left, <ype);
1963 right = interp_exec(c, b->right, &rtype);
1965 mpz_tdiv_q(q, mpq_numref(right.num), mpq_denref(right.num));
1969 if (ltype->array.static_size)
1972 ptr = *(void**)lleft;
1973 rvtype = ltype->array.member;
1974 if (i >= 0 && i < ltype->array.size)
1975 lrv = ptr + i * rvtype->size;
1977 val_init(ltype->array.member, &rv);
1984 A `struct` is a data-type that contains one or more other data-types.
1985 It differs from an array in that each member can be of a different
1986 type, and they are accessed by name rather than by number. Thus you
1987 cannot choose an element by calculation, you need to know what you
1990 The language makes no promises about how a given structure will be
1991 stored in memory - it is free to rearrange fields to suit whatever
1992 criteria seems important.
1994 Structs are declared separately from program code - they cannot be
1995 declared in-line in a variable declaration like arrays can. A struct
1996 is given a name and this name is used to identify the type - the name
1997 is not prefixed by the word `struct` as it would be in C.
1999 Structs are only treated as the same if they have the same name.
2000 Simply having the same fields in the same order is not enough. This
2001 might change once we can create structure initializers from a list of
2004 Each component datum is identified much like a variable is declared,
2005 with a name, one or two colons, and a type. The type cannot be omitted
2006 as there is no opportunity to deduce the type from usage. An initial
2007 value can be given following an equals sign, so
2009 ##### Example: a struct type
2015 would declare a type called "complex" which has two number fields,
2016 each initialised to zero.
2018 Struct will need to be declared separately from the code that uses
2019 them, so we will need to be able to print out the declaration of a
2020 struct when reprinting the whole program. So a `print_type_decl` type
2021 function will be needed.
2023 ###### type union fields
2035 ###### type functions
2036 void (*print_type_decl)(struct type *type, FILE *f);
2038 ###### value functions
2040 static void structure_init(struct type *type, struct value *val)
2044 for (i = 0; i < type->structure.nfields; i++) {
2046 v = (void*) val->ptr + type->structure.fields[i].offset;
2047 if (type->structure.fields[i].init)
2048 dup_value(type->structure.fields[i].type,
2049 type->structure.fields[i].init,
2052 val_init(type->structure.fields[i].type, v);
2056 static void structure_free(struct type *type, struct value *val)
2060 for (i = 0; i < type->structure.nfields; i++) {
2062 v = (void*)val->ptr + type->structure.fields[i].offset;
2063 free_value(type->structure.fields[i].type, v);
2067 static void structure_free_type(struct type *t)
2070 for (i = 0; i < t->structure.nfields; i++)
2071 if (t->structure.fields[i].init) {
2072 free_value(t->structure.fields[i].type,
2073 t->structure.fields[i].init);
2075 free(t->structure.fields);
2078 static struct type structure_prototype = {
2079 .init = structure_init,
2080 .free = structure_free,
2081 .free_type = structure_free_type,
2082 .print_type_decl = structure_print_type,
2096 ###### free exec cases
2098 free_exec(cast(fieldref, e)->left);
2102 ###### declare terminals
2105 ###### variable grammar
2107 | Variable . IDENTIFIER ${ {
2108 struct fieldref *fr = new_pos(fieldref, $2);
2115 ###### print exec cases
2119 struct fieldref *f = cast(fieldref, e);
2120 print_exec(f->left, -1, bracket);
2121 printf(".%.*s", f->name.len, f->name.txt);
2125 ###### ast functions
2126 static int find_struct_index(struct type *type, struct text field)
2129 for (i = 0; i < type->structure.nfields; i++)
2130 if (text_cmp(type->structure.fields[i].name, field) == 0)
2135 ###### propagate exec cases
2139 struct fieldref *f = cast(fieldref, prog);
2140 struct type *st = propagate_types(f->left, c, ok, NULL, 0);
2143 type_err(c, "error: unknown type for field access", f->left, // UNTESTED
2145 else if (st->init != structure_init)
2146 type_err(c, "error: field reference attempted on %1, not a struct",
2147 f->left, st, 0, NULL);
2148 else if (f->index == -2) {
2149 f->index = find_struct_index(st, f->name);
2151 type_err(c, "error: cannot find requested field in %1",
2152 f->left, st, 0, NULL);
2154 if (f->index >= 0) {
2155 struct type *ft = st->structure.fields[f->index].type;
2156 if (!type_compat(type, ft, rules))
2157 type_err(c, "error: have %1 but need %2", prog,
2164 ###### interp exec cases
2167 struct fieldref *f = cast(fieldref, e);
2169 struct value *lleft = linterp_exec(c, f->left, <ype);
2170 lrv = (void*)lleft->ptr + ltype->structure.fields[f->index].offset;
2171 rvtype = ltype->structure.fields[f->index].type;
2177 struct fieldlist *prev;
2181 ###### ast functions
2182 static void free_fieldlist(struct fieldlist *f)
2186 free_fieldlist(f->prev);
2188 free_value(f->f.type, f->f.init); // UNTESTED
2189 free(f->f.init); // UNTESTED
2194 ###### top level grammar
2195 DeclareStruct -> struct IDENTIFIER FieldBlock Newlines ${ {
2197 add_type(c, $2.txt, &structure_prototype);
2199 struct fieldlist *f;
2201 for (f = $3; f; f=f->prev)
2204 t->structure.nfields = cnt;
2205 t->structure.fields = calloc(cnt, sizeof(struct field));
2208 int a = f->f.type->align;
2210 t->structure.fields[cnt] = f->f;
2211 if (t->size & (a-1))
2212 t->size = (t->size | (a-1)) + 1;
2213 t->structure.fields[cnt].offset = t->size;
2214 t->size += ((f->f.type->size - 1) | (a-1)) + 1;
2223 FieldBlock -> { IN OptNL FieldLines OUT OptNL } ${ $0 = $<FL; }$
2224 | { SimpleFieldList } ${ $0 = $<SFL; }$
2225 | IN OptNL FieldLines OUT ${ $0 = $<FL; }$
2226 | SimpleFieldList EOL ${ $0 = $<SFL; }$
2228 FieldLines -> SimpleFieldList Newlines ${ $0 = $<SFL; }$
2229 | FieldLines SimpleFieldList Newlines ${
2234 SimpleFieldList -> Field ${ $0 = $<F; }$
2235 | SimpleFieldList ; Field ${
2239 | SimpleFieldList ; ${
2242 | ERROR ${ tok_err(c, "Syntax error in struct field", &$1); }$
2244 Field -> IDENTIFIER : Type = Expression ${ {
2247 $0 = calloc(1, sizeof(struct fieldlist));
2248 $0->f.name = $1.txt;
2253 propagate_types($<5, c, &ok, $3, 0);
2256 c->parse_error = 1; // UNTESTED
2258 struct value vl = interp_exec(c, $5, NULL);
2259 $0->f.init = global_alloc(c, $0->f.type, NULL, &vl);
2262 | IDENTIFIER : Type ${
2263 $0 = calloc(1, sizeof(struct fieldlist));
2264 $0->f.name = $1.txt;
2266 if ($0->f.type->prepare_type)
2267 $0->f.type->prepare_type(c, $0->f.type, 1);
2270 ###### forward decls
2271 static void structure_print_type(struct type *t, FILE *f);
2273 ###### value functions
2274 static void structure_print_type(struct type *t, FILE *f) // UNTESTED
2278 fprintf(f, "struct %.*s\n", t->name.len, t->name.txt);
2280 for (i = 0; i < t->structure.nfields; i++) {
2281 struct field *fl = t->structure.fields + i;
2282 fprintf(f, " %.*s : ", fl->name.len, fl->name.txt);
2283 type_print(fl->type, f);
2284 if (fl->type->print && fl->init) {
2286 if (fl->type == Tstr)
2287 fprintf(f, "\""); // UNTESTED
2288 print_value(fl->type, fl->init);
2289 if (fl->type == Tstr)
2290 fprintf(f, "\""); // UNTESTED
2296 ###### print type decls
2298 struct type *t; // UNTESTED
2301 while (target != 0) {
2303 for (t = context.typelist; t ; t=t->next)
2304 if (t->print_type_decl) {
2313 t->print_type_decl(t, stdout);
2321 A function is a named chunk of code which can be passed parameters and
2322 can return results. Each function has an implicit type which includes
2323 the set of parameters and the return value. As yet these types cannot
2324 be declared separate from the function itself.
2326 In fact, only one function is currently possible - `main`. `main` is
2327 passed an array of strings together with the size of the array, and
2328 doesn't return anything. The strings are command line arguments.
2330 The parameters can be specified either in parentheses as a list, such as
2332 ##### Example: function 1
2334 func main(av:[ac::number]string)
2337 or as an indented list of one parameter per line
2339 ##### Example: function 2
2342 argv:[argc::number]string
2354 MainFunction -> func main ( OpenScope Args ) Block Newlines ${
2357 $0->left = reorder_bilist($<Ar);
2359 var_block_close(c, CloseSequential, $0);
2360 if (c->scope_stack && !c->parse_error) abort();
2362 | func main IN OpenScope OptNL Args OUT OptNL do Block Newlines ${
2365 $0->left = reorder_bilist($<Ar);
2367 var_block_close(c, CloseSequential, $0);
2368 if (c->scope_stack && !c->parse_error) abort();
2370 | func main NEWLINE OpenScope OptNL do Block Newlines ${
2375 var_block_close(c, CloseSequential, $0);
2376 if (c->scope_stack && !c->parse_error) abort();
2379 Args -> ${ $0 = NULL; }$
2380 | Varlist ${ $0 = $<1; }$
2381 | Varlist ; ${ $0 = $<1; }$
2382 | Varlist NEWLINE ${ $0 = $<1; }$
2384 Varlist -> Varlist ; ArgDecl ${ // UNTESTED
2398 ArgDecl -> IDENTIFIER : FormalType ${ {
2399 struct variable *v = var_decl(c, $1.txt);
2405 ## Executables: the elements of code
2407 Each code element needs to be parsed, printed, analysed,
2408 interpreted, and freed. There are several, so let's just start with
2409 the easy ones and work our way up.
2413 We have already met values as separate objects. When manifest
2414 constants appear in the program text, that must result in an executable
2415 which has a constant value. So the `val` structure embeds a value in
2428 ###### ast functions
2429 struct val *new_val(struct type *T, struct token tk)
2431 struct val *v = new_pos(val, tk);
2442 $0 = new_val(Tbool, $1);
2446 $0 = new_val(Tbool, $1);
2450 $0 = new_val(Tnum, $1);
2453 if (number_parse($0->val.num, tail, $1.txt) == 0)
2454 mpq_init($0->val.num); // UNTESTED
2456 tok_err(c, "error: unsupported number suffix",
2461 $0 = new_val(Tstr, $1);
2464 string_parse(&$1, '\\', &$0->val.str, tail);
2466 tok_err(c, "error: unsupported string suffix",
2471 $0 = new_val(Tstr, $1);
2474 string_parse(&$1, '\\', &$0->val.str, tail);
2476 tok_err(c, "error: unsupported string suffix",
2481 ###### print exec cases
2484 struct val *v = cast(val, e);
2485 if (v->vtype == Tstr)
2487 print_value(v->vtype, &v->val);
2488 if (v->vtype == Tstr)
2493 ###### propagate exec cases
2496 struct val *val = cast(val, prog);
2497 if (!type_compat(type, val->vtype, rules))
2498 type_err(c, "error: expected %1%r found %2",
2499 prog, type, rules, val->vtype);
2503 ###### interp exec cases
2505 rvtype = cast(val, e)->vtype;
2506 dup_value(rvtype, &cast(val, e)->val, &rv);
2509 ###### ast functions
2510 static void free_val(struct val *v)
2513 free_value(v->vtype, &v->val);
2517 ###### free exec cases
2518 case Xval: free_val(cast(val, e)); break;
2520 ###### ast functions
2521 // Move all nodes from 'b' to 'rv', reversing their order.
2522 // In 'b' 'left' is a list, and 'right' is the last node.
2523 // In 'rv', left' is the first node and 'right' is a list.
2524 static struct binode *reorder_bilist(struct binode *b)
2526 struct binode *rv = NULL;
2529 struct exec *t = b->right;
2533 b = cast(binode, b->left);
2543 Just as we used a `val` to wrap a value into an `exec`, we similarly
2544 need a `var` to wrap a `variable` into an exec. While each `val`
2545 contained a copy of the value, each `var` holds a link to the variable
2546 because it really is the same variable no matter where it appears.
2547 When a variable is used, we need to remember to follow the `->merged`
2548 link to find the primary instance.
2556 struct variable *var;
2564 VariableDecl -> IDENTIFIER : ${ {
2565 struct variable *v = var_decl(c, $1.txt);
2566 $0 = new_pos(var, $1);
2571 v = var_ref(c, $1.txt);
2573 type_err(c, "error: variable '%v' redeclared",
2575 type_err(c, "info: this is where '%v' was first declared",
2576 v->where_decl, NULL, 0, NULL);
2579 | IDENTIFIER :: ${ {
2580 struct variable *v = var_decl(c, $1.txt);
2581 $0 = new_pos(var, $1);
2587 v = var_ref(c, $1.txt);
2589 type_err(c, "error: variable '%v' redeclared",
2591 type_err(c, "info: this is where '%v' was first declared",
2592 v->where_decl, NULL, 0, NULL);
2595 | IDENTIFIER : Type ${ {
2596 struct variable *v = var_decl(c, $1.txt);
2597 $0 = new_pos(var, $1);
2604 v = var_ref(c, $1.txt);
2606 type_err(c, "error: variable '%v' redeclared",
2608 type_err(c, "info: this is where '%v' was first declared",
2609 v->where_decl, NULL, 0, NULL);
2612 | IDENTIFIER :: Type ${ {
2613 struct variable *v = var_decl(c, $1.txt);
2614 $0 = new_pos(var, $1);
2622 v = var_ref(c, $1.txt);
2624 type_err(c, "error: variable '%v' redeclared",
2626 type_err(c, "info: this is where '%v' was first declared",
2627 v->where_decl, NULL, 0, NULL);
2632 Variable -> IDENTIFIER ${ {
2633 struct variable *v = var_ref(c, $1.txt);
2634 $0 = new_pos(var, $1);
2636 /* This might be a label - allocate a var just in case */
2637 v = var_decl(c, $1.txt);
2644 cast(var, $0)->var = v;
2648 ###### print exec cases
2651 struct var *v = cast(var, e);
2653 struct binding *b = v->var->name;
2654 printf("%.*s", b->name.len, b->name.txt);
2661 if (loc && loc->type == Xvar) {
2662 struct var *v = cast(var, loc);
2664 struct binding *b = v->var->name;
2665 fprintf(stderr, "%.*s", b->name.len, b->name.txt);
2667 fputs("???", stderr); // NOTEST
2669 fputs("NOTVAR", stderr); // NOTEST
2672 ###### propagate exec cases
2676 struct var *var = cast(var, prog);
2677 struct variable *v = var->var;
2679 type_err(c, "%d:BUG: no variable!!", prog, NULL, 0, NULL); // NOTEST
2680 return Tnone; // NOTEST
2683 if (v->constant && (rules & Rnoconstant)) {
2684 type_err(c, "error: Cannot assign to a constant: %v",
2685 prog, NULL, 0, NULL);
2686 type_err(c, "info: name was defined as a constant here",
2687 v->where_decl, NULL, 0, NULL);
2690 if (v->type == Tnone && v->where_decl == prog)
2691 type_err(c, "error: variable used but not declared: %v",
2692 prog, NULL, 0, NULL);
2693 if (v->type == NULL) {
2694 if (type && *ok != 0) {
2696 v->where_set = prog;
2701 if (!type_compat(type, v->type, rules)) {
2702 type_err(c, "error: expected %1%r but variable '%v' is %2", prog,
2703 type, rules, v->type);
2704 type_err(c, "info: this is where '%v' was set to %1", v->where_set,
2705 v->type, rules, NULL);
2712 ###### interp exec cases
2715 struct var *var = cast(var, e);
2716 struct variable *v = var->var;
2719 lrv = var_value(c, v);
2724 ###### ast functions
2726 static void free_var(struct var *v)
2731 ###### free exec cases
2732 case Xvar: free_var(cast(var, e)); break;
2734 ### Expressions: Conditional
2736 Our first user of the `binode` will be conditional expressions, which
2737 is a bit odd as they actually have three components. That will be
2738 handled by having 2 binodes for each expression. The conditional
2739 expression is the lowest precedence operator which is why we define it
2740 first - to start the precedence list.
2742 Conditional expressions are of the form "value `if` condition `else`
2743 other_value". They associate to the right, so everything to the right
2744 of `else` is part of an else value, while only a higher-precedence to
2745 the left of `if` is the if values. Between `if` and `else` there is no
2746 room for ambiguity, so a full conditional expression is allowed in
2758 Expression -> Expression if Expression else Expression $$ifelse ${ {
2759 struct binode *b1 = new(binode);
2760 struct binode *b2 = new(binode);
2769 ## expression grammar
2771 ###### print binode cases
2774 b2 = cast(binode, b->right);
2775 if (bracket) printf("(");
2776 print_exec(b2->left, -1, bracket);
2778 print_exec(b->left, -1, bracket);
2780 print_exec(b2->right, -1, bracket);
2781 if (bracket) printf(")");
2784 ###### propagate binode cases
2787 /* cond must be Tbool, others must match */
2788 struct binode *b2 = cast(binode, b->right);
2791 propagate_types(b->left, c, ok, Tbool, 0);
2792 t = propagate_types(b2->left, c, ok, type, Rnolabel);
2793 t2 = propagate_types(b2->right, c, ok, type ?: t, Rnolabel);
2797 ###### interp binode cases
2800 struct binode *b2 = cast(binode, b->right);
2801 left = interp_exec(c, b->left, <ype);
2803 rv = interp_exec(c, b2->left, &rvtype); // UNTESTED
2805 rv = interp_exec(c, b2->right, &rvtype);
2809 ### Expressions: Boolean
2811 The next class of expressions to use the `binode` will be Boolean
2812 expressions. "`and then`" and "`or else`" are similar to `and` and `or`
2813 have same corresponding precendence. The difference is that they don't
2814 evaluate the second expression if not necessary.
2823 ###### expr precedence
2828 ###### expression grammar
2829 | Expression or Expression ${ {
2830 struct binode *b = new(binode);
2836 | Expression or else Expression ${ {
2837 struct binode *b = new(binode);
2844 | Expression and Expression ${ {
2845 struct binode *b = new(binode);
2851 | Expression and then Expression ${ {
2852 struct binode *b = new(binode);
2859 | not Expression ${ {
2860 struct binode *b = new(binode);
2866 ###### print binode cases
2868 if (bracket) printf("(");
2869 print_exec(b->left, -1, bracket);
2871 print_exec(b->right, -1, bracket);
2872 if (bracket) printf(")");
2875 if (bracket) printf("(");
2876 print_exec(b->left, -1, bracket);
2877 printf(" and then ");
2878 print_exec(b->right, -1, bracket);
2879 if (bracket) printf(")");
2882 if (bracket) printf("(");
2883 print_exec(b->left, -1, bracket);
2885 print_exec(b->right, -1, bracket);
2886 if (bracket) printf(")");
2889 if (bracket) printf("(");
2890 print_exec(b->left, -1, bracket);
2891 printf(" or else ");
2892 print_exec(b->right, -1, bracket);
2893 if (bracket) printf(")");
2896 if (bracket) printf("(");
2898 print_exec(b->right, -1, bracket);
2899 if (bracket) printf(")");
2902 ###### propagate binode cases
2908 /* both must be Tbool, result is Tbool */
2909 propagate_types(b->left, c, ok, Tbool, 0);
2910 propagate_types(b->right, c, ok, Tbool, 0);
2911 if (type && type != Tbool)
2912 type_err(c, "error: %1 operation found where %2 expected", prog,
2916 ###### interp binode cases
2918 rv = interp_exec(c, b->left, &rvtype);
2919 right = interp_exec(c, b->right, &rtype);
2920 rv.bool = rv.bool && right.bool;
2923 rv = interp_exec(c, b->left, &rvtype);
2925 rv = interp_exec(c, b->right, NULL);
2928 rv = interp_exec(c, b->left, &rvtype);
2929 right = interp_exec(c, b->right, &rtype);
2930 rv.bool = rv.bool || right.bool;
2933 rv = interp_exec(c, b->left, &rvtype);
2935 rv = interp_exec(c, b->right, NULL);
2938 rv = interp_exec(c, b->right, &rvtype);
2942 ### Expressions: Comparison
2944 Of slightly higher precedence that Boolean expressions are Comparisons.
2945 A comparison takes arguments of any comparable type, but the two types
2948 To simplify the parsing we introduce an `eop` which can record an
2949 expression operator, and the `CMPop` non-terminal will match one of them.
2956 ###### ast functions
2957 static void free_eop(struct eop *e)
2971 ###### expr precedence
2972 $LEFT < > <= >= == != CMPop
2974 ###### expression grammar
2975 | Expression CMPop Expression ${ {
2976 struct binode *b = new(binode);
2986 CMPop -> < ${ $0.op = Less; }$
2987 | > ${ $0.op = Gtr; }$
2988 | <= ${ $0.op = LessEq; }$
2989 | >= ${ $0.op = GtrEq; }$
2990 | == ${ $0.op = Eql; }$
2991 | != ${ $0.op = NEql; }$
2993 ###### print binode cases
3001 if (bracket) printf("(");
3002 print_exec(b->left, -1, bracket);
3004 case Less: printf(" < "); break;
3005 case LessEq: printf(" <= "); break;
3006 case Gtr: printf(" > "); break;
3007 case GtrEq: printf(" >= "); break;
3008 case Eql: printf(" == "); break;
3009 case NEql: printf(" != "); break;
3010 default: abort(); // NOTEST
3012 print_exec(b->right, -1, bracket);
3013 if (bracket) printf(")");
3016 ###### propagate binode cases
3023 /* Both must match but not be labels, result is Tbool */
3024 t = propagate_types(b->left, c, ok, NULL, Rnolabel);
3026 propagate_types(b->right, c, ok, t, 0);
3028 t = propagate_types(b->right, c, ok, NULL, Rnolabel); // UNTESTED
3030 t = propagate_types(b->left, c, ok, t, 0); // UNTESTED
3032 if (!type_compat(type, Tbool, 0))
3033 type_err(c, "error: Comparison returns %1 but %2 expected", prog,
3034 Tbool, rules, type);
3037 ###### interp binode cases
3046 left = interp_exec(c, b->left, <ype);
3047 right = interp_exec(c, b->right, &rtype);
3048 cmp = value_cmp(ltype, rtype, &left, &right);
3051 case Less: rv.bool = cmp < 0; break;
3052 case LessEq: rv.bool = cmp <= 0; break;
3053 case Gtr: rv.bool = cmp > 0; break;
3054 case GtrEq: rv.bool = cmp >= 0; break;
3055 case Eql: rv.bool = cmp == 0; break;
3056 case NEql: rv.bool = cmp != 0; break;
3057 default: rv.bool = 0; break; // NOTEST
3062 ### Expressions: The rest
3064 The remaining expressions with the highest precedence are arithmetic,
3065 string concatenation, and string conversion. String concatenation
3066 (`++`) has the same precedence as multiplication and division, but lower
3069 String conversion is a temporary feature until I get a better type
3070 system. `$` is a prefix operator which expects a string and returns
3073 `+` and `-` are both infix and prefix operations (where they are
3074 absolute value and negation). These have different operator names.
3076 We also have a 'Bracket' operator which records where parentheses were
3077 found. This makes it easy to reproduce these when printing. Possibly I
3078 should only insert brackets were needed for precedence.
3088 ###### expr precedence
3094 ###### expression grammar
3095 | Expression Eop Expression ${ {
3096 struct binode *b = new(binode);
3103 | Expression Top Expression ${ {
3104 struct binode *b = new(binode);
3111 | ( Expression ) ${ {
3112 struct binode *b = new_pos(binode, $1);
3117 | Uop Expression ${ {
3118 struct binode *b = new(binode);
3123 | Value ${ $0 = $<1; }$
3124 | Variable ${ $0 = $<1; }$
3127 Eop -> + ${ $0.op = Plus; }$
3128 | - ${ $0.op = Minus; }$
3130 Uop -> + ${ $0.op = Absolute; }$
3131 | - ${ $0.op = Negate; }$
3132 | $ ${ $0.op = StringConv; }$
3134 Top -> * ${ $0.op = Times; }$
3135 | / ${ $0.op = Divide; }$
3136 | % ${ $0.op = Rem; }$
3137 | ++ ${ $0.op = Concat; }$
3139 ###### print binode cases
3146 if (bracket) printf("(");
3147 print_exec(b->left, indent, bracket);
3149 case Plus: fputs(" + ", stdout); break;
3150 case Minus: fputs(" - ", stdout); break;
3151 case Times: fputs(" * ", stdout); break;
3152 case Divide: fputs(" / ", stdout); break;
3153 case Rem: fputs(" % ", stdout); break;
3154 case Concat: fputs(" ++ ", stdout); break;
3155 default: abort(); // NOTEST
3157 print_exec(b->right, indent, bracket);
3158 if (bracket) printf(")");
3163 if (bracket) printf("(");
3165 case Absolute: fputs("+", stdout); break;
3166 case Negate: fputs("-", stdout); break;
3167 case StringConv: fputs("$", stdout); break;
3168 default: abort(); // NOTEST
3170 print_exec(b->right, indent, bracket);
3171 if (bracket) printf(")");
3175 print_exec(b->right, indent, bracket);
3179 ###### propagate binode cases
3185 /* both must be numbers, result is Tnum */
3188 /* as propagate_types ignores a NULL,
3189 * unary ops fit here too */
3190 propagate_types(b->left, c, ok, Tnum, 0);
3191 propagate_types(b->right, c, ok, Tnum, 0);
3192 if (!type_compat(type, Tnum, 0))
3193 type_err(c, "error: Arithmetic returns %1 but %2 expected", prog,
3198 /* both must be Tstr, result is Tstr */
3199 propagate_types(b->left, c, ok, Tstr, 0);
3200 propagate_types(b->right, c, ok, Tstr, 0);
3201 if (!type_compat(type, Tstr, 0))
3202 type_err(c, "error: Concat returns %1 but %2 expected", prog,
3207 /* op must be string, result is number */
3208 propagate_types(b->left, c, ok, Tstr, 0);
3209 if (!type_compat(type, Tnum, 0))
3210 type_err(c, // UNTESTED
3211 "error: Can only convert string to number, not %1",
3212 prog, type, 0, NULL);
3216 return propagate_types(b->right, c, ok, type, 0);
3218 ###### interp binode cases
3221 rv = interp_exec(c, b->left, &rvtype);
3222 right = interp_exec(c, b->right, &rtype);
3223 mpq_add(rv.num, rv.num, right.num);
3226 rv = interp_exec(c, b->left, &rvtype);
3227 right = interp_exec(c, b->right, &rtype);
3228 mpq_sub(rv.num, rv.num, right.num);
3231 rv = interp_exec(c, b->left, &rvtype);
3232 right = interp_exec(c, b->right, &rtype);
3233 mpq_mul(rv.num, rv.num, right.num);
3236 rv = interp_exec(c, b->left, &rvtype);
3237 right = interp_exec(c, b->right, &rtype);
3238 mpq_div(rv.num, rv.num, right.num);
3243 left = interp_exec(c, b->left, <ype);
3244 right = interp_exec(c, b->right, &rtype);
3245 mpz_init(l); mpz_init(r); mpz_init(rem);
3246 mpz_tdiv_q(l, mpq_numref(left.num), mpq_denref(left.num));
3247 mpz_tdiv_q(r, mpq_numref(right.num), mpq_denref(right.num));
3248 mpz_tdiv_r(rem, l, r);
3249 val_init(Tnum, &rv);
3250 mpq_set_z(rv.num, rem);
3251 mpz_clear(r); mpz_clear(l); mpz_clear(rem);
3256 rv = interp_exec(c, b->right, &rvtype);
3257 mpq_neg(rv.num, rv.num);
3260 rv = interp_exec(c, b->right, &rvtype);
3261 mpq_abs(rv.num, rv.num);
3264 rv = interp_exec(c, b->right, &rvtype);
3267 left = interp_exec(c, b->left, <ype);
3268 right = interp_exec(c, b->right, &rtype);
3270 rv.str = text_join(left.str, right.str);
3273 right = interp_exec(c, b->right, &rvtype);
3277 struct text tx = right.str;
3280 if (tx.txt[0] == '-') {
3281 neg = 1; // UNTESTED
3282 tx.txt++; // UNTESTED
3283 tx.len--; // UNTESTED
3285 if (number_parse(rv.num, tail, tx) == 0)
3286 mpq_init(rv.num); // UNTESTED
3288 mpq_neg(rv.num, rv.num); // UNTESTED
3290 printf("Unsupported suffix: %.*s\n", tx.len, tx.txt); // UNTESTED
3294 ###### value functions
3296 static struct text text_join(struct text a, struct text b)
3299 rv.len = a.len + b.len;
3300 rv.txt = malloc(rv.len);
3301 memcpy(rv.txt, a.txt, a.len);
3302 memcpy(rv.txt+a.len, b.txt, b.len);
3306 ### Blocks, Statements, and Statement lists.
3308 Now that we have expressions out of the way we need to turn to
3309 statements. There are simple statements and more complex statements.
3310 Simple statements do not contain (syntactic) newlines, complex statements do.
3312 Statements often come in sequences and we have corresponding simple
3313 statement lists and complex statement lists.
3314 The former comprise only simple statements separated by semicolons.
3315 The later comprise complex statements and simple statement lists. They are
3316 separated by newlines. Thus the semicolon is only used to separate
3317 simple statements on the one line. This may be overly restrictive,
3318 but I'm not sure I ever want a complex statement to share a line with
3321 Note that a simple statement list can still use multiple lines if
3322 subsequent lines are indented, so
3324 ###### Example: wrapped simple statement list
3329 is a single simple statement list. This might allow room for
3330 confusion, so I'm not set on it yet.
3332 A simple statement list needs no extra syntax. A complex statement
3333 list has two syntactic forms. It can be enclosed in braces (much like
3334 C blocks), or it can be introduced by an indent and continue until an
3335 unindented newline (much like Python blocks). With this extra syntax
3336 it is referred to as a block.
3338 Note that a block does not have to include any newlines if it only
3339 contains simple statements. So both of:
3341 if condition: a=b; d=f
3343 if condition { a=b; print f }
3347 In either case the list is constructed from a `binode` list with
3348 `Block` as the operator. When parsing the list it is most convenient
3349 to append to the end, so a list is a list and a statement. When using
3350 the list it is more convenient to consider a list to be a statement
3351 and a list. So we need a function to re-order a list.
3352 `reorder_bilist` serves this purpose.
3354 The only stand-alone statement we introduce at this stage is `pass`
3355 which does nothing and is represented as a `NULL` pointer in a `Block`
3356 list. Other stand-alone statements will follow once the infrastructure
3367 Block -> { IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
3368 | { SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
3369 | SimpleStatements ; ${ $0 = reorder_bilist($<SS); }$
3370 | SimpleStatements EOL ${ $0 = reorder_bilist($<SS); }$
3371 | IN OptNL Statementlist OUT ${ $0 = $<Sl; }$
3373 OpenBlock -> OpenScope { IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
3374 | OpenScope { SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
3375 | OpenScope SimpleStatements ; ${ $0 = reorder_bilist($<SS); }$
3376 | OpenScope SimpleStatements EOL ${ $0 = reorder_bilist($<SS); }$
3377 | IN OpenScope OptNL Statementlist OUT ${ $0 = $<Sl; }$
3379 UseBlock -> { OpenScope IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
3380 | { OpenScope SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
3381 | IN OpenScope OptNL Statementlist OUT ${ $0 = $<Sl; }$
3383 ColonBlock -> { IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
3384 | { SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
3385 | : SimpleStatements ; ${ $0 = reorder_bilist($<SS); }$
3386 | : SimpleStatements EOL ${ $0 = reorder_bilist($<SS); }$
3387 | : IN OptNL Statementlist OUT ${ $0 = $<Sl; }$
3389 Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<CS); }$
3391 ComplexStatements -> ComplexStatements ComplexStatement ${
3401 | ComplexStatement ${
3413 ComplexStatement -> SimpleStatements Newlines ${
3414 $0 = reorder_bilist($<SS);
3416 | SimpleStatements ; Newlines ${
3417 $0 = reorder_bilist($<SS);
3419 ## ComplexStatement Grammar
3422 SimpleStatements -> SimpleStatements ; SimpleStatement ${
3428 | SimpleStatement ${
3436 SimpleStatement -> pass ${ $0 = NULL; }$
3437 | ERROR ${ tok_err(c, "Syntax error in statement", &$1); }$
3438 ## SimpleStatement Grammar
3440 ###### print binode cases
3444 if (b->left == NULL) // UNTESTED
3445 printf("pass"); // UNTESTED
3447 print_exec(b->left, indent, bracket); // UNTESTED
3448 if (b->right) { // UNTESTED
3449 printf("; "); // UNTESTED
3450 print_exec(b->right, indent, bracket); // UNTESTED
3453 // block, one per line
3454 if (b->left == NULL)
3455 do_indent(indent, "pass\n");
3457 print_exec(b->left, indent, bracket);
3459 print_exec(b->right, indent, bracket);
3463 ###### propagate binode cases
3466 /* If any statement returns something other than Tnone
3467 * or Tbool then all such must return same type.
3468 * As each statement may be Tnone or something else,
3469 * we must always pass NULL (unknown) down, otherwise an incorrect
3470 * error might occur. We never return Tnone unless it is
3475 for (e = b; e; e = cast(binode, e->right)) {
3476 t = propagate_types(e->left, c, ok, NULL, rules);
3477 if ((rules & Rboolok) && t == Tbool)
3479 if (t && t != Tnone && t != Tbool) {
3483 type_err(c, "error: expected %1%r, found %2",
3484 e->left, type, rules, t);
3490 ###### interp binode cases
3492 while (rvtype == Tnone &&
3495 rv = interp_exec(c, b->left, &rvtype);
3496 b = cast(binode, b->right);
3500 ### The Print statement
3502 `print` is a simple statement that takes a comma-separated list of
3503 expressions and prints the values separated by spaces and terminated
3504 by a newline. No control of formatting is possible.
3506 `print` faces the same list-ordering issue as blocks, and uses the
3512 ##### expr precedence
3515 ###### SimpleStatement Grammar
3517 | print ExpressionList ${
3518 $0 = reorder_bilist($<2);
3520 | print ExpressionList , ${
3525 $0 = reorder_bilist($0);
3536 ExpressionList -> ExpressionList , Expression ${
3549 ###### print binode cases
3552 do_indent(indent, "print");
3556 print_exec(b->left, -1, bracket);
3560 b = cast(binode, b->right);
3566 ###### propagate binode cases
3569 /* don't care but all must be consistent */
3570 propagate_types(b->left, c, ok, NULL, Rnolabel);
3571 propagate_types(b->right, c, ok, NULL, Rnolabel);
3574 ###### interp binode cases
3580 for ( ; b; b = cast(binode, b->right))
3584 left = interp_exec(c, b->left, <ype);
3585 print_value(ltype, &left);
3586 free_value(ltype, &left);
3597 ###### Assignment statement
3599 An assignment will assign a value to a variable, providing it hasn't
3600 been declared as a constant. The analysis phase ensures that the type
3601 will be correct so the interpreter just needs to perform the
3602 calculation. There is a form of assignment which declares a new
3603 variable as well as assigning a value. If a name is assigned before
3604 it is declared, and error will be raised as the name is created as
3605 `Tlabel` and it is illegal to assign to such names.
3611 ###### declare terminals
3614 ###### SimpleStatement Grammar
3615 | Variable = Expression ${
3621 | VariableDecl = Expression ${
3629 if ($1->var->where_set == NULL) {
3631 "Variable declared with no type or value: %v",
3641 ###### print binode cases
3644 do_indent(indent, "");
3645 print_exec(b->left, indent, bracket);
3647 print_exec(b->right, indent, bracket);
3654 struct variable *v = cast(var, b->left)->var;
3655 do_indent(indent, "");
3656 print_exec(b->left, indent, bracket);
3657 if (cast(var, b->left)->var->constant) {
3659 if (v->where_decl == v->where_set) {
3660 type_print(v->type, stdout);
3665 if (v->where_decl == v->where_set) {
3666 type_print(v->type, stdout);
3672 print_exec(b->right, indent, bracket);
3679 ###### propagate binode cases
3683 /* Both must match and not be labels,
3684 * Type must support 'dup',
3685 * For Assign, left must not be constant.
3688 t = propagate_types(b->left, c, ok, NULL,
3689 Rnolabel | (b->op == Assign ? Rnoconstant : 0));
3694 if (propagate_types(b->right, c, ok, t, 0) != t)
3695 if (b->left->type == Xvar)
3696 type_err(c, "info: variable '%v' was set as %1 here.",
3697 cast(var, b->left)->var->where_set, t, rules, NULL);
3699 t = propagate_types(b->right, c, ok, NULL, Rnolabel);
3701 propagate_types(b->left, c, ok, t,
3702 (b->op == Assign ? Rnoconstant : 0));
3704 if (t && t->dup == NULL)
3705 type_err(c, "error: cannot assign value of type %1", b, t, 0, NULL);
3710 ###### interp binode cases
3713 lleft = linterp_exec(c, b->left, <ype);
3714 right = interp_exec(c, b->right, &rtype);
3716 free_value(ltype, lleft);
3717 dup_value(ltype, &right, lleft);
3724 struct variable *v = cast(var, b->left)->var;
3727 val = var_value(c, v);
3728 if (v->type->prepare_type)
3729 v->type->prepare_type(c, v->type, 0);
3731 right = interp_exec(c, b->right, &rtype);
3732 memcpy(val, &right, rtype->size);
3735 val_init(v->type, val);
3740 ### The `use` statement
3742 The `use` statement is the last "simple" statement. It is needed when
3743 the condition in a conditional statement is a block. `use` works much
3744 like `return` in C, but only completes the `condition`, not the whole
3750 ###### expr precedence
3753 ###### SimpleStatement Grammar
3755 $0 = new_pos(binode, $1);
3758 if ($0->right->type == Xvar) {
3759 struct var *v = cast(var, $0->right);
3760 if (v->var->type == Tnone) {
3761 /* Convert this to a label */
3764 v->var->type = Tlabel;
3765 val = global_alloc(c, Tlabel, v->var, NULL);
3771 ###### print binode cases
3774 do_indent(indent, "use ");
3775 print_exec(b->right, -1, bracket);
3780 ###### propagate binode cases
3783 /* result matches value */
3784 return propagate_types(b->right, c, ok, type, 0);
3786 ###### interp binode cases
3789 rv = interp_exec(c, b->right, &rvtype);
3792 ### The Conditional Statement
3794 This is the biggy and currently the only complex statement. This
3795 subsumes `if`, `while`, `do/while`, `switch`, and some parts of `for`.
3796 It is comprised of a number of parts, all of which are optional though
3797 set combinations apply. Each part is (usually) a key word (`then` is
3798 sometimes optional) followed by either an expression or a code block,
3799 except the `casepart` which is a "key word and an expression" followed
3800 by a code block. The code-block option is valid for all parts and,
3801 where an expression is also allowed, the code block can use the `use`
3802 statement to report a value. If the code block does not report a value
3803 the effect is similar to reporting `True`.
3805 The `else` and `case` parts, as well as `then` when combined with
3806 `if`, can contain a `use` statement which will apply to some
3807 containing conditional statement. `for` parts, `do` parts and `then`
3808 parts used with `for` can never contain a `use`, except in some
3809 subordinate conditional statement.
3811 If there is a `forpart`, it is executed first, only once.
3812 If there is a `dopart`, then it is executed repeatedly providing
3813 always that the `condpart` or `cond`, if present, does not return a non-True
3814 value. `condpart` can fail to return any value if it simply executes
3815 to completion. This is treated the same as returning `True`.
3817 If there is a `thenpart` it will be executed whenever the `condpart`
3818 or `cond` returns True (or does not return any value), but this will happen
3819 *after* `dopart` (when present).
3821 If `elsepart` is present it will be executed at most once when the
3822 condition returns `False` or some value that isn't `True` and isn't
3823 matched by any `casepart`. If there are any `casepart`s, they will be
3824 executed when the condition returns a matching value.
3826 The particular sorts of values allowed in case parts has not yet been
3827 determined in the language design, so nothing is prohibited.
3829 The various blocks in this complex statement potentially provide scope
3830 for variables as described earlier. Each such block must include the
3831 "OpenScope" nonterminal before parsing the block, and must call
3832 `var_block_close()` when closing the block.
3834 The code following "`if`", "`switch`" and "`for`" does not get its own
3835 scope, but is in a scope covering the whole statement, so names
3836 declared there cannot be redeclared elsewhere. Similarly the
3837 condition following "`while`" is in a scope the covers the body
3838 ("`do`" part) of the loop, and which does not allow conditional scope
3839 extension. Code following "`then`" (both looping and non-looping),
3840 "`else`" and "`case`" each get their own local scope.
3842 The type requirements on the code block in a `whilepart` are quite
3843 unusal. It is allowed to return a value of some identifiable type, in
3844 which case the loop aborts and an appropriate `casepart` is run, or it
3845 can return a Boolean, in which case the loop either continues to the
3846 `dopart` (on `True`) or aborts and runs the `elsepart` (on `False`).
3847 This is different both from the `ifpart` code block which is expected to
3848 return a Boolean, or the `switchpart` code block which is expected to
3849 return the same type as the casepart values. The correct analysis of
3850 the type of the `whilepart` code block is the reason for the
3851 `Rboolok` flag which is passed to `propagate_types()`.
3853 The `cond_statement` cannot fit into a `binode` so a new `exec` is
3854 defined. As there are two scopes which cover multiple parts - one for
3855 the whole statement and one for "while" and "do" - and as we will use
3856 the 'struct exec' to track scopes, we actually need two new types of
3857 exec. One is a `binode` for the looping part, the rest is the
3858 `cond_statement`. The `cond_statement` will use an auxilliary `struct
3859 casepart` to track a list of case parts.
3870 struct exec *action;
3871 struct casepart *next;
3873 struct cond_statement {
3875 struct exec *forpart, *condpart, *thenpart, *elsepart;
3876 struct binode *looppart;
3877 struct casepart *casepart;
3880 ###### ast functions
3882 static void free_casepart(struct casepart *cp)
3886 free_exec(cp->value);
3887 free_exec(cp->action);
3894 static void free_cond_statement(struct cond_statement *s)
3898 free_exec(s->forpart);
3899 free_exec(s->condpart);
3900 free_exec(s->looppart);
3901 free_exec(s->thenpart);
3902 free_exec(s->elsepart);
3903 free_casepart(s->casepart);
3907 ###### free exec cases
3908 case Xcond_statement: free_cond_statement(cast(cond_statement, e)); break;
3910 ###### ComplexStatement Grammar
3911 | CondStatement ${ $0 = $<1; }$
3913 ###### expr precedence
3914 $TERM for then while do
3921 // A CondStatement must end with EOL, as does CondSuffix and
3923 // ForPart, ThenPart, SwitchPart, CasePart are non-empty and
3924 // may or may not end with EOL
3925 // WhilePart and IfPart include an appropriate Suffix
3927 // ForPart, SwitchPart, and IfPart open scopes, o we have to close
3928 // them. WhilePart opens and closes its own scope.
3929 CondStatement -> ForPart OptNL ThenPart OptNL WhilePart CondSuffix ${
3932 $0->thenpart = $<TP;
3933 $0->looppart = $<WP;
3934 var_block_close(c, CloseSequential, $0);
3936 | ForPart OptNL WhilePart CondSuffix ${
3939 $0->looppart = $<WP;
3940 var_block_close(c, CloseSequential, $0);
3942 | WhilePart CondSuffix ${
3944 $0->looppart = $<WP;
3946 | SwitchPart OptNL CasePart CondSuffix ${
3948 $0->condpart = $<SP;
3949 $CP->next = $0->casepart;
3950 $0->casepart = $<CP;
3951 var_block_close(c, CloseSequential, $0);
3953 | SwitchPart : IN OptNL CasePart CondSuffix OUT Newlines ${
3955 $0->condpart = $<SP;
3956 $CP->next = $0->casepart;
3957 $0->casepart = $<CP;
3958 var_block_close(c, CloseSequential, $0);
3960 | IfPart IfSuffix ${
3962 $0->condpart = $IP.condpart; $IP.condpart = NULL;
3963 $0->thenpart = $IP.thenpart; $IP.thenpart = NULL;
3964 // This is where we close an "if" statement
3965 var_block_close(c, CloseSequential, $0);
3968 CondSuffix -> IfSuffix ${
3971 | Newlines CasePart CondSuffix ${
3973 $CP->next = $0->casepart;
3974 $0->casepart = $<CP;
3976 | CasePart CondSuffix ${
3978 $CP->next = $0->casepart;
3979 $0->casepart = $<CP;
3982 IfSuffix -> Newlines ${ $0 = new(cond_statement); }$
3983 | Newlines ElsePart ${ $0 = $<EP; }$
3984 | ElsePart ${$0 = $<EP; }$
3986 ElsePart -> else OpenBlock Newlines ${
3987 $0 = new(cond_statement);
3988 $0->elsepart = $<OB;
3989 var_block_close(c, CloseElse, $0->elsepart);
3991 | else OpenScope CondStatement ${
3992 $0 = new(cond_statement);
3993 $0->elsepart = $<CS;
3994 var_block_close(c, CloseElse, $0->elsepart);
3998 CasePart -> case Expression OpenScope ColonBlock ${
3999 $0 = calloc(1,sizeof(struct casepart));
4002 var_block_close(c, CloseParallel, $0->action);
4006 // These scopes are closed in CondStatement
4007 ForPart -> for OpenBlock ${
4011 ThenPart -> then OpenBlock ${
4013 var_block_close(c, CloseSequential, $0);
4017 // This scope is closed in CondStatement
4018 WhilePart -> while UseBlock OptNL do OpenBlock ${
4023 var_block_close(c, CloseSequential, $0->right);
4024 var_block_close(c, CloseSequential, $0);
4026 | while OpenScope Expression OpenScope ColonBlock ${
4031 var_block_close(c, CloseSequential, $0->right);
4032 var_block_close(c, CloseSequential, $0);
4036 IfPart -> if UseBlock OptNL then OpenBlock ${
4039 var_block_close(c, CloseParallel, $0.thenpart);
4041 | if OpenScope Expression OpenScope ColonBlock ${
4044 var_block_close(c, CloseParallel, $0.thenpart);
4046 | if OpenScope Expression OpenScope OptNL then Block ${
4049 var_block_close(c, CloseParallel, $0.thenpart);
4053 // This scope is closed in CondStatement
4054 SwitchPart -> switch OpenScope Expression ${
4057 | switch UseBlock ${
4061 ###### print binode cases
4063 if (b->left && b->left->type == Xbinode &&
4064 cast(binode, b->left)->op == Block) {
4066 do_indent(indent, "while {\n");
4068 do_indent(indent, "while\n");
4069 print_exec(b->left, indent+1, bracket);
4071 do_indent(indent, "} do {\n");
4073 do_indent(indent, "do\n");
4074 print_exec(b->right, indent+1, bracket);
4076 do_indent(indent, "}\n");
4078 do_indent(indent, "while ");
4079 print_exec(b->left, 0, bracket);
4084 print_exec(b->right, indent+1, bracket);
4086 do_indent(indent, "}\n");
4090 ###### print exec cases
4092 case Xcond_statement:
4094 struct cond_statement *cs = cast(cond_statement, e);
4095 struct casepart *cp;
4097 do_indent(indent, "for");
4098 if (bracket) printf(" {\n"); else printf("\n");
4099 print_exec(cs->forpart, indent+1, bracket);
4102 do_indent(indent, "} then {\n");
4104 do_indent(indent, "then\n");
4105 print_exec(cs->thenpart, indent+1, bracket);
4107 if (bracket) do_indent(indent, "}\n");
4110 print_exec(cs->looppart, indent, bracket);
4114 do_indent(indent, "switch");
4116 do_indent(indent, "if");
4117 if (cs->condpart && cs->condpart->type == Xbinode &&
4118 cast(binode, cs->condpart)->op == Block) {
4123 print_exec(cs->condpart, indent+1, bracket);
4125 do_indent(indent, "}\n");
4127 do_indent(indent, "then\n");
4128 print_exec(cs->thenpart, indent+1, bracket);
4132 print_exec(cs->condpart, 0, bracket);
4138 print_exec(cs->thenpart, indent+1, bracket);
4140 do_indent(indent, "}\n");
4145 for (cp = cs->casepart; cp; cp = cp->next) {
4146 do_indent(indent, "case ");
4147 print_exec(cp->value, -1, 0);
4152 print_exec(cp->action, indent+1, bracket);
4154 do_indent(indent, "}\n");
4157 do_indent(indent, "else");
4162 print_exec(cs->elsepart, indent+1, bracket);
4164 do_indent(indent, "}\n");
4169 ###### propagate binode cases
4171 t = propagate_types(b->right, c, ok, Tnone, 0);
4172 if (!type_compat(Tnone, t, 0))
4173 *ok = 0; // UNTESTED
4174 return propagate_types(b->left, c, ok, type, rules);
4176 ###### propagate exec cases
4177 case Xcond_statement:
4179 // forpart and looppart->right must return Tnone
4180 // thenpart must return Tnone if there is a loopart,
4181 // otherwise it is like elsepart.
4183 // be bool if there is no casepart
4184 // match casepart->values if there is a switchpart
4185 // either be bool or match casepart->value if there
4187 // elsepart and casepart->action must match the return type
4188 // expected of this statement.
4189 struct cond_statement *cs = cast(cond_statement, prog);
4190 struct casepart *cp;
4192 t = propagate_types(cs->forpart, c, ok, Tnone, 0);
4193 if (!type_compat(Tnone, t, 0))
4194 *ok = 0; // UNTESTED
4197 t = propagate_types(cs->thenpart, c, ok, Tnone, 0);
4198 if (!type_compat(Tnone, t, 0))
4199 *ok = 0; // UNTESTED
4201 if (cs->casepart == NULL) {
4202 propagate_types(cs->condpart, c, ok, Tbool, 0);
4203 propagate_types(cs->looppart, c, ok, Tbool, 0);
4205 /* Condpart must match case values, with bool permitted */
4207 for (cp = cs->casepart;
4208 cp && !t; cp = cp->next)
4209 t = propagate_types(cp->value, c, ok, NULL, 0);
4210 if (!t && cs->condpart)
4211 t = propagate_types(cs->condpart, c, ok, NULL, Rboolok); // UNTESTED
4212 if (!t && cs->looppart)
4213 t = propagate_types(cs->looppart, c, ok, NULL, Rboolok); // UNTESTED
4214 // Now we have a type (I hope) push it down
4216 for (cp = cs->casepart; cp; cp = cp->next)
4217 propagate_types(cp->value, c, ok, t, 0);
4218 propagate_types(cs->condpart, c, ok, t, Rboolok);
4219 propagate_types(cs->looppart, c, ok, t, Rboolok);
4222 // (if)then, else, and case parts must return expected type.
4223 if (!cs->looppart && !type)
4224 type = propagate_types(cs->thenpart, c, ok, NULL, rules);
4226 type = propagate_types(cs->elsepart, c, ok, NULL, rules);
4227 for (cp = cs->casepart;
4229 cp = cp->next) // UNTESTED
4230 type = propagate_types(cp->action, c, ok, NULL, rules); // UNTESTED
4233 propagate_types(cs->thenpart, c, ok, type, rules);
4234 propagate_types(cs->elsepart, c, ok, type, rules);
4235 for (cp = cs->casepart; cp ; cp = cp->next)
4236 propagate_types(cp->action, c, ok, type, rules);
4242 ###### interp binode cases
4244 // This just performs one iterration of the loop
4245 rv = interp_exec(c, b->left, &rvtype);
4246 if (rvtype == Tnone ||
4247 (rvtype == Tbool && rv.bool != 0))
4248 // cnd is Tnone or Tbool, doesn't need to be freed
4249 interp_exec(c, b->right, NULL);
4252 ###### interp exec cases
4253 case Xcond_statement:
4255 struct value v, cnd;
4256 struct type *vtype, *cndtype;
4257 struct casepart *cp;
4258 struct cond_statement *cs = cast(cond_statement, e);
4261 interp_exec(c, cs->forpart, NULL);
4263 while ((cnd = interp_exec(c, cs->looppart, &cndtype)),
4264 cndtype == Tnone || (cndtype == Tbool && cnd.bool != 0))
4265 interp_exec(c, cs->thenpart, NULL);
4267 cnd = interp_exec(c, cs->condpart, &cndtype);
4268 if ((cndtype == Tnone ||
4269 (cndtype == Tbool && cnd.bool != 0))) {
4270 // cnd is Tnone or Tbool, doesn't need to be freed
4271 rv = interp_exec(c, cs->thenpart, &rvtype);
4272 // skip else (and cases)
4276 for (cp = cs->casepart; cp; cp = cp->next) {
4277 v = interp_exec(c, cp->value, &vtype);
4278 if (value_cmp(cndtype, vtype, &v, &cnd) == 0) {
4279 free_value(vtype, &v);
4280 free_value(cndtype, &cnd);
4281 rv = interp_exec(c, cp->action, &rvtype);
4284 free_value(vtype, &v);
4286 free_value(cndtype, &cnd);
4288 rv = interp_exec(c, cs->elsepart, &rvtype);
4295 ### Top level structure
4297 All the language elements so far can be used in various places. Now
4298 it is time to clarify what those places are.
4300 At the top level of a file there will be a number of declarations.
4301 Many of the things that can be declared haven't been described yet,
4302 such as functions, procedures, imports, and probably more.
4303 For now there are two sorts of things that can appear at the top
4304 level. They are predefined constants, `struct` types, and the `main`
4305 function. While the syntax will allow the `main` function to appear
4306 multiple times, that will trigger an error if it is actually attempted.
4308 The various declarations do not return anything. They store the
4309 various declarations in the parse context.
4311 ###### Parser: grammar
4314 Ocean -> OptNL DeclarationList
4316 ## declare terminals
4323 DeclarationList -> Declaration
4324 | DeclarationList Declaration
4326 Declaration -> ERROR Newlines ${
4327 tok_err(c, // UNTESTED
4328 "error: unhandled parse error", &$1);
4334 ## top level grammar
4338 ### The `const` section
4340 As well as being defined in with the code that uses them, constants
4341 can be declared at the top level. These have full-file scope, so they
4342 are always `InScope`. The value of a top level constant can be given
4343 as an expression, and this is evaluated immediately rather than in the
4344 later interpretation stage. Once we add functions to the language, we
4345 will need rules concern which, if any, can be used to define a top
4348 Constants are defined in a section that starts with the reserved word
4349 `const` and then has a block with a list of assignment statements.
4350 For syntactic consistency, these must use the double-colon syntax to
4351 make it clear that they are constants. Type can also be given: if
4352 not, the type will be determined during analysis, as with other
4355 As the types constants are inserted at the head of a list, printing
4356 them in the same order that they were read is not straight forward.
4357 We take a quadratic approach here and count the number of constants
4358 (variables of depth 0), then count down from there, each time
4359 searching through for the Nth constant for decreasing N.
4361 ###### top level grammar
4365 DeclareConstant -> const { IN OptNL ConstList OUT OptNL } Newlines
4366 | const { SimpleConstList } Newlines
4367 | const IN OptNL ConstList OUT Newlines
4368 | const SimpleConstList Newlines
4370 ConstList -> ConstList SimpleConstLine
4372 SimpleConstList -> SimpleConstList ; Const
4375 SimpleConstLine -> SimpleConstList Newlines
4376 | ERROR Newlines ${ tok_err(c, "Syntax error in constant", &$1); }$
4379 CType -> Type ${ $0 = $<1; }$
4382 Const -> IDENTIFIER :: CType = Expression ${ {
4386 v = var_decl(c, $1.txt);
4388 struct var *var = new_pos(var, $1);
4389 v->where_decl = var;
4394 v = var_ref(c, $1.txt);
4395 tok_err(c, "error: name already declared", &$1);
4396 type_err(c, "info: this is where '%v' was first declared",
4397 v->where_decl, NULL, 0, NULL);
4401 propagate_types($5, c, &ok, $3, 0);
4406 struct value res = interp_exec(c, $5, &v->type);
4407 global_alloc(c, v->type, v, &res);
4411 ###### print const decls
4416 while (target != 0) {
4418 for (v = context.in_scope; v; v=v->in_scope)
4419 if (v->depth == 0) {
4430 struct value *val = var_value(&context, v);
4431 printf(" %.*s :: ", v->name->name.len, v->name->name.txt);
4432 type_print(v->type, stdout);
4434 if (v->type == Tstr)
4436 print_value(v->type, val);
4437 if (v->type == Tstr)
4445 ### Finally the whole `main` function.
4447 An Ocean program can currently have only one function - `main` - and
4448 that must exist. It expects an array of strings with a provided size.
4449 Following this is a `block` which is the code to execute.
4451 As this is the top level, several things are handled a bit
4453 The function is not interpreted by `interp_exec` as that isn't
4454 passed the argument list which the program requires. Similarly type
4455 analysis is a bit more interesting at this level.
4457 ###### top level grammar
4459 DeclareFunction -> MainFunction ${ {
4461 type_err(c, "\"main\" defined a second time",
4467 ###### print binode cases
4470 do_indent(indent, "func main(");
4471 for (b2 = cast(binode, b->left); b2; b2 = cast(binode, b2->right)) {
4472 struct variable *v = cast(var, b2->left)->var;
4474 print_exec(b2->left, 0, 0);
4476 type_print(v->type, stdout);
4482 print_exec(b->right, indent+1, bracket);
4484 do_indent(indent, "}\n");
4487 ###### propagate binode cases
4489 case Func: abort(); // NOTEST
4491 ###### core functions
4493 static int analyse_prog(struct exec *prog, struct parse_context *c)
4495 struct binode *bp = cast(binode, prog);
4499 struct type *argv_type;
4500 struct text argv_type_name = { " argv", 5 };
4505 argv_type = add_type(c, argv_type_name, &array_prototype);
4506 argv_type->array.member = Tstr;
4507 argv_type->array.unspec = 1;
4509 for (b = cast(binode, bp->left); b; b = cast(binode, b->right)) {
4513 propagate_types(b->left, c, &ok, argv_type, 0);
4515 default: /* invalid */ // NOTEST
4516 propagate_types(b->left, c, &ok, Tnone, 0); // NOTEST
4522 propagate_types(bp->right, c, &ok, Tnone, 0);
4527 /* Make sure everything is still consistent */
4528 propagate_types(bp->right, c, &ok, Tnone, 0);
4530 return 0; // UNTESTED
4535 static void interp_prog(struct parse_context *c, struct exec *prog,
4536 int argc, char **argv)
4538 struct binode *p = cast(binode, prog);
4546 al = cast(binode, p->left);
4548 struct var *v = cast(var, al->left);
4549 struct value *vl = var_value(c, v->var);
4559 mpq_set_ui(argcq, argc, 1);
4560 memcpy(var_value(c, t->array.vsize), &argcq, sizeof(argcq));
4561 t->prepare_type(c, t, 0);
4562 array_init(v->var->type, vl);
4563 for (i = 0; i < argc; i++) {
4564 struct value *vl2 = vl->array + i * v->var->type->array.member->size;
4567 arg.str.txt = argv[i];
4568 arg.str.len = strlen(argv[i]);
4569 free_value(Tstr, vl2);
4570 dup_value(Tstr, &arg, vl2);
4574 al = cast(binode, al->right);
4576 v = interp_exec(c, p, &vtype);
4577 free_value(vtype, &v);
4580 ###### interp binode cases
4581 case List: abort(); // NOTEST
4584 rv = interp_exec(c, b->right, &rvtype);
4587 ## And now to test it out.
4589 Having a language requires having a "hello world" program. I'll
4590 provide a little more than that: a program that prints "Hello world"
4591 finds the GCD of two numbers, prints the first few elements of
4592 Fibonacci, performs a binary search for a number, and a few other
4593 things which will likely grow as the languages grows.
4595 ###### File: oceani.mk
4598 @echo "===== DEMO ====="
4599 ./oceani --section "demo: hello" oceani.mdc 55 33
4605 four ::= 2 + 2 ; five ::= 10/2
4606 const pie ::= "I like Pie";
4607 cake ::= "The cake is"
4618 print "Hello World, what lovely oceans you have!"
4619 print "Are there", five, "?"
4620 print pi, pie, "but", cake
4622 A := $argv[1]; B := $argv[2]
4624 /* When a variable is defined in both branches of an 'if',
4625 * and used afterwards, the variables are merged.
4631 print "Is", A, "bigger than", B,"? ", bigger
4632 /* If a variable is not used after the 'if', no
4633 * merge happens, so types can be different
4636 double:string = "yes"
4637 print A, "is more than twice", B, "?", double
4640 print "double", B, "is", double
4645 if a > 0 and then b > 0:
4651 print "GCD of", A, "and", B,"is", a
4653 print a, "is not positive, cannot calculate GCD"
4655 print b, "is not positive, cannot calculate GCD"
4660 print "Fibonacci:", f1,f2,
4661 then togo = togo - 1
4669 /* Binary search... */
4674 mid := (lo + hi) / 2
4687 print "Yay, I found", target
4689 print "Closest I found was", lo
4694 // "middle square" PRNG. Not particularly good, but one my
4695 // Dad taught me - the first one I ever heard of.
4696 for i:=1; then i = i + 1; while i < size:
4697 n := list[i-1] * list[i-1]
4698 list[i] = (n / 100) % 10 000
4700 print "Before sort:",
4701 for i:=0; then i = i + 1; while i < size:
4705 for i := 1; then i=i+1; while i < size:
4706 for j:=i-1; then j=j-1; while j >= 0:
4707 if list[j] > list[j+1]:
4711 print " After sort:",
4712 for i:=0; then i = i + 1; while i < size:
4716 if 1 == 2 then print "yes"; else print "no"
4720 bob.alive = (bob.name == "Hello")
4721 print "bob", "is" if bob.alive else "isn't", "alive"