1 # Ocean Interpreter - Jamison Creek version
3 Ocean is intended to be a compiled language, so this interpreter is
4 not targeted at being the final product. It is, rather, an intermediate
5 stage and fills that role in two distinct ways.
7 Firstly, it exists as a platform to experiment with the early language
8 design. An interpreter is easy to write and easy to get working, so
9 the barrier for entry is lower if I aim to start with an interpreter.
11 Secondly, the plan for the Ocean compiler is to write it in the
12 [Ocean language](http://ocean-lang.org). To achieve this we naturally
13 need some sort of boot-strap process and this interpreter - written in
14 portable C - will fill that role. It will be used to bootstrap the
17 Two features that are not needed to fill either of these roles are
18 performance and completeness. The interpreter only needs to be fast
19 enough to run small test programs and occasionally to run the compiler
20 on itself. It only needs to be complete enough to test aspects of the
21 design which are developed before the compiler is working, and to run
22 the compiler on itself. Any features not used by the compiler when
23 compiling itself are superfluous. They may be included anyway, but
26 Nonetheless, the interpreter should end up being reasonably complete,
27 and any performance bottlenecks which appear and are easily fixed, will
32 This third version of the interpreter exists to test out some initial
33 ideas relating to types. Particularly it adds arrays (indexed from
34 zero) and simple structures. Basic control flow and variable scoping
35 are already fairly well established, as are basic numerical and
38 Some operators that have only recently been added, and so have not
39 generated all that much experience yet are "and then" and "or else" as
40 short-circuit Boolean operators, and the "if ... else" trinary
41 operator which can select between two expressions based on a third
42 (which appears syntactically in the middle).
44 Elements that are present purely to make a usable language, and
45 without any expectation that they will remain, are the "program'
46 clause, which provides a list of variables to received command-line
47 arguments, and the "print" statement which performs simple output.
49 The current scalar types are "number", "Boolean", and "string".
50 Boolean will likely stay in its current form, the other two might, but
51 could just as easily be changed.
55 Versions of the interpreter which obviously do not support a complete
56 language will be named after creeks and streams. This one is Jamison
59 Once we have something reasonably resembling a complete language, the
60 names of rivers will be used.
61 Early versions of the compiler will be named after seas. Major
62 releases of the compiler will be named after oceans. Hopefully I will
63 be finished once I get to the Pacific Ocean release.
67 As well as parsing and executing a program, the interpreter can print
68 out the program from the parsed internal structure. This is useful
69 for validating the parsing.
70 So the main requirements of the interpreter are:
72 - Parse the program, possibly with tracing,
73 - Analyse the parsed program to ensure consistency,
75 - Execute the program, if no parsing or consistency errors were found.
77 This is all performed by a single C program extracted with
80 There will be two formats for printing the program: a default and one
81 that uses bracketing. So a `--bracket` command line option is needed
82 for that. Normally the first code section found is used, however an
83 alternate section can be requested so that a file (such as this one)
84 can contain multiple programs. This is effected with the `--section`
87 This code must be compiled with `-fplan9-extensions` so that anonymous
88 structures can be used.
90 ###### File: oceani.mk
92 myCFLAGS := -Wall -g -fplan9-extensions
93 CFLAGS := $(filter-out $(myCFLAGS),$(CFLAGS)) $(myCFLAGS)
94 myLDLIBS:= libparser.o libscanner.o libmdcode.o -licuuc
95 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
97 all :: $(LDLIBS) oceani
98 oceani.c oceani.h : oceani.mdc parsergen
99 ./parsergen -o oceani --LALR --tag Parser oceani.mdc
100 oceani.mk: oceani.mdc md2c
103 oceani: oceani.o $(LDLIBS)
104 $(CC) $(CFLAGS) -o oceani oceani.o $(LDLIBS)
106 ###### Parser: header
108 struct parse_context;
110 struct parse_context {
111 struct token_config config;
120 #define container_of(ptr, type, member) ({ \
121 const typeof( ((type *)0)->member ) *__mptr = (ptr); \
122 (type *)( (char *)__mptr - offsetof(type,member) );})
124 #define config2context(_conf) container_of(_conf, struct parse_context, \
127 ###### Parser: reduce
128 struct parse_context *c = config2context(config);
136 #include <sys/mman.h>
155 static char Usage[] =
156 "Usage: oceani --trace --print --noexec --brackets --section=SectionName prog.ocn\n";
157 static const struct option long_options[] = {
158 {"trace", 0, NULL, 't'},
159 {"print", 0, NULL, 'p'},
160 {"noexec", 0, NULL, 'n'},
161 {"brackets", 0, NULL, 'b'},
162 {"section", 1, NULL, 's'},
165 const char *options = "tpnbs";
166 int main(int argc, char *argv[])
171 struct section *s, *ss;
172 char *section = NULL;
173 struct parse_context context = {
175 .ignored = (1 << TK_mark),
176 .number_chars = ".,_+- ",
181 int doprint=0, dotrace=0, doexec=1, brackets=0;
183 while ((opt = getopt_long(argc, argv, options, long_options, NULL))
186 case 't': dotrace=1; break;
187 case 'p': doprint=1; break;
188 case 'n': doexec=0; break;
189 case 'b': brackets=1; break;
190 case 's': section = optarg; break;
191 default: fprintf(stderr, Usage);
195 if (optind >= argc) {
196 fprintf(stderr, "oceani: no input file given\n");
199 fd = open(argv[optind], O_RDONLY);
201 fprintf(stderr, "oceani: cannot open %s\n", argv[optind]);
204 context.file_name = argv[optind];
205 len = lseek(fd, 0, 2);
206 file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
207 s = code_extract(file, file+len, NULL);
209 fprintf(stderr, "oceani: could not find any code in %s\n",
214 ## context initialization
217 for (ss = s; ss; ss = ss->next) {
218 struct text sec = ss->section;
219 if (sec.len == strlen(section) &&
220 strncmp(sec.txt, section, sec.len) == 0)
224 fprintf(stderr, "oceani: cannot find section %s\n",
230 parse_oceani(ss->code, &context.config, dotrace ? stderr : NULL);
233 fprintf(stderr, "oceani: no program found.\n");
234 context.parse_error = 1;
236 if (context.prog && doprint) {
239 print_exec(context.prog, 0, brackets);
241 if (context.prog && doexec && !context.parse_error) {
242 if (!analyse_prog(context.prog, &context)) {
243 fprintf(stderr, "oceani: type error in program - not running.\n");
246 interp_prog(&context, context.prog, argc - optind, argv+optind);
248 free_exec(context.prog);
251 struct section *t = s->next;
257 ## free context types
258 exit(context.parse_error ? 1 : 0);
263 The four requirements of parse, analyse, print, interpret apply to
264 each language element individually so that is how most of the code
267 Three of the four are fairly self explanatory. The one that requires
268 a little explanation is the analysis step.
270 The current language design does not require the types of variables to
271 be declared, but they must still have a single type. Different
272 operations impose different requirements on the variables, for example
273 addition requires both arguments to be numeric, and assignment
274 requires the variable on the left to have the same type as the
275 expression on the right.
277 Analysis involves propagating these type requirements around and
278 consequently setting the type of each variable. If any requirements
279 are violated (e.g. a string is compared with a number) or if a
280 variable needs to have two different types, then an error is raised
281 and the program will not run.
283 If the same variable is declared in both branchs of an 'if/else', or
284 in all cases of a 'switch' then the multiple instances may be merged
285 into just one variable if the variable is referenced after the
286 conditional statement. When this happens, the types must naturally be
287 consistent across all the branches. When the variable is not used
288 outside the if, the variables in the different branches are distinct
289 and can be of different types.
291 Undeclared names may only appear in "use" statements and "case" expressions.
292 These names are given a type of "label" and a unique value.
293 This allows them to fill the role of a name in an enumerated type, which
294 is useful for testing the `switch` statement.
296 As we will see, the condition part of a `while` statement can return
297 either a Boolean or some other type. This requires that the expected
298 type that gets passed around comprises a type and a flag to indicate
299 that `Tbool` is also permitted.
301 As there are, as yet, no distinct types that are compatible, there
302 isn't much subtlety in the analysis. When we have distinct number
303 types, this will become more interesting.
307 When analysis discovers an inconsistency it needs to report an error;
308 just refusing to run the code ensures that the error doesn't cascade,
309 but by itself it isn't very useful. A clear understanding of the sort
310 of error message that are useful will help guide the process of
313 At a simplistic level, the only sort of error that type analysis can
314 report is that the type of some construct doesn't match a contextual
315 requirement. For example, in `4 + "hello"` the addition provides a
316 contextual requirement for numbers, but `"hello"` is not a number. In
317 this particular example no further information is needed as the types
318 are obvious from local information. When a variable is involved that
319 isn't the case. It may be helpful to explain why the variable has a
320 particular type, by indicating the location where the type was set,
321 whether by declaration or usage.
323 Using a recursive-descent analysis we can easily detect a problem at
324 multiple locations. In "`hello:= "there"; 4 + hello`" the addition
325 will detect that one argument is not a number and the usage of `hello`
326 will detect that a number was wanted, but not provided. In this
327 (early) version of the language, we will generate error reports at
328 multiple locations, so the use of `hello` will report an error and
329 explain were the value was set, and the addition will report an error
330 and say why numbers are needed. To be able to report locations for
331 errors, each language element will need to record a file location
332 (line and column) and each variable will need to record the language
333 element where its type was set. For now we will assume that each line
334 of an error message indicates one location in the file, and up to 2
335 types. So we provide a `printf`-like function which takes a format, a
336 location (a `struct exec` which has not yet been introduced), and 2
337 types. "`%1`" reports the first type, "`%2`" reports the second. We
338 will need a function to print the location, once we know how that is
339 stored. e As will be explained later, there are sometimes extra rules for
340 type matching and they might affect error messages, we need to pass those
343 As well as type errors, we sometimes need to report problems with
344 tokens, which might be unexpected or might name a type that has not
345 been defined. For these we have `tok_err()` which reports an error
346 with a given token. Each of the error functions sets the flag in the
347 context so indicate that parsing failed.
351 static void fput_loc(struct exec *loc, FILE *f);
353 ###### core functions
355 static void type_err(struct parse_context *c,
356 char *fmt, struct exec *loc,
357 struct type *t1, int rules, struct type *t2)
359 fprintf(stderr, "%s:", c->file_name);
360 fput_loc(loc, stderr);
361 for (; *fmt ; fmt++) {
368 case '%': fputc(*fmt, stderr); break; // NOTEST
369 default: fputc('?', stderr); break; // NOTEST
371 type_print(t1, stderr);
374 type_print(t2, stderr);
383 static void tok_err(struct parse_context *c, char *fmt, struct token *t)
385 fprintf(stderr, "%s:%d:%d: %s: %.*s\n", c->file_name, t->line, t->col, fmt,
386 t->txt.len, t->txt.txt);
390 ## Entities: declared and predeclared.
392 There are various "things" that the language and/or the interpreter
393 needs to know about to parse and execute a program. These include
394 types, variables, values, and executable code. These are all lumped
395 together under the term "entities" (calling them "objects" would be
396 confusing) and introduced here. The following section will present the
397 different specific code elements which comprise or manipulate these
402 Values come in a wide range of types, with more likely to be added.
403 Each type needs to be able to print its own values (for convenience at
404 least) as well as to compare two values, at least for equality and
405 possibly for order. For now, values might need to be duplicated and
406 freed, though eventually such manipulations will be better integrated
409 Rather than requiring every numeric type to support all numeric
410 operations (add, multiple, etc), we allow types to be able to present
411 as one of a few standard types: integer, float, and fraction. The
412 existence of these conversion functions eventually enable types to
413 determine if they are compatible with other types, though such types
414 have not yet been implemented.
416 Named type are stored in a simple linked list. Objects of each type are
417 "values" which are often passed around by value.
424 ## value union fields
432 void (*init)(struct type *type, struct value *val);
433 void (*prepare_type)(struct parse_context *c, struct type *type, int parse_time);
434 void (*print)(struct type *type, struct value *val);
435 void (*print_type)(struct type *type, FILE *f);
436 int (*cmp_order)(struct type *t1, struct type *t2,
437 struct value *v1, struct value *v2);
438 int (*cmp_eq)(struct type *t1, struct type *t2,
439 struct value *v1, struct value *v2);
440 void (*dup)(struct type *type, struct value *vold, struct value *vnew);
441 void (*free)(struct type *type, struct value *val);
442 void (*free_type)(struct type *t);
443 long long (*to_int)(struct value *v);
444 double (*to_float)(struct value *v);
445 int (*to_mpq)(mpq_t *q, struct value *v);
454 struct type *typelist;
458 static struct type *find_type(struct parse_context *c, struct text s)
460 struct type *l = c->typelist;
463 text_cmp(l->name, s) != 0)
468 static struct type *add_type(struct parse_context *c, struct text s,
473 n = calloc(1, sizeof(*n));
476 n->next = c->typelist;
481 static void free_type(struct type *t)
483 /* The type is always a reference to something in the
484 * context, so we don't need to free anything.
488 static void free_value(struct type *type, struct value *v)
494 static void type_print(struct type *type, FILE *f)
497 fputs("*unknown*type*", f);
498 else if (type->name.len)
499 fprintf(f, "%.*s", type->name.len, type->name.txt);
500 else if (type->print_type)
501 type->print_type(type, f);
503 fputs("*invalid*type*", f); // NOTEST
506 static void val_init(struct type *type, struct value *val)
508 if (type && type->init)
509 type->init(type, val);
512 static void dup_value(struct type *type,
513 struct value *vold, struct value *vnew)
515 if (type && type->dup)
516 type->dup(type, vold, vnew);
519 static int value_cmp(struct type *tl, struct type *tr,
520 struct value *left, struct value *right)
522 if (tl && tl->cmp_order)
523 return tl->cmp_order(tl, tr, left, right);
524 if (tl && tl->cmp_eq)
525 return tl->cmp_eq(tl, tr, left, right);
529 static void print_value(struct type *type, struct value *v)
531 if (type && type->print)
532 type->print(type, v);
534 printf("*Unknown*"); // NOTEST
539 static void free_value(struct type *type, struct value *v);
540 static int type_compat(struct type *require, struct type *have, int rules);
541 static void type_print(struct type *type, FILE *f);
542 static void val_init(struct type *type, struct value *v);
543 static void dup_value(struct type *type,
544 struct value *vold, struct value *vnew);
545 static int value_cmp(struct type *tl, struct type *tr,
546 struct value *left, struct value *right);
547 static void print_value(struct type *type, struct value *v);
549 ###### free context types
551 while (context.typelist) {
552 struct type *t = context.typelist;
554 context.typelist = t->next;
562 Values of the base types can be numbers, which we represent as
563 multi-precision fractions, strings, Booleans and labels. When
564 analysing the program we also need to allow for places where no value
565 is meaningful (type `Tnone`) and where we don't know what type to
566 expect yet (type is `NULL`).
568 Values are never shared, they are always copied when used, and freed
569 when no longer needed.
571 When propagating type information around the program, we need to
572 determine if two types are compatible, where type `NULL` is compatible
573 with anything. There are two special cases with type compatibility,
574 both related to the Conditional Statement which will be described
575 later. In some cases a Boolean can be accepted as well as some other
576 primary type, and in others any type is acceptable except a label (`Vlabel`).
577 A separate function encoding these cases will simplify some code later.
579 ###### type functions
581 int (*compat)(struct type *this, struct type *other);
585 static int type_compat(struct type *require, struct type *have, int rules)
587 if ((rules & Rboolok) && have == Tbool)
589 if ((rules & Rnolabel) && have == Tlabel)
591 if (!require || !have)
595 return require->compat(require, have);
597 return require == have;
602 #include "parse_string.h"
603 #include "parse_number.h"
606 myLDLIBS := libnumber.o libstring.o -lgmp
607 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
609 ###### type union fields
610 enum vtype {Vnone, Vstr, Vnum, Vbool, Vlabel} vtype;
612 ###### value union fields
619 static void _free_value(struct type *type, struct value *v)
623 switch (type->vtype) {
625 case Vstr: free(v->str.txt); break;
626 case Vnum: mpq_clear(v->num); break;
632 ###### value functions
634 static void _val_init(struct type *type, struct value *val)
636 switch(type->vtype) {
637 case Vnone: // NOTEST
640 mpq_init(val->num); break;
642 val->str.txt = malloc(1);
648 case Vlabel: // NOTEST
649 val->label = NULL; // NOTEST
654 static void _dup_value(struct type *type,
655 struct value *vold, struct value *vnew)
657 switch (type->vtype) {
658 case Vnone: // NOTEST
661 vnew->label = vold->label;
664 vnew->bool = vold->bool;
668 mpq_set(vnew->num, vold->num);
671 vnew->str.len = vold->str.len;
672 vnew->str.txt = malloc(vnew->str.len);
673 memcpy(vnew->str.txt, vold->str.txt, vnew->str.len);
678 static int _value_cmp(struct type *tl, struct type *tr,
679 struct value *left, struct value *right)
683 return tl - tr; // NOTEST
685 case Vlabel: cmp = left->label == right->label ? 0 : 1; break;
686 case Vnum: cmp = mpq_cmp(left->num, right->num); break;
687 case Vstr: cmp = text_cmp(left->str, right->str); break;
688 case Vbool: cmp = left->bool - right->bool; break;
689 case Vnone: cmp = 0; // NOTEST
694 static void _print_value(struct type *type, struct value *v)
696 switch (type->vtype) {
697 case Vnone: // NOTEST
698 printf("*no-value*"); break; // NOTEST
699 case Vlabel: // NOTEST
700 printf("*label-%p*", v->label); break; // NOTEST
702 printf("%.*s", v->str.len, v->str.txt); break;
704 printf("%s", v->bool ? "True":"False"); break;
709 mpf_set_q(fl, v->num);
710 gmp_printf("%Fg", fl);
717 static void _free_value(struct type *type, struct value *v);
719 static struct type base_prototype = {
721 .print = _print_value,
722 .cmp_order = _value_cmp,
723 .cmp_eq = _value_cmp,
728 static struct type *Tbool, *Tstr, *Tnum, *Tnone, *Tlabel;
731 static struct type *add_base_type(struct parse_context *c, char *n,
732 enum vtype vt, int size)
734 struct text txt = { n, strlen(n) };
737 t = add_type(c, txt, &base_prototype);
740 t->align = size > sizeof(void*) ? sizeof(void*) : size;
741 if (t->size & (t->align - 1))
742 t->size = (t->size | (t->align - 1)) + 1;
746 ###### context initialization
748 Tbool = add_base_type(&context, "Boolean", Vbool, sizeof(char));
749 Tstr = add_base_type(&context, "string", Vstr, sizeof(struct text));
750 Tnum = add_base_type(&context, "number", Vnum, sizeof(mpq_t));
751 Tnone = add_base_type(&context, "none", Vnone, 0);
752 Tlabel = add_base_type(&context, "label", Vlabel, sizeof(void*));
756 Variables are scoped named values. We store the names in a linked list
757 of "bindings" sorted in lexical order, and use sequential search and
764 struct binding *next; // in lexical order
768 This linked list is stored in the parse context so that "reduce"
769 functions can find or add variables, and so the analysis phase can
770 ensure that every variable gets a type.
774 struct binding *varlist; // In lexical order
778 static struct binding *find_binding(struct parse_context *c, struct text s)
780 struct binding **l = &c->varlist;
785 (cmp = text_cmp((*l)->name, s)) < 0)
789 n = calloc(1, sizeof(*n));
796 Each name can be linked to multiple variables defined in different
797 scopes. Each scope starts where the name is declared and continues
798 until the end of the containing code block. Scopes of a given name
799 cannot nest, so a declaration while a name is in-scope is an error.
801 ###### binding fields
802 struct variable *var;
806 struct variable *previous;
808 struct binding *name;
809 struct exec *where_decl;// where name was declared
810 struct exec *where_set; // where type was set
814 While the naming seems strange, we include local constants in the
815 definition of variables. A name declared `var := value` can
816 subsequently be changed, but a name declared `var ::= value` cannot -
819 ###### variable fields
822 Scopes in parallel branches can be partially merged. More
823 specifically, if a given name is declared in both branches of an
824 if/else then its scope is a candidate for merging. Similarly if
825 every branch of an exhaustive switch (e.g. has an "else" clause)
826 declares a given name, then the scopes from the branches are
827 candidates for merging.
829 Note that names declared inside a loop (which is only parallel to
830 itself) are never visible after the loop. Similarly names defined in
831 scopes which are not parallel, such as those started by `for` and
832 `switch`, are never visible after the scope. Only variables defined in
833 both `then` and `else` (including the implicit then after an `if`, and
834 excluding `then` used with `for`) and in all `case`s and `else` of a
835 `switch` or `while` can be visible beyond the `if`/`switch`/`while`.
837 Labels, which are a bit like variables, follow different rules.
838 Labels are not explicitly declared, but if an undeclared name appears
839 in a context where a label is legal, that effectively declares the
840 name as a label. The declaration remains in force (or in scope) at
841 least to the end of the immediately containing block and conditionally
842 in any larger containing block which does not declare the name in some
843 other way. Importantly, the conditional scope extension happens even
844 if the label is only used in one parallel branch of a conditional --
845 when used in one branch it is treated as having been declared in all
848 Merge candidates are tentatively visible beyond the end of the
849 branching statement which creates them. If the name is used, the
850 merge is affirmed and they become a single variable visible at the
851 outer layer. If not - if it is redeclared first - the merge lapses.
853 To track scopes we have an extra stack, implemented as a linked list,
854 which roughly parallels the parse stack and which is used exclusively
855 for scoping. When a new scope is opened, a new frame is pushed and
856 the child-count of the parent frame is incremented. This child-count
857 is used to distinguish between the first of a set of parallel scopes,
858 in which declared variables must not be in scope, and subsequent
859 branches, whether they may already be conditionally scoped.
861 To push a new frame *before* any code in the frame is parsed, we need a
862 grammar reduction. This is most easily achieved with a grammar
863 element which derives the empty string, and creates the new scope when
864 it is recognised. This can be placed, for example, between a keyword
865 like "if" and the code following it.
869 struct scope *parent;
875 struct scope *scope_stack;
878 static void scope_pop(struct parse_context *c)
880 struct scope *s = c->scope_stack;
882 c->scope_stack = s->parent;
887 static void scope_push(struct parse_context *c)
889 struct scope *s = calloc(1, sizeof(*s));
891 c->scope_stack->child_count += 1;
892 s->parent = c->scope_stack;
900 OpenScope -> ${ scope_push(c); }$
901 ClosePara -> ${ var_block_close(c, CloseParallel); }$
903 Each variable records a scope depth and is in one of four states:
905 - "in scope". This is the case between the declaration of the
906 variable and the end of the containing block, and also between
907 the usage with affirms a merge and the end of that block.
909 The scope depth is not greater than the current parse context scope
910 nest depth. When the block of that depth closes, the state will
911 change. To achieve this, all "in scope" variables are linked
912 together as a stack in nesting order.
914 - "pending". The "in scope" block has closed, but other parallel
915 scopes are still being processed. So far, every parallel block at
916 the same level that has closed has declared the name.
918 The scope depth is the depth of the last parallel block that
919 enclosed the declaration, and that has closed.
921 - "conditionally in scope". The "in scope" block and all parallel
922 scopes have closed, and no further mention of the name has been
923 seen. This state includes a secondary nest depth which records the
924 outermost scope seen since the variable became conditionally in
925 scope. If a use of the name is found, the variable becomes "in
926 scope" and that secondary depth becomes the recorded scope depth.
927 If the name is declared as a new variable, the old variable becomes
928 "out of scope" and the recorded scope depth stays unchanged.
930 - "out of scope". The variable is neither in scope nor conditionally
931 in scope. It is permanently out of scope now and can be removed from
932 the "in scope" stack.
934 ###### variable fields
935 int depth, min_depth;
936 enum { OutScope, PendingScope, CondScope, InScope } scope;
937 struct variable *in_scope;
941 struct variable *in_scope;
943 All variables with the same name are linked together using the
944 'previous' link. Those variable that have been affirmatively merged all
945 have a 'merged' pointer that points to one primary variable - the most
946 recently declared instance. When merging variables, we need to also
947 adjust the 'merged' pointer on any other variables that had previously
948 been merged with the one that will no longer be primary.
950 A variable that is no longer the most recent instance of a name may
951 still have "pending" scope, if it might still be merged with most
952 recent instance. These variables don't really belong in the
953 "in_scope" list, but are not immediately removed when a new instance
954 is found. Instead, they are detected and ignored when considering the
955 list of in_scope names.
957 The storage of the value of a variable will be described later. For now
958 we just need to know that when a variable goes out of scope, it might
959 need to be freed. For this we need to be able to find it, so assume that
960 `var_value()` will provide that.
962 ###### variable fields
963 struct variable *merged;
967 static void variable_merge(struct variable *primary, struct variable *secondary)
973 primary = primary->merged;
975 for (v = primary->previous; v; v=v->previous)
976 if (v == secondary || v == secondary->merged ||
977 v->merged == secondary ||
978 (v->merged && v->merged == secondary->merged)) {
985 static struct value *var_value(struct parse_context *c, struct variable *v);
987 ###### free context vars
989 while (context.varlist) {
990 struct binding *b = context.varlist;
991 struct variable *v = b->var;
992 context.varlist = b->next;
995 struct variable *t = v;
998 free_value(t->type, var_value(&context, t));
1000 // This is a global constant
1001 free_exec(t->where_decl);
1006 #### Manipulating Bindings
1008 When a name is conditionally visible, a new declaration discards the
1009 old binding - the condition lapses. Conversely a usage of the name
1010 affirms the visibility and extends it to the end of the containing
1011 block - i.e. the block that contains both the original declaration and
1012 the latest usage. This is determined from `min_depth`. When a
1013 conditionally visible variable gets affirmed like this, it is also
1014 merged with other conditionally visible variables with the same name.
1016 When we parse a variable declaration we either report an error if the
1017 name is currently bound, or create a new variable at the current nest
1018 depth if the name is unbound or bound to a conditionally scoped or
1019 pending-scope variable. If the previous variable was conditionally
1020 scoped, it and its homonyms becomes out-of-scope.
1022 When we parse a variable reference (including non-declarative assignment
1023 "foo = bar") we report an error if the name is not bound or is bound to
1024 a pending-scope variable; update the scope if the name is bound to a
1025 conditionally scoped variable; or just proceed normally if the named
1026 variable is in scope.
1028 When we exit a scope, any variables bound at this level are either
1029 marked out of scope or pending-scoped, depending on whether the scope
1030 was sequential or parallel. Here a "parallel" scope means the "then"
1031 or "else" part of a conditional, or any "case" or "else" branch of a
1032 switch. Other scopes are "sequential".
1034 When exiting a parallel scope we check if there are any variables that
1035 were previously pending and are still visible. If there are, then
1036 there weren't redeclared in the most recent scope, so they cannot be
1037 merged and must become out-of-scope. If it is not the first of
1038 parallel scopes (based on `child_count`), we check that there was a
1039 previous binding that is still pending-scope. If there isn't, the new
1040 variable must now be out-of-scope.
1042 When exiting a sequential scope that immediately enclosed parallel
1043 scopes, we need to resolve any pending-scope variables. If there was
1044 no `else` clause, and we cannot determine that the `switch` was exhaustive,
1045 we need to mark all pending-scope variable as out-of-scope. Otherwise
1046 all pending-scope variables become conditionally scoped.
1049 enum closetype { CloseSequential, CloseParallel, CloseElse };
1051 ###### ast functions
1053 static struct variable *var_decl(struct parse_context *c, struct text s)
1055 struct binding *b = find_binding(c, s);
1056 struct variable *v = b->var;
1058 switch (v ? v->scope : OutScope) {
1060 /* Caller will report the error */
1064 v && v->scope == CondScope;
1066 v->scope = OutScope;
1070 v = calloc(1, sizeof(*v));
1071 v->previous = b->var;
1074 v->min_depth = v->depth = c->scope_depth;
1076 v->in_scope = c->in_scope;
1081 static struct variable *var_ref(struct parse_context *c, struct text s)
1083 struct binding *b = find_binding(c, s);
1084 struct variable *v = b->var;
1085 struct variable *v2;
1087 switch (v ? v->scope : OutScope) {
1090 /* Caller will report the error */
1093 /* All CondScope variables of this name need to be merged
1094 * and become InScope
1096 v->depth = v->min_depth;
1098 for (v2 = v->previous;
1099 v2 && v2->scope == CondScope;
1101 variable_merge(v, v2);
1109 static void var_block_close(struct parse_context *c, enum closetype ct)
1111 /* Close off all variables that are in_scope */
1112 struct variable *v, **vp, *v2;
1115 for (vp = &c->in_scope;
1116 v = *vp, v && v->depth > c->scope_depth && v->min_depth > c->scope_depth;
1118 if (v->name->var == v) switch (ct) {
1120 case CloseParallel: /* handle PendingScope */
1124 if (c->scope_stack->child_count == 1)
1125 v->scope = PendingScope;
1126 else if (v->previous &&
1127 v->previous->scope == PendingScope)
1128 v->scope = PendingScope;
1129 else if (v->type == Tlabel)
1130 v->scope = PendingScope;
1131 else if (v->name->var == v)
1132 v->scope = OutScope;
1133 if (ct == CloseElse) {
1134 /* All Pending variables with this name
1135 * are now Conditional */
1137 v2 && v2->scope == PendingScope;
1139 v2->scope = CondScope;
1144 v2 && v2->scope == PendingScope;
1146 if (v2->type != Tlabel)
1147 v2->scope = OutScope;
1149 case OutScope: break;
1152 case CloseSequential:
1153 if (v->type == Tlabel)
1154 v->scope = PendingScope;
1157 v->scope = OutScope;
1160 /* There was no 'else', so we can only become
1161 * conditional if we know the cases were exhaustive,
1162 * and that doesn't mean anything yet.
1163 * So only labels become conditional..
1166 v2 && v2->scope == PendingScope;
1168 if (v2->type == Tlabel) {
1169 v2->scope = CondScope;
1170 v2->min_depth = c->scope_depth;
1172 v2->scope = OutScope;
1175 case OutScope: break;
1179 if (v->scope == OutScope || v->name->var != v)
1188 The value of a variable is store separately from the variable, on an
1189 analogue of a stack frame. There are (currently) two frames that can be
1190 active. A global frame which currently only stores constants, and a
1191 stacked frame which stores local variables. Each variable knows if it
1192 is global or not, and what its index into the frame is.
1194 Values in the global frame are known immediately they are relevant, so
1195 the frame needs to be reallocated as it grows so it can store those
1196 values. The local frame doesn't get values until the interpreted phase
1197 is started, so there is no need to allocate until the size is known.
1199 ###### variable fields
1203 ###### parse context
1205 short global_size, global_alloc;
1207 void *global, *local;
1209 ###### ast functions
1211 static struct value *var_value(struct parse_context *c, struct variable *v)
1214 if (!c->local || !v->type)
1216 if (v->frame_pos + v->type->size > c->local_size) {
1217 printf("INVALID frame_pos\n"); // NOTEST
1220 return c->local + v->frame_pos;
1222 if (c->global_size > c->global_alloc) {
1223 int old = c->global_alloc;
1224 c->global_alloc = (c->global_size | 1023) + 1024;
1225 c->global = realloc(c->global, c->global_alloc);
1226 memset(c->global + old, 0, c->global_alloc - old);
1228 return c->global + v->frame_pos;
1231 static struct value *global_alloc(struct parse_context *c, struct type *t,
1232 struct variable *v, struct value *init)
1235 struct variable scratch;
1237 if (t->prepare_type)
1238 t->prepare_type(c, t, 1);
1240 if (c->global_size & (t->align - 1))
1241 c->global_size = (c->global_size + t->align) & ~(t->align-1);
1246 v->frame_pos = c->global_size;
1248 c->global_size += v->type->size;
1249 ret = var_value(c, v);
1251 memcpy(ret, init, t->size);
1257 As global values are found -- struct field initializers, labels etc --
1258 `global_alloc()` is called to record the value in the global frame.
1260 When the program is fully parsed, we need to walk the list of variables
1261 to find any that weren't merged away and that aren't global, and to
1262 calculate the frame size and assign a frame position for each variable.
1263 For this we have `scope_finalize()`.
1265 ###### ast functions
1267 static void scope_finalize(struct parse_context *c)
1271 for (b = c->varlist; b; b = b->next) {
1273 for (v = b->var; v; v = v->previous) {
1274 struct type *t = v->type;
1275 if (v->merged && v->merged != v)
1279 if (c->local_size & (t->align - 1))
1280 c->local_size = (c->local_size + t->align) & ~(t->align-1);
1281 v->frame_pos = c->local_size;
1282 c->local_size += v->type->size;
1285 c->local = calloc(1, c->local_size);
1288 ###### free context vars
1289 free(context.global);
1290 free(context.local);
1294 Executables can be lots of different things. In many cases an
1295 executable is just an operation combined with one or two other
1296 executables. This allows for expressions and lists etc. Other times an
1297 executable is something quite specific like a constant or variable name.
1298 So we define a `struct exec` to be a general executable with a type, and
1299 a `struct binode` which is a subclass of `exec`, forms a node in a
1300 binary tree, and holds an operation. There will be other subclasses,
1301 and to access these we need to be able to `cast` the `exec` into the
1302 various other types. The first field in any `struct exec` is the type
1303 from the `exec_types` enum.
1306 #define cast(structname, pointer) ({ \
1307 const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \
1308 if (__mptr && *__mptr != X##structname) abort(); \
1309 (struct structname *)( (char *)__mptr);})
1311 #define new(structname) ({ \
1312 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1313 __ptr->type = X##structname; \
1314 __ptr->line = -1; __ptr->column = -1; \
1317 #define new_pos(structname, token) ({ \
1318 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1319 __ptr->type = X##structname; \
1320 __ptr->line = token.line; __ptr->column = token.col; \
1329 enum exec_types type;
1337 struct exec *left, *right;
1340 ###### ast functions
1342 static int __fput_loc(struct exec *loc, FILE *f)
1346 if (loc->line >= 0) {
1347 fprintf(f, "%d:%d: ", loc->line, loc->column);
1350 if (loc->type == Xbinode)
1351 return __fput_loc(cast(binode,loc)->left, f) ||
1352 __fput_loc(cast(binode,loc)->right, f);
1355 static void fput_loc(struct exec *loc, FILE *f)
1357 if (!__fput_loc(loc, f))
1358 fprintf(f, "??:??: "); // NOTEST
1361 Each different type of `exec` node needs a number of functions defined,
1362 a bit like methods. We must be able to free it, print it, analyse it
1363 and execute it. Once we have specific `exec` types we will need to
1364 parse them too. Let's take this a bit more slowly.
1368 The parser generator requires a `free_foo` function for each struct
1369 that stores attributes and they will often be `exec`s and subtypes
1370 there-of. So we need `free_exec` which can handle all the subtypes,
1371 and we need `free_binode`.
1373 ###### ast functions
1375 static void free_binode(struct binode *b)
1380 free_exec(b->right);
1384 ###### core functions
1385 static void free_exec(struct exec *e)
1394 ###### forward decls
1396 static void free_exec(struct exec *e);
1398 ###### free exec cases
1399 case Xbinode: free_binode(cast(binode, e)); break;
1403 Printing an `exec` requires that we know the current indent level for
1404 printing line-oriented components. As will become clear later, we
1405 also want to know what sort of bracketing to use.
1407 ###### ast functions
1409 static void do_indent(int i, char *str)
1416 ###### core functions
1417 static void print_binode(struct binode *b, int indent, int bracket)
1421 ## print binode cases
1425 static void print_exec(struct exec *e, int indent, int bracket)
1431 print_binode(cast(binode, e), indent, bracket); break;
1436 ###### forward decls
1438 static void print_exec(struct exec *e, int indent, int bracket);
1442 As discussed, analysis involves propagating type requirements around the
1443 program and looking for errors.
1445 So `propagate_types` is passed an expected type (being a `struct type`
1446 pointer together with some `val_rules` flags) that the `exec` is
1447 expected to return, and returns the type that it does return, either
1448 of which can be `NULL` signifying "unknown". An `ok` flag is passed
1449 by reference. It is set to `0` when an error is found, and `2` when
1450 any change is made. If it remains unchanged at `1`, then no more
1451 propagation is needed.
1455 enum val_rules {Rnolabel = 1<<0, Rboolok = 1<<1, Rnoconstant = 2<<1};
1459 if (rules & Rnolabel)
1460 fputs(" (labels not permitted)", stderr);
1463 ###### core functions
1465 static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1466 struct type *type, int rules);
1467 static struct type *__propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1468 struct type *type, int rules)
1475 switch (prog->type) {
1478 struct binode *b = cast(binode, prog);
1480 ## propagate binode cases
1484 ## propagate exec cases
1489 static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1490 struct type *type, int rules)
1492 struct type *ret = __propagate_types(prog, c, ok, type, rules);
1501 Interpreting an `exec` doesn't require anything but the `exec`. State
1502 is stored in variables and each variable will be directly linked from
1503 within the `exec` tree. The exception to this is the whole `program`
1504 which needs to look at command line arguments. The `program` will be
1505 interpreted separately.
1507 Each `exec` can return a value combined with a type in `struct lrval`.
1508 The type may be `Tnone` but must be non-NULL. Some `exec`s will return
1509 the location of a value, which can be updated, in `lval`. Others will
1510 set `lval` to NULL indicating that there is a value of appropriate type
1514 ###### core functions
1518 struct value rval, *lval;
1521 static struct lrval _interp_exec(struct parse_context *c, struct exec *e);
1523 static struct value interp_exec(struct parse_context *c, struct exec *e,
1524 struct type **typeret)
1526 struct lrval ret = _interp_exec(c, e);
1528 if (!ret.type) abort();
1530 *typeret = ret.type;
1532 dup_value(ret.type, ret.lval, &ret.rval);
1536 static struct value *linterp_exec(struct parse_context *c, struct exec *e,
1537 struct type **typeret)
1539 struct lrval ret = _interp_exec(c, e);
1542 *typeret = ret.type;
1544 free_value(ret.type, &ret.rval);
1548 static struct lrval _interp_exec(struct parse_context *c, struct exec *e)
1551 struct value rv = {}, *lrv = NULL;
1552 struct type *rvtype;
1554 rvtype = ret.type = Tnone;
1564 struct binode *b = cast(binode, e);
1565 struct value left, right, *lleft;
1566 struct type *ltype, *rtype;
1567 ltype = rtype = Tnone;
1569 ## interp binode cases
1571 free_value(ltype, &left);
1572 free_value(rtype, &right);
1575 ## interp exec cases
1585 Now that we have the shape of the interpreter in place we can add some
1586 complex types and connected them in to the data structures and the
1587 different phases of parse, analyse, print, interpret.
1589 Thus far we have arrays and structs.
1593 Arrays can be declared by giving a size and a type, as `[size]type' so
1594 `freq:[26]number` declares `freq` to be an array of 26 numbers. The
1595 size can be either a literal number, or a named constant. Some day an
1596 arbitrary expression will be supported.
1598 Arrays cannot be assigned. When pointers are introduced we will also
1599 introduce array slices which can refer to part or all of an array -
1600 the assignment syntax will create a slice. For now, an array can only
1601 ever be referenced by the name it is declared with. It is likely that
1602 a "`copy`" primitive will eventually be define which can be used to
1603 make a copy of an array with controllable recursive depth.
1605 For now we have two sorts of array, those with fixed size either because
1606 it is given as a literal number or because it is a struct member (which
1607 cannot have a runtime-changing size), and those with a size that is
1608 determined at runtime - local variables with a const size. The former
1609 have their size calculated at parse time, the latter at run time.
1611 For the latter type, the `size` field of the type is the size of a
1612 pointer, and the array is reallocated every time it comes into scope.
1614 We differentiate struct fields with a const size from local variables
1615 with a const size by whether they are prepared at parse time or not.
1617 ###### type union fields
1622 struct variable *vsize;
1623 struct type *member;
1626 ###### value union fields
1627 void *array; // used if not static_size
1629 ###### value functions
1631 static void array_prepare_type(struct parse_context *c, struct type *type,
1634 struct value *vsize;
1636 if (!type->array.vsize || type->array.static_size)
1639 vsize = var_value(c, type->array.vsize);
1641 mpz_tdiv_q(q, mpq_numref(vsize->num), mpq_denref(vsize->num));
1642 type->array.size = mpz_get_si(q);
1646 type->array.static_size = 1;
1647 type->size = type->array.size * type->array.member->size;
1648 type->align = type->array.member->align;
1652 static void array_init(struct type *type, struct value *val)
1655 void *ptr = val->ptr;
1659 if (!type->array.static_size) {
1660 val->array = calloc(type->array.size,
1661 type->array.member->size);
1664 for (i = 0; i < type->array.size; i++) {
1666 v = (void*)ptr + i * type->array.member->size;
1667 val_init(type->array.member, v);
1671 static void array_free(struct type *type, struct value *val)
1674 void *ptr = val->ptr;
1676 if (!type->array.static_size)
1678 for (i = 0; i < type->array.size; i++) {
1680 v = (void*)ptr + i * type->array.member->size;
1681 free_value(type->array.member, v);
1683 if (!type->array.static_size)
1687 static int array_compat(struct type *require, struct type *have)
1689 if (have->compat != require->compat)
1691 /* Both are arrays, so we can look at details */
1692 if (!type_compat(require->array.member, have->array.member, 0))
1694 if (require->array.vsize == NULL && have->array.vsize == NULL)
1695 return require->array.size == have->array.size;
1697 return require->array.vsize == have->array.vsize;
1700 static void array_print_type(struct type *type, FILE *f)
1703 if (type->array.vsize) {
1704 struct binding *b = type->array.vsize->name;
1705 fprintf(f, "%.*s]", b->name.len, b->name.txt);
1707 fprintf(f, "%d]", type->array.size);
1708 type_print(type->array.member, f);
1711 static struct type array_prototype = {
1713 .prepare_type = array_prepare_type,
1714 .print_type = array_print_type,
1715 .compat = array_compat,
1717 .size = sizeof(void*),
1718 .align = sizeof(void*),
1721 ###### declare terminals
1726 | [ NUMBER ] Type ${ {
1729 struct text noname = { "", 0 };
1732 $0 = t = add_type(c, noname, &array_prototype);
1733 t->array.member = $<4;
1734 t->array.vsize = NULL;
1735 if (number_parse(num, tail, $2.txt) == 0)
1736 tok_err(c, "error: unrecognised number", &$2);
1738 tok_err(c, "error: unsupported number suffix", &$2);
1740 t->array.size = mpz_get_ui(mpq_numref(num));
1741 if (mpz_cmp_ui(mpq_denref(num), 1) != 0) {
1742 tok_err(c, "error: array size must be an integer",
1744 } else if (mpz_cmp_ui(mpq_numref(num), 1UL << 30) >= 0)
1745 tok_err(c, "error: array size is too large",
1749 t->array.static_size = 1;
1750 t->size = t->array.size * t->array.member->size;
1751 t->align = t->array.member->align;
1754 | [ IDENTIFIER ] Type ${ {
1755 struct variable *v = var_ref(c, $2.txt);
1756 struct text noname = { "", 0 };
1759 tok_err(c, "error: name undeclared", &$2);
1760 else if (!v->constant)
1761 tok_err(c, "error: array size must be a constant", &$2);
1763 $0 = add_type(c, noname, &array_prototype);
1764 $0->array.member = $<4;
1766 $0->array.vsize = v;
1772 ###### variable grammar
1774 | Variable [ Expression ] ${ {
1775 struct binode *b = new(binode);
1782 ###### print binode cases
1784 print_exec(b->left, -1, bracket);
1786 print_exec(b->right, -1, bracket);
1790 ###### propagate binode cases
1792 /* left must be an array, right must be a number,
1793 * result is the member type of the array
1795 propagate_types(b->right, c, ok, Tnum, 0);
1796 t = propagate_types(b->left, c, ok, NULL, rules & Rnoconstant);
1797 if (!t || t->compat != array_compat) {
1798 type_err(c, "error: %1 cannot be indexed", prog, t, 0, NULL);
1801 if (!type_compat(type, t->array.member, rules)) {
1802 type_err(c, "error: have %1 but need %2", prog,
1803 t->array.member, rules, type);
1805 return t->array.member;
1809 ###### interp binode cases
1815 lleft = linterp_exec(c, b->left, <ype);
1816 right = interp_exec(c, b->right, &rtype);
1818 mpz_tdiv_q(q, mpq_numref(right.num), mpq_denref(right.num));
1822 if (ltype->array.static_size)
1825 ptr = *(void**)lleft;
1826 rvtype = ltype->array.member;
1827 if (i >= 0 && i < ltype->array.size)
1828 lrv = ptr + i * rvtype->size;
1830 val_init(ltype->array.member, &rv);
1837 A `struct` is a data-type that contains one or more other data-types.
1838 It differs from an array in that each member can be of a different
1839 type, and they are accessed by name rather than by number. Thus you
1840 cannot choose an element by calculation, you need to know what you
1843 The language makes no promises about how a given structure will be
1844 stored in memory - it is free to rearrange fields to suit whatever
1845 criteria seems important.
1847 Structs are declared separately from program code - they cannot be
1848 declared in-line in a variable declaration like arrays can. A struct
1849 is given a name and this name is used to identify the type - the name
1850 is not prefixed by the word `struct` as it would be in C.
1852 Structs are only treated as the same if they have the same name.
1853 Simply having the same fields in the same order is not enough. This
1854 might change once we can create structure initializers from a list of
1857 Each component datum is identified much like a variable is declared,
1858 with a name, one or two colons, and a type. The type cannot be omitted
1859 as there is no opportunity to deduce the type from usage. An initial
1860 value can be given following an equals sign, so
1862 ##### Example: a struct type
1868 would declare a type called "complex" which has two number fields,
1869 each initialised to zero.
1871 Struct will need to be declared separately from the code that uses
1872 them, so we will need to be able to print out the declaration of a
1873 struct when reprinting the whole program. So a `print_type_decl` type
1874 function will be needed.
1876 ###### type union fields
1888 ###### type functions
1889 void (*print_type_decl)(struct type *type, FILE *f);
1891 ###### value functions
1893 static void structure_init(struct type *type, struct value *val)
1897 for (i = 0; i < type->structure.nfields; i++) {
1899 v = (void*) val->ptr + type->structure.fields[i].offset;
1900 if (type->structure.fields[i].init)
1901 dup_value(type->structure.fields[i].type,
1902 type->structure.fields[i].init,
1905 val_init(type->structure.fields[i].type, v);
1909 static void structure_free(struct type *type, struct value *val)
1913 for (i = 0; i < type->structure.nfields; i++) {
1915 v = (void*)val->ptr + type->structure.fields[i].offset;
1916 free_value(type->structure.fields[i].type, v);
1920 static void structure_free_type(struct type *t)
1923 for (i = 0; i < t->structure.nfields; i++)
1924 if (t->structure.fields[i].init) {
1925 free_value(t->structure.fields[i].type,
1926 t->structure.fields[i].init);
1928 free(t->structure.fields);
1931 static struct type structure_prototype = {
1932 .init = structure_init,
1933 .free = structure_free,
1934 .free_type = structure_free_type,
1935 .print_type_decl = structure_print_type,
1949 ###### free exec cases
1951 free_exec(cast(fieldref, e)->left);
1955 ###### declare terminals
1958 ###### variable grammar
1960 | Variable . IDENTIFIER ${ {
1961 struct fieldref *fr = new_pos(fieldref, $2);
1968 ###### print exec cases
1972 struct fieldref *f = cast(fieldref, e);
1973 print_exec(f->left, -1, bracket);
1974 printf(".%.*s", f->name.len, f->name.txt);
1978 ###### ast functions
1979 static int find_struct_index(struct type *type, struct text field)
1982 for (i = 0; i < type->structure.nfields; i++)
1983 if (text_cmp(type->structure.fields[i].name, field) == 0)
1988 ###### propagate exec cases
1992 struct fieldref *f = cast(fieldref, prog);
1993 struct type *st = propagate_types(f->left, c, ok, NULL, 0);
1996 type_err(c, "error: unknown type for field access", f->left,
1998 else if (st->init != structure_init)
1999 type_err(c, "error: field reference attempted on %1, not a struct",
2000 f->left, st, 0, NULL);
2001 else if (f->index == -2) {
2002 f->index = find_struct_index(st, f->name);
2004 type_err(c, "error: cannot find requested field in %1",
2005 f->left, st, 0, NULL);
2007 if (f->index >= 0) {
2008 struct type *ft = st->structure.fields[f->index].type;
2009 if (!type_compat(type, ft, rules))
2010 type_err(c, "error: have %1 but need %2", prog,
2017 ###### interp exec cases
2020 struct fieldref *f = cast(fieldref, e);
2022 struct value *lleft = linterp_exec(c, f->left, <ype);
2023 lrv = (void*)lleft->ptr + ltype->structure.fields[f->index].offset;
2024 rvtype = ltype->structure.fields[f->index].type;
2030 struct fieldlist *prev;
2034 ###### ast functions
2035 static void free_fieldlist(struct fieldlist *f)
2039 free_fieldlist(f->prev);
2041 free_value(f->f.type, f->f.init);
2047 ###### top level grammar
2048 DeclareStruct -> struct IDENTIFIER FieldBlock Newlines ${ {
2050 add_type(c, $2.txt, &structure_prototype);
2052 struct fieldlist *f;
2054 for (f = $3; f; f=f->prev)
2057 t->structure.nfields = cnt;
2058 t->structure.fields = calloc(cnt, sizeof(struct field));
2061 int a = f->f.type->align;
2063 t->structure.fields[cnt] = f->f;
2064 if (t->size & (a-1))
2065 t->size = (t->size | (a-1)) + 1;
2066 t->structure.fields[cnt].offset = t->size;
2067 t->size += ((f->f.type->size - 1) | (a-1)) + 1;
2076 FieldBlock -> { IN OptNL FieldLines OUT OptNL } ${ $0 = $<FL; }$
2077 | { SimpleFieldList } ${ $0 = $<SFL; }$
2078 | IN OptNL FieldLines OUT ${ $0 = $<FL; }$
2079 | SimpleFieldList EOL ${ $0 = $<SFL; }$
2081 FieldLines -> SimpleFieldList Newlines ${ $0 = $<SFL; }$
2082 | FieldLines SimpleFieldList Newlines ${
2087 SimpleFieldList -> Field ${ $0 = $<F; }$
2088 | SimpleFieldList ; Field ${
2092 | SimpleFieldList ; ${
2095 | ERROR ${ tok_err(c, "Syntax error in struct field", &$1); }$
2097 Field -> IDENTIFIER : Type = Expression ${ {
2100 $0 = calloc(1, sizeof(struct fieldlist));
2101 $0->f.name = $1.txt;
2106 propagate_types($<5, c, &ok, $3, 0);
2111 struct value vl = interp_exec(c, $5, NULL);
2112 $0->f.init = global_alloc(c, $0->f.type, NULL, &vl);
2115 | IDENTIFIER : Type ${
2116 $0 = calloc(1, sizeof(struct fieldlist));
2117 $0->f.name = $1.txt;
2119 if ($0->f.type->prepare_type)
2120 $0->f.type->prepare_type(c, $0->f.type, 1);
2123 ###### forward decls
2124 static void structure_print_type(struct type *t, FILE *f);
2126 ###### value functions
2127 static void structure_print_type(struct type *t, FILE *f)
2131 fprintf(f, "struct %.*s\n", t->name.len, t->name.txt);
2133 for (i = 0; i < t->structure.nfields; i++) {
2134 struct field *fl = t->structure.fields + i;
2135 fprintf(f, " %.*s : ", fl->name.len, fl->name.txt);
2136 type_print(fl->type, f);
2137 if (fl->type->print && fl->init) {
2139 if (fl->type == Tstr)
2141 print_value(fl->type, fl->init);
2142 if (fl->type == Tstr)
2149 ###### print type decls
2154 while (target != 0) {
2156 for (t = context.typelist; t ; t=t->next)
2157 if (t->print_type_decl) {
2166 t->print_type_decl(t, stdout);
2172 ## Executables: the elements of code
2174 Each code element needs to be parsed, printed, analysed,
2175 interpreted, and freed. There are several, so let's just start with
2176 the easy ones and work our way up.
2180 We have already met values as separate objects. When manifest
2181 constants appear in the program text, that must result in an executable
2182 which has a constant value. So the `val` structure embeds a value in
2195 ###### ast functions
2196 struct val *new_val(struct type *T, struct token tk)
2198 struct val *v = new_pos(val, tk);
2209 $0 = new_val(Tbool, $1);
2213 $0 = new_val(Tbool, $1);
2217 $0 = new_val(Tnum, $1);
2220 if (number_parse($0->val.num, tail, $1.txt) == 0)
2221 mpq_init($0->val.num);
2223 tok_err(c, "error: unsupported number suffix",
2228 $0 = new_val(Tstr, $1);
2231 string_parse(&$1, '\\', &$0->val.str, tail);
2233 tok_err(c, "error: unsupported string suffix",
2238 $0 = new_val(Tstr, $1);
2241 string_parse(&$1, '\\', &$0->val.str, tail);
2243 tok_err(c, "error: unsupported string suffix",
2248 ###### print exec cases
2251 struct val *v = cast(val, e);
2252 if (v->vtype == Tstr)
2254 print_value(v->vtype, &v->val);
2255 if (v->vtype == Tstr)
2260 ###### propagate exec cases
2263 struct val *val = cast(val, prog);
2264 if (!type_compat(type, val->vtype, rules))
2265 type_err(c, "error: expected %1%r found %2",
2266 prog, type, rules, val->vtype);
2270 ###### interp exec cases
2272 rvtype = cast(val, e)->vtype;
2273 dup_value(rvtype, &cast(val, e)->val, &rv);
2276 ###### ast functions
2277 static void free_val(struct val *v)
2280 free_value(v->vtype, &v->val);
2284 ###### free exec cases
2285 case Xval: free_val(cast(val, e)); break;
2287 ###### ast functions
2288 // Move all nodes from 'b' to 'rv', reversing their order.
2289 // In 'b' 'left' is a list, and 'right' is the last node.
2290 // In 'rv', left' is the first node and 'right' is a list.
2291 static struct binode *reorder_bilist(struct binode *b)
2293 struct binode *rv = NULL;
2296 struct exec *t = b->right;
2300 b = cast(binode, b->left);
2310 Just as we used a `val` to wrap a value into an `exec`, we similarly
2311 need a `var` to wrap a `variable` into an exec. While each `val`
2312 contained a copy of the value, each `var` holds a link to the variable
2313 because it really is the same variable no matter where it appears.
2314 When a variable is used, we need to remember to follow the `->merged`
2315 link to find the primary instance.
2323 struct variable *var;
2331 VariableDecl -> IDENTIFIER : ${ {
2332 struct variable *v = var_decl(c, $1.txt);
2333 $0 = new_pos(var, $1);
2338 v = var_ref(c, $1.txt);
2340 type_err(c, "error: variable '%v' redeclared",
2342 type_err(c, "info: this is where '%v' was first declared",
2343 v->where_decl, NULL, 0, NULL);
2346 | IDENTIFIER :: ${ {
2347 struct variable *v = var_decl(c, $1.txt);
2348 $0 = new_pos(var, $1);
2354 v = var_ref(c, $1.txt);
2356 type_err(c, "error: variable '%v' redeclared",
2358 type_err(c, "info: this is where '%v' was first declared",
2359 v->where_decl, NULL, 0, NULL);
2362 | IDENTIFIER : Type ${ {
2363 struct variable *v = var_decl(c, $1.txt);
2364 $0 = new_pos(var, $1);
2371 v = var_ref(c, $1.txt);
2373 type_err(c, "error: variable '%v' redeclared",
2375 type_err(c, "info: this is where '%v' was first declared",
2376 v->where_decl, NULL, 0, NULL);
2379 | IDENTIFIER :: Type ${ {
2380 struct variable *v = var_decl(c, $1.txt);
2381 $0 = new_pos(var, $1);
2389 v = var_ref(c, $1.txt);
2391 type_err(c, "error: variable '%v' redeclared",
2393 type_err(c, "info: this is where '%v' was first declared",
2394 v->where_decl, NULL, 0, NULL);
2399 Variable -> IDENTIFIER ${ {
2400 struct variable *v = var_ref(c, $1.txt);
2401 $0 = new_pos(var, $1);
2403 /* This might be a label - allocate a var just in case */
2404 v = var_decl(c, $1.txt);
2411 cast(var, $0)->var = v;
2416 Type -> IDENTIFIER ${
2417 $0 = find_type(c, $1.txt);
2420 "error: undefined type", &$1);
2427 ###### print exec cases
2430 struct var *v = cast(var, e);
2432 struct binding *b = v->var->name;
2433 printf("%.*s", b->name.len, b->name.txt);
2440 if (loc->type == Xvar) {
2441 struct var *v = cast(var, loc);
2443 struct binding *b = v->var->name;
2444 fprintf(stderr, "%.*s", b->name.len, b->name.txt);
2446 fputs("???", stderr); // NOTEST
2448 fputs("NOTVAR", stderr); // NOTEST
2451 ###### propagate exec cases
2455 struct var *var = cast(var, prog);
2456 struct variable *v = var->var;
2458 type_err(c, "%d:BUG: no variable!!", prog, NULL, 0, NULL); // NOTEST
2459 return Tnone; // NOTEST
2463 if (v->constant && (rules & Rnoconstant)) {
2464 type_err(c, "error: Cannot assign to a constant: %v",
2465 prog, NULL, 0, NULL);
2466 type_err(c, "info: name was defined as a constant here",
2467 v->where_decl, NULL, 0, NULL);
2470 if (v->type == Tnone && v->where_decl == prog)
2471 type_err(c, "error: variable used but not declared: %v",
2472 prog, NULL, 0, NULL);
2473 if (v->type == NULL) {
2474 if (type && *ok != 0) {
2476 v->where_set = prog;
2481 if (!type_compat(type, v->type, rules)) {
2482 type_err(c, "error: expected %1%r but variable '%v' is %2", prog,
2483 type, rules, v->type);
2484 type_err(c, "info: this is where '%v' was set to %1", v->where_set,
2485 v->type, rules, NULL);
2492 ###### interp exec cases
2495 struct var *var = cast(var, e);
2496 struct variable *v = var->var;
2500 lrv = var_value(c, v);
2505 ###### ast functions
2507 static void free_var(struct var *v)
2512 ###### free exec cases
2513 case Xvar: free_var(cast(var, e)); break;
2515 ### Expressions: Conditional
2517 Our first user of the `binode` will be conditional expressions, which
2518 is a bit odd as they actually have three components. That will be
2519 handled by having 2 binodes for each expression. The conditional
2520 expression is the lowest precedence operator which is why we define it
2521 first - to start the precedence list.
2523 Conditional expressions are of the form "value `if` condition `else`
2524 other_value". They associate to the right, so everything to the right
2525 of `else` is part of an else value, while only a higher-precedence to
2526 the left of `if` is the if values. Between `if` and `else` there is no
2527 room for ambiguity, so a full conditional expression is allowed in
2539 Expression -> Expression if Expression else Expression $$ifelse ${ {
2540 struct binode *b1 = new(binode);
2541 struct binode *b2 = new(binode);
2550 ## expression grammar
2552 ###### print binode cases
2555 b2 = cast(binode, b->right);
2556 if (bracket) printf("(");
2557 print_exec(b2->left, -1, bracket);
2559 print_exec(b->left, -1, bracket);
2561 print_exec(b2->right, -1, bracket);
2562 if (bracket) printf(")");
2565 ###### propagate binode cases
2568 /* cond must be Tbool, others must match */
2569 struct binode *b2 = cast(binode, b->right);
2572 propagate_types(b->left, c, ok, Tbool, 0);
2573 t = propagate_types(b2->left, c, ok, type, Rnolabel);
2574 t2 = propagate_types(b2->right, c, ok, type ?: t, Rnolabel);
2578 ###### interp binode cases
2581 struct binode *b2 = cast(binode, b->right);
2582 left = interp_exec(c, b->left, <ype);
2584 rv = interp_exec(c, b2->left, &rvtype);
2586 rv = interp_exec(c, b2->right, &rvtype);
2590 ### Expressions: Boolean
2592 The next class of expressions to use the `binode` will be Boolean
2593 expressions. "`and then`" and "`or else`" are similar to `and` and `or`
2594 have same corresponding precendence. The difference is that they don't
2595 evaluate the second expression if not necessary.
2604 ###### expr precedence
2609 ###### expression grammar
2610 | Expression or Expression ${ {
2611 struct binode *b = new(binode);
2617 | Expression or else Expression ${ {
2618 struct binode *b = new(binode);
2625 | Expression and Expression ${ {
2626 struct binode *b = new(binode);
2632 | Expression and then Expression ${ {
2633 struct binode *b = new(binode);
2640 | not Expression ${ {
2641 struct binode *b = new(binode);
2647 ###### print binode cases
2649 if (bracket) printf("(");
2650 print_exec(b->left, -1, bracket);
2652 print_exec(b->right, -1, bracket);
2653 if (bracket) printf(")");
2656 if (bracket) printf("(");
2657 print_exec(b->left, -1, bracket);
2658 printf(" and then ");
2659 print_exec(b->right, -1, bracket);
2660 if (bracket) printf(")");
2663 if (bracket) printf("(");
2664 print_exec(b->left, -1, bracket);
2666 print_exec(b->right, -1, bracket);
2667 if (bracket) printf(")");
2670 if (bracket) printf("(");
2671 print_exec(b->left, -1, bracket);
2672 printf(" or else ");
2673 print_exec(b->right, -1, bracket);
2674 if (bracket) printf(")");
2677 if (bracket) printf("(");
2679 print_exec(b->right, -1, bracket);
2680 if (bracket) printf(")");
2683 ###### propagate binode cases
2689 /* both must be Tbool, result is Tbool */
2690 propagate_types(b->left, c, ok, Tbool, 0);
2691 propagate_types(b->right, c, ok, Tbool, 0);
2692 if (type && type != Tbool)
2693 type_err(c, "error: %1 operation found where %2 expected", prog,
2697 ###### interp binode cases
2699 rv = interp_exec(c, b->left, &rvtype);
2700 right = interp_exec(c, b->right, &rtype);
2701 rv.bool = rv.bool && right.bool;
2704 rv = interp_exec(c, b->left, &rvtype);
2706 rv = interp_exec(c, b->right, NULL);
2709 rv = interp_exec(c, b->left, &rvtype);
2710 right = interp_exec(c, b->right, &rtype);
2711 rv.bool = rv.bool || right.bool;
2714 rv = interp_exec(c, b->left, &rvtype);
2716 rv = interp_exec(c, b->right, NULL);
2719 rv = interp_exec(c, b->right, &rvtype);
2723 ### Expressions: Comparison
2725 Of slightly higher precedence that Boolean expressions are Comparisons.
2726 A comparison takes arguments of any comparable type, but the two types
2729 To simplify the parsing we introduce an `eop` which can record an
2730 expression operator, and the `CMPop` non-terminal will match one of them.
2737 ###### ast functions
2738 static void free_eop(struct eop *e)
2752 ###### expr precedence
2753 $LEFT < > <= >= == != CMPop
2755 ###### expression grammar
2756 | Expression CMPop Expression ${ {
2757 struct binode *b = new(binode);
2767 CMPop -> < ${ $0.op = Less; }$
2768 | > ${ $0.op = Gtr; }$
2769 | <= ${ $0.op = LessEq; }$
2770 | >= ${ $0.op = GtrEq; }$
2771 | == ${ $0.op = Eql; }$
2772 | != ${ $0.op = NEql; }$
2774 ###### print binode cases
2782 if (bracket) printf("(");
2783 print_exec(b->left, -1, bracket);
2785 case Less: printf(" < "); break;
2786 case LessEq: printf(" <= "); break;
2787 case Gtr: printf(" > "); break;
2788 case GtrEq: printf(" >= "); break;
2789 case Eql: printf(" == "); break;
2790 case NEql: printf(" != "); break;
2791 default: abort(); // NOTEST
2793 print_exec(b->right, -1, bracket);
2794 if (bracket) printf(")");
2797 ###### propagate binode cases
2804 /* Both must match but not be labels, result is Tbool */
2805 t = propagate_types(b->left, c, ok, NULL, Rnolabel);
2807 propagate_types(b->right, c, ok, t, 0);
2809 t = propagate_types(b->right, c, ok, NULL, Rnolabel);
2811 t = propagate_types(b->left, c, ok, t, 0);
2813 if (!type_compat(type, Tbool, 0))
2814 type_err(c, "error: Comparison returns %1 but %2 expected", prog,
2815 Tbool, rules, type);
2818 ###### interp binode cases
2827 left = interp_exec(c, b->left, <ype);
2828 right = interp_exec(c, b->right, &rtype);
2829 cmp = value_cmp(ltype, rtype, &left, &right);
2832 case Less: rv.bool = cmp < 0; break;
2833 case LessEq: rv.bool = cmp <= 0; break;
2834 case Gtr: rv.bool = cmp > 0; break;
2835 case GtrEq: rv.bool = cmp >= 0; break;
2836 case Eql: rv.bool = cmp == 0; break;
2837 case NEql: rv.bool = cmp != 0; break;
2838 default: rv.bool = 0; break; // NOTEST
2843 ### Expressions: The rest
2845 The remaining expressions with the highest precedence are arithmetic,
2846 string concatenation, and string conversion. String concatenation
2847 (`++`) has the same precedence as multiplication and division, but lower
2850 String conversion is a temporary feature until I get a better type
2851 system. `$` is a prefix operator which expects a string and returns
2854 `+` and `-` are both infix and prefix operations (where they are
2855 absolute value and negation). These have different operator names.
2857 We also have a 'Bracket' operator which records where parentheses were
2858 found. This makes it easy to reproduce these when printing. Possibly I
2859 should only insert brackets were needed for precedence.
2869 ###### expr precedence
2875 ###### expression grammar
2876 | Expression Eop Expression ${ {
2877 struct binode *b = new(binode);
2884 | Expression Top Expression ${ {
2885 struct binode *b = new(binode);
2892 | ( Expression ) ${ {
2893 struct binode *b = new_pos(binode, $1);
2898 | Uop Expression ${ {
2899 struct binode *b = new(binode);
2904 | Value ${ $0 = $<1; }$
2905 | Variable ${ $0 = $<1; }$
2908 Eop -> + ${ $0.op = Plus; }$
2909 | - ${ $0.op = Minus; }$
2911 Uop -> + ${ $0.op = Absolute; }$
2912 | - ${ $0.op = Negate; }$
2913 | $ ${ $0.op = StringConv; }$
2915 Top -> * ${ $0.op = Times; }$
2916 | / ${ $0.op = Divide; }$
2917 | % ${ $0.op = Rem; }$
2918 | ++ ${ $0.op = Concat; }$
2920 ###### print binode cases
2927 if (bracket) printf("(");
2928 print_exec(b->left, indent, bracket);
2930 case Plus: fputs(" + ", stdout); break;
2931 case Minus: fputs(" - ", stdout); break;
2932 case Times: fputs(" * ", stdout); break;
2933 case Divide: fputs(" / ", stdout); break;
2934 case Rem: fputs(" % ", stdout); break;
2935 case Concat: fputs(" ++ ", stdout); break;
2936 default: abort(); // NOTEST
2938 print_exec(b->right, indent, bracket);
2939 if (bracket) printf(")");
2944 if (bracket) printf("(");
2946 case Absolute: fputs("+", stdout); break;
2947 case Negate: fputs("-", stdout); break;
2948 case StringConv: fputs("$", stdout); break;
2949 default: abort(); // NOTEST
2951 print_exec(b->right, indent, bracket);
2952 if (bracket) printf(")");
2956 print_exec(b->right, indent, bracket);
2960 ###### propagate binode cases
2966 /* both must be numbers, result is Tnum */
2969 /* as propagate_types ignores a NULL,
2970 * unary ops fit here too */
2971 propagate_types(b->left, c, ok, Tnum, 0);
2972 propagate_types(b->right, c, ok, Tnum, 0);
2973 if (!type_compat(type, Tnum, 0))
2974 type_err(c, "error: Arithmetic returns %1 but %2 expected", prog,
2979 /* both must be Tstr, result is Tstr */
2980 propagate_types(b->left, c, ok, Tstr, 0);
2981 propagate_types(b->right, c, ok, Tstr, 0);
2982 if (!type_compat(type, Tstr, 0))
2983 type_err(c, "error: Concat returns %1 but %2 expected", prog,
2988 /* op must be string, result is number */
2989 propagate_types(b->left, c, ok, Tstr, 0);
2990 if (!type_compat(type, Tnum, 0))
2992 "error: Can only convert string to number, not %1",
2993 prog, type, 0, NULL);
2997 return propagate_types(b->right, c, ok, type, 0);
2999 ###### interp binode cases
3002 rv = interp_exec(c, b->left, &rvtype);
3003 right = interp_exec(c, b->right, &rtype);
3004 mpq_add(rv.num, rv.num, right.num);
3007 rv = interp_exec(c, b->left, &rvtype);
3008 right = interp_exec(c, b->right, &rtype);
3009 mpq_sub(rv.num, rv.num, right.num);
3012 rv = interp_exec(c, b->left, &rvtype);
3013 right = interp_exec(c, b->right, &rtype);
3014 mpq_mul(rv.num, rv.num, right.num);
3017 rv = interp_exec(c, b->left, &rvtype);
3018 right = interp_exec(c, b->right, &rtype);
3019 mpq_div(rv.num, rv.num, right.num);
3024 left = interp_exec(c, b->left, <ype);
3025 right = interp_exec(c, b->right, &rtype);
3026 mpz_init(l); mpz_init(r); mpz_init(rem);
3027 mpz_tdiv_q(l, mpq_numref(left.num), mpq_denref(left.num));
3028 mpz_tdiv_q(r, mpq_numref(right.num), mpq_denref(right.num));
3029 mpz_tdiv_r(rem, l, r);
3030 val_init(Tnum, &rv);
3031 mpq_set_z(rv.num, rem);
3032 mpz_clear(r); mpz_clear(l); mpz_clear(rem);
3037 rv = interp_exec(c, b->right, &rvtype);
3038 mpq_neg(rv.num, rv.num);
3041 rv = interp_exec(c, b->right, &rvtype);
3042 mpq_abs(rv.num, rv.num);
3045 rv = interp_exec(c, b->right, &rvtype);
3048 left = interp_exec(c, b->left, <ype);
3049 right = interp_exec(c, b->right, &rtype);
3051 rv.str = text_join(left.str, right.str);
3054 right = interp_exec(c, b->right, &rvtype);
3058 struct text tx = right.str;
3061 if (tx.txt[0] == '-') {
3066 if (number_parse(rv.num, tail, tx) == 0)
3069 mpq_neg(rv.num, rv.num);
3071 printf("Unsupported suffix: %.*s\n", tx.len, tx.txt);
3075 ###### value functions
3077 static struct text text_join(struct text a, struct text b)
3080 rv.len = a.len + b.len;
3081 rv.txt = malloc(rv.len);
3082 memcpy(rv.txt, a.txt, a.len);
3083 memcpy(rv.txt+a.len, b.txt, b.len);
3087 ### Blocks, Statements, and Statement lists.
3089 Now that we have expressions out of the way we need to turn to
3090 statements. There are simple statements and more complex statements.
3091 Simple statements do not contain (syntactic) newlines, complex statements do.
3093 Statements often come in sequences and we have corresponding simple
3094 statement lists and complex statement lists.
3095 The former comprise only simple statements separated by semicolons.
3096 The later comprise complex statements and simple statement lists. They are
3097 separated by newlines. Thus the semicolon is only used to separate
3098 simple statements on the one line. This may be overly restrictive,
3099 but I'm not sure I ever want a complex statement to share a line with
3102 Note that a simple statement list can still use multiple lines if
3103 subsequent lines are indented, so
3105 ###### Example: wrapped simple statement list
3110 is a single simple statement list. This might allow room for
3111 confusion, so I'm not set on it yet.
3113 A simple statement list needs no extra syntax. A complex statement
3114 list has two syntactic forms. It can be enclosed in braces (much like
3115 C blocks), or it can be introduced by an indent and continue until an
3116 unindented newline (much like Python blocks). With this extra syntax
3117 it is referred to as a block.
3119 Note that a block does not have to include any newlines if it only
3120 contains simple statements. So both of:
3122 if condition: a=b; d=f
3124 if condition { a=b; print f }
3128 In either case the list is constructed from a `binode` list with
3129 `Block` as the operator. When parsing the list it is most convenient
3130 to append to the end, so a list is a list and a statement. When using
3131 the list it is more convenient to consider a list to be a statement
3132 and a list. So we need a function to re-order a list.
3133 `reorder_bilist` serves this purpose.
3135 The only stand-alone statement we introduce at this stage is `pass`
3136 which does nothing and is represented as a `NULL` pointer in a `Block`
3137 list. Other stand-alone statements will follow once the infrastructure
3148 Block -> { IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
3149 | { SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
3150 | SimpleStatements ; ${ $0 = reorder_bilist($<SS); }$
3151 | SimpleStatements EOL ${ $0 = reorder_bilist($<SS); }$
3152 | IN OptNL Statementlist OUT ${ $0 = $<Sl; }$
3154 OpenBlock -> OpenScope { IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
3155 | OpenScope { SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
3156 | OpenScope SimpleStatements ; ${ $0 = reorder_bilist($<SS); }$
3157 | OpenScope SimpleStatements EOL ${ $0 = reorder_bilist($<SS); }$
3158 | IN OpenScope OptNL Statementlist OUT ${ $0 = $<Sl; }$
3160 UseBlock -> { OpenScope IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
3161 | { OpenScope SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
3162 | IN OpenScope OptNL Statementlist OUT ${ $0 = $<Sl; }$
3164 ColonBlock -> { IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
3165 | { SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
3166 | : SimpleStatements ; ${ $0 = reorder_bilist($<SS); }$
3167 | : SimpleStatements EOL ${ $0 = reorder_bilist($<SS); }$
3168 | : IN OptNL Statementlist OUT ${ $0 = $<Sl; }$
3170 Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<CS); }$
3172 ComplexStatements -> ComplexStatements ComplexStatement ${
3182 | ComplexStatement ${
3194 ComplexStatement -> SimpleStatements Newlines ${
3195 $0 = reorder_bilist($<SS);
3197 | SimpleStatements ; Newlines ${
3198 $0 = reorder_bilist($<SS);
3200 ## ComplexStatement Grammar
3203 SimpleStatements -> SimpleStatements ; SimpleStatement ${
3209 | SimpleStatement ${
3217 SimpleStatement -> pass ${ $0 = NULL; }$
3218 | ERROR ${ tok_err(c, "Syntax error in statement", &$1); }$
3219 ## SimpleStatement Grammar
3221 ###### print binode cases
3225 if (b->left == NULL)
3228 print_exec(b->left, indent, bracket);
3231 print_exec(b->right, indent, bracket);
3234 // block, one per line
3235 if (b->left == NULL)
3236 do_indent(indent, "pass\n");
3238 print_exec(b->left, indent, bracket);
3240 print_exec(b->right, indent, bracket);
3244 ###### propagate binode cases
3247 /* If any statement returns something other than Tnone
3248 * or Tbool then all such must return same type.
3249 * As each statement may be Tnone or something else,
3250 * we must always pass NULL (unknown) down, otherwise an incorrect
3251 * error might occur. We never return Tnone unless it is
3256 for (e = b; e; e = cast(binode, e->right)) {
3257 t = propagate_types(e->left, c, ok, NULL, rules);
3258 if ((rules & Rboolok) && t == Tbool)
3260 if (t && t != Tnone && t != Tbool) {
3264 type_err(c, "error: expected %1%r, found %2",
3265 e->left, type, rules, t);
3271 ###### interp binode cases
3273 while (rvtype == Tnone &&
3276 rv = interp_exec(c, b->left, &rvtype);
3277 b = cast(binode, b->right);
3281 ### The Print statement
3283 `print` is a simple statement that takes a comma-separated list of
3284 expressions and prints the values separated by spaces and terminated
3285 by a newline. No control of formatting is possible.
3287 `print` faces the same list-ordering issue as blocks, and uses the
3293 ##### expr precedence
3296 ###### SimpleStatement Grammar
3298 | print ExpressionList ${
3299 $0 = reorder_bilist($<2);
3301 | print ExpressionList , ${
3306 $0 = reorder_bilist($0);
3317 ExpressionList -> ExpressionList , Expression ${
3330 ###### print binode cases
3333 do_indent(indent, "print");
3337 print_exec(b->left, -1, bracket);
3341 b = cast(binode, b->right);
3347 ###### propagate binode cases
3350 /* don't care but all must be consistent */
3351 propagate_types(b->left, c, ok, NULL, Rnolabel);
3352 propagate_types(b->right, c, ok, NULL, Rnolabel);
3355 ###### interp binode cases
3361 for ( ; b; b = cast(binode, b->right))
3365 left = interp_exec(c, b->left, <ype);
3366 print_value(ltype, &left);
3367 free_value(ltype, &left);
3378 ###### Assignment statement
3380 An assignment will assign a value to a variable, providing it hasn't
3381 been declared as a constant. The analysis phase ensures that the type
3382 will be correct so the interpreter just needs to perform the
3383 calculation. There is a form of assignment which declares a new
3384 variable as well as assigning a value. If a name is assigned before
3385 it is declared, and error will be raised as the name is created as
3386 `Tlabel` and it is illegal to assign to such names.
3392 ###### declare terminals
3395 ###### SimpleStatement Grammar
3396 | Variable = Expression ${
3402 | VariableDecl = Expression ${
3410 if ($1->var->where_set == NULL) {
3412 "Variable declared with no type or value: %v",
3422 ###### print binode cases
3425 do_indent(indent, "");
3426 print_exec(b->left, indent, bracket);
3428 print_exec(b->right, indent, bracket);
3435 struct variable *v = cast(var, b->left)->var;
3436 do_indent(indent, "");
3437 print_exec(b->left, indent, bracket);
3438 if (cast(var, b->left)->var->constant) {
3439 if (v->where_decl == v->where_set) {
3441 type_print(v->type, stdout);
3446 if (v->where_decl == v->where_set) {
3448 type_print(v->type, stdout);
3455 print_exec(b->right, indent, bracket);
3462 ###### propagate binode cases
3466 /* Both must match and not be labels,
3467 * Type must support 'dup',
3468 * For Assign, left must not be constant.
3471 t = propagate_types(b->left, c, ok, NULL,
3472 Rnolabel | (b->op == Assign ? Rnoconstant : 0));
3477 if (propagate_types(b->right, c, ok, t, 0) != t)
3478 if (b->left->type == Xvar)
3479 type_err(c, "info: variable '%v' was set as %1 here.",
3480 cast(var, b->left)->var->where_set, t, rules, NULL);
3482 t = propagate_types(b->right, c, ok, NULL, Rnolabel);
3484 propagate_types(b->left, c, ok, t,
3485 (b->op == Assign ? Rnoconstant : 0));
3487 if (t && t->dup == NULL)
3488 type_err(c, "error: cannot assign value of type %1", b, t, 0, NULL);
3493 ###### interp binode cases
3496 lleft = linterp_exec(c, b->left, <ype);
3497 right = interp_exec(c, b->right, &rtype);
3499 free_value(ltype, lleft);
3500 dup_value(ltype, &right, lleft);
3507 struct variable *v = cast(var, b->left)->var;
3511 val = var_value(c, v);
3512 free_value(v->type, val);
3513 if (v->type->prepare_type)
3514 v->type->prepare_type(c, v->type, 0);
3516 right = interp_exec(c, b->right, &rtype);
3517 memcpy(val, &right, rtype->size);
3520 val_init(v->type, val);
3525 ### The `use` statement
3527 The `use` statement is the last "simple" statement. It is needed when
3528 the condition in a conditional statement is a block. `use` works much
3529 like `return` in C, but only completes the `condition`, not the whole
3535 ###### expr precedence
3538 ###### SimpleStatement Grammar
3540 $0 = new_pos(binode, $1);
3543 if ($0->right->type == Xvar) {
3544 struct var *v = cast(var, $0->right);
3545 if (v->var->type == Tnone) {
3546 /* Convert this to a label */
3549 v->var->type = Tlabel;
3550 val = global_alloc(c, Tlabel, v->var, NULL);
3556 ###### print binode cases
3559 do_indent(indent, "use ");
3560 print_exec(b->right, -1, bracket);
3565 ###### propagate binode cases
3568 /* result matches value */
3569 return propagate_types(b->right, c, ok, type, 0);
3571 ###### interp binode cases
3574 rv = interp_exec(c, b->right, &rvtype);
3577 ### The Conditional Statement
3579 This is the biggy and currently the only complex statement. This
3580 subsumes `if`, `while`, `do/while`, `switch`, and some parts of `for`.
3581 It is comprised of a number of parts, all of which are optional though
3582 set combinations apply. Each part is (usually) a key word (`then` is
3583 sometimes optional) followed by either an expression or a code block,
3584 except the `casepart` which is a "key word and an expression" followed
3585 by a code block. The code-block option is valid for all parts and,
3586 where an expression is also allowed, the code block can use the `use`
3587 statement to report a value. If the code block does not report a value
3588 the effect is similar to reporting `True`.
3590 The `else` and `case` parts, as well as `then` when combined with
3591 `if`, can contain a `use` statement which will apply to some
3592 containing conditional statement. `for` parts, `do` parts and `then`
3593 parts used with `for` can never contain a `use`, except in some
3594 subordinate conditional statement.
3596 If there is a `forpart`, it is executed first, only once.
3597 If there is a `dopart`, then it is executed repeatedly providing
3598 always that the `condpart` or `cond`, if present, does not return a non-True
3599 value. `condpart` can fail to return any value if it simply executes
3600 to completion. This is treated the same as returning `True`.
3602 If there is a `thenpart` it will be executed whenever the `condpart`
3603 or `cond` returns True (or does not return any value), but this will happen
3604 *after* `dopart` (when present).
3606 If `elsepart` is present it will be executed at most once when the
3607 condition returns `False` or some value that isn't `True` and isn't
3608 matched by any `casepart`. If there are any `casepart`s, they will be
3609 executed when the condition returns a matching value.
3611 The particular sorts of values allowed in case parts has not yet been
3612 determined in the language design, so nothing is prohibited.
3614 The various blocks in this complex statement potentially provide scope
3615 for variables as described earlier. Each such block must include the
3616 "OpenScope" nonterminal before parsing the block, and must call
3617 `var_block_close()` when closing the block.
3619 The code following "`if`", "`switch`" and "`for`" does not get its own
3620 scope, but is in a scope covering the whole statement, so names
3621 declared there cannot be redeclared elsewhere. Similarly the
3622 condition following "`while`" is in a scope the covers the body
3623 ("`do`" part) of the loop, and which does not allow conditional scope
3624 extension. Code following "`then`" (both looping and non-looping),
3625 "`else`" and "`case`" each get their own local scope.
3627 The type requirements on the code block in a `whilepart` are quite
3628 unusal. It is allowed to return a value of some identifiable type, in
3629 which case the loop aborts and an appropriate `casepart` is run, or it
3630 can return a Boolean, in which case the loop either continues to the
3631 `dopart` (on `True`) or aborts and runs the `elsepart` (on `False`).
3632 This is different both from the `ifpart` code block which is expected to
3633 return a Boolean, or the `switchpart` code block which is expected to
3634 return the same type as the casepart values. The correct analysis of
3635 the type of the `whilepart` code block is the reason for the
3636 `Rboolok` flag which is passed to `propagate_types()`.
3638 The `cond_statement` cannot fit into a `binode` so a new `exec` is
3647 struct exec *action;
3648 struct casepart *next;
3650 struct cond_statement {
3652 struct exec *forpart, *condpart, *dopart, *thenpart, *elsepart;
3653 struct casepart *casepart;
3656 ###### ast functions
3658 static void free_casepart(struct casepart *cp)
3662 free_exec(cp->value);
3663 free_exec(cp->action);
3670 static void free_cond_statement(struct cond_statement *s)
3674 free_exec(s->forpart);
3675 free_exec(s->condpart);
3676 free_exec(s->dopart);
3677 free_exec(s->thenpart);
3678 free_exec(s->elsepart);
3679 free_casepart(s->casepart);
3683 ###### free exec cases
3684 case Xcond_statement: free_cond_statement(cast(cond_statement, e)); break;
3686 ###### ComplexStatement Grammar
3687 | CondStatement ${ $0 = $<1; }$
3689 ###### expr precedence
3690 $TERM for then while do
3697 // A CondStatement must end with EOL, as does CondSuffix and
3699 // ForPart, ThenPart, SwitchPart, CasePart are non-empty and
3700 // may or may not end with EOL
3701 // WhilePart and IfPart include an appropriate Suffix
3704 // Both ForPart and Whilepart open scopes, and CondSuffix only
3705 // closes one - so in the first branch here we have another to close.
3706 CondStatement -> ForPart OptNL ThenPart OptNL WhilePart CondSuffix ${
3709 $0->thenpart = $<TP;
3710 $0->condpart = $WP.condpart; $WP.condpart = NULL;
3711 $0->dopart = $WP.dopart; $WP.dopart = NULL;
3712 var_block_close(c, CloseSequential);
3714 | ForPart OptNL WhilePart CondSuffix ${
3717 $0->condpart = $WP.condpart; $WP.condpart = NULL;
3718 $0->dopart = $WP.dopart; $WP.dopart = NULL;
3719 var_block_close(c, CloseSequential);
3721 | WhilePart CondSuffix ${
3723 $0->condpart = $WP.condpart; $WP.condpart = NULL;
3724 $0->dopart = $WP.dopart; $WP.dopart = NULL;
3726 | SwitchPart OptNL CasePart CondSuffix ${
3728 $0->condpart = $<SP;
3729 $CP->next = $0->casepart;
3730 $0->casepart = $<CP;
3732 | SwitchPart : IN OptNL CasePart CondSuffix OUT Newlines ${
3734 $0->condpart = $<SP;
3735 $CP->next = $0->casepart;
3736 $0->casepart = $<CP;
3738 | IfPart IfSuffix ${
3740 $0->condpart = $IP.condpart; $IP.condpart = NULL;
3741 $0->thenpart = $IP.thenpart; $IP.thenpart = NULL;
3742 // This is where we close an "if" statement
3743 var_block_close(c, CloseSequential);
3746 CondSuffix -> IfSuffix ${
3748 // This is where we close scope of the whole
3749 // "for" or "while" statement
3750 var_block_close(c, CloseSequential);
3752 | Newlines CasePart CondSuffix ${
3754 $CP->next = $0->casepart;
3755 $0->casepart = $<CP;
3757 | CasePart CondSuffix ${
3759 $CP->next = $0->casepart;
3760 $0->casepart = $<CP;
3763 IfSuffix -> Newlines ${ $0 = new(cond_statement); }$
3764 | Newlines ElsePart ${ $0 = $<EP; }$
3765 | ElsePart ${$0 = $<EP; }$
3767 ElsePart -> else OpenBlock Newlines ${
3768 $0 = new(cond_statement);
3769 $0->elsepart = $<OB;
3770 var_block_close(c, CloseElse);
3772 | else OpenScope CondStatement ${
3773 $0 = new(cond_statement);
3774 $0->elsepart = $<CS;
3775 var_block_close(c, CloseElse);
3779 CasePart -> case Expression OpenScope ColonBlock ${
3780 $0 = calloc(1,sizeof(struct casepart));
3783 var_block_close(c, CloseParallel);
3787 // These scopes are closed in CondSuffix
3788 ForPart -> for OpenBlock ${
3792 ThenPart -> then OpenBlock ${
3794 var_block_close(c, CloseSequential);
3798 // This scope is closed in CondSuffix
3799 WhilePart -> while UseBlock OptNL do Block ${
3803 | while OpenScope Expression ColonBlock ${
3804 $0.condpart = $<Exp;
3808 IfPart -> if UseBlock OptNL then OpenBlock ClosePara ${
3812 | if OpenScope Expression OpenScope ColonBlock ClosePara ${
3816 | if OpenScope Expression OpenScope OptNL then Block ClosePara ${
3822 // This scope is closed in CondSuffix
3823 SwitchPart -> switch OpenScope Expression ${
3826 | switch UseBlock ${
3830 ###### print exec cases
3832 case Xcond_statement:
3834 struct cond_statement *cs = cast(cond_statement, e);
3835 struct casepart *cp;
3837 do_indent(indent, "for");
3838 if (bracket) printf(" {\n"); else printf("\n");
3839 print_exec(cs->forpart, indent+1, bracket);
3842 do_indent(indent, "} then {\n");
3844 do_indent(indent, "then\n");
3845 print_exec(cs->thenpart, indent+1, bracket);
3847 if (bracket) do_indent(indent, "}\n");
3851 if (cs->condpart && cs->condpart->type == Xbinode &&
3852 cast(binode, cs->condpart)->op == Block) {
3854 do_indent(indent, "while {\n");
3856 do_indent(indent, "while\n");
3857 print_exec(cs->condpart, indent+1, bracket);
3859 do_indent(indent, "} do {\n");
3861 do_indent(indent, "do\n");
3862 print_exec(cs->dopart, indent+1, bracket);
3864 do_indent(indent, "}\n");
3866 do_indent(indent, "while ");
3867 print_exec(cs->condpart, 0, bracket);
3872 print_exec(cs->dopart, indent+1, bracket);
3874 do_indent(indent, "}\n");
3879 do_indent(indent, "switch");
3881 do_indent(indent, "if");
3882 if (cs->condpart && cs->condpart->type == Xbinode &&
3883 cast(binode, cs->condpart)->op == Block) {
3888 print_exec(cs->condpart, indent+1, bracket);
3890 do_indent(indent, "}\n");
3892 do_indent(indent, "then:\n");
3893 print_exec(cs->thenpart, indent+1, bracket);
3897 print_exec(cs->condpart, 0, bracket);
3903 print_exec(cs->thenpart, indent+1, bracket);
3905 do_indent(indent, "}\n");
3910 for (cp = cs->casepart; cp; cp = cp->next) {
3911 do_indent(indent, "case ");
3912 print_exec(cp->value, -1, 0);
3917 print_exec(cp->action, indent+1, bracket);
3919 do_indent(indent, "}\n");
3922 do_indent(indent, "else");
3927 print_exec(cs->elsepart, indent+1, bracket);
3929 do_indent(indent, "}\n");
3934 ###### propagate exec cases
3935 case Xcond_statement:
3937 // forpart and dopart must return Tnone
3938 // thenpart must return Tnone if there is a dopart,
3939 // otherwise it is like elsepart.
3941 // be bool if there is no casepart
3942 // match casepart->values if there is a switchpart
3943 // either be bool or match casepart->value if there
3945 // elsepart and casepart->action must match the return type
3946 // expected of this statement.
3947 struct cond_statement *cs = cast(cond_statement, prog);
3948 struct casepart *cp;
3950 t = propagate_types(cs->forpart, c, ok, Tnone, 0);
3951 if (!type_compat(Tnone, t, 0))
3953 t = propagate_types(cs->dopart, c, ok, Tnone, 0);
3954 if (!type_compat(Tnone, t, 0))
3957 t = propagate_types(cs->thenpart, c, ok, Tnone, 0);
3958 if (!type_compat(Tnone, t, 0))
3961 if (cs->casepart == NULL)
3962 propagate_types(cs->condpart, c, ok, Tbool, 0);
3964 /* Condpart must match case values, with bool permitted */
3966 for (cp = cs->casepart;
3967 cp && !t; cp = cp->next)
3968 t = propagate_types(cp->value, c, ok, NULL, 0);
3969 if (!t && cs->condpart)
3970 t = propagate_types(cs->condpart, c, ok, NULL, Rboolok);
3971 // Now we have a type (I hope) push it down
3973 for (cp = cs->casepart; cp; cp = cp->next)
3974 propagate_types(cp->value, c, ok, t, 0);
3975 propagate_types(cs->condpart, c, ok, t, Rboolok);
3978 // (if)then, else, and case parts must return expected type.
3979 if (!cs->dopart && !type)
3980 type = propagate_types(cs->thenpart, c, ok, NULL, rules);
3982 type = propagate_types(cs->elsepart, c, ok, NULL, rules);
3983 for (cp = cs->casepart;
3986 type = propagate_types(cp->action, c, ok, NULL, rules);
3989 propagate_types(cs->thenpart, c, ok, type, rules);
3990 propagate_types(cs->elsepart, c, ok, type, rules);
3991 for (cp = cs->casepart; cp ; cp = cp->next)
3992 propagate_types(cp->action, c, ok, type, rules);
3998 ###### interp exec cases
3999 case Xcond_statement:
4001 struct value v, cnd;
4002 struct type *vtype, *cndtype;
4003 struct casepart *cp;
4004 struct cond_statement *cs = cast(cond_statement, e);
4007 interp_exec(c, cs->forpart, NULL);
4010 cnd = interp_exec(c, cs->condpart, &cndtype);
4013 if (!(cndtype == Tnone ||
4014 (cndtype == Tbool && cnd.bool != 0)))
4016 // cnd is Tnone or Tbool, doesn't need to be freed
4018 interp_exec(c, cs->dopart, NULL);
4021 rv = interp_exec(c, cs->thenpart, &rvtype);
4022 if (rvtype != Tnone || !cs->dopart)
4024 free_value(rvtype, &rv);
4027 } while (cs->dopart);
4029 for (cp = cs->casepart; cp; cp = cp->next) {
4030 v = interp_exec(c, cp->value, &vtype);
4031 if (value_cmp(cndtype, vtype, &v, &cnd) == 0) {
4032 free_value(vtype, &v);
4033 free_value(cndtype, &cnd);
4034 rv = interp_exec(c, cp->action, &rvtype);
4037 free_value(vtype, &v);
4039 free_value(cndtype, &cnd);
4041 rv = interp_exec(c, cs->elsepart, &rvtype);
4048 ### Top level structure
4050 All the language elements so far can be used in various places. Now
4051 it is time to clarify what those places are.
4053 At the top level of a file there will be a number of declarations.
4054 Many of the things that can be declared haven't been described yet,
4055 such as functions, procedures, imports, and probably more.
4056 For now there are two sorts of things that can appear at the top
4057 level. They are predefined constants, `struct` types, and the main
4058 program. While the syntax will allow the main program to appear
4059 multiple times, that will trigger an error if it is actually attempted.
4061 The various declarations do not return anything. They store the
4062 various declarations in the parse context.
4064 ###### Parser: grammar
4067 Ocean -> OptNL DeclarationList
4069 ## declare terminals
4076 DeclarationList -> Declaration
4077 | DeclarationList Declaration
4079 Declaration -> ERROR Newlines ${
4081 "error: unhandled parse error", &$1);
4087 ## top level grammar
4089 ### The `const` section
4091 As well as being defined in with the code that uses them, constants
4092 can be declared at the top level. These have full-file scope, so they
4093 are always `InScope`. The value of a top level constant can be given
4094 as an expression, and this is evaluated immediately rather than in the
4095 later interpretation stage. Once we add functions to the language, we
4096 will need rules concern which, if any, can be used to define a top
4099 Constants are defined in a section that starts with the reserved word
4100 `const` and then has a block with a list of assignment statements.
4101 For syntactic consistency, these must use the double-colon syntax to
4102 make it clear that they are constants. Type can also be given: if
4103 not, the type will be determined during analysis, as with other
4106 As the types constants are inserted at the head of a list, printing
4107 them in the same order that they were read is not straight forward.
4108 We take a quadratic approach here and count the number of constants
4109 (variables of depth 0), then count down from there, each time
4110 searching through for the Nth constant for decreasing N.
4112 ###### top level grammar
4116 DeclareConstant -> const { IN OptNL ConstList OUT OptNL } Newlines
4117 | const { SimpleConstList } Newlines
4118 | const IN OptNL ConstList OUT Newlines
4119 | const SimpleConstList Newlines
4121 ConstList -> ConstList SimpleConstLine
4123 SimpleConstList -> SimpleConstList ; Const
4126 SimpleConstLine -> SimpleConstList Newlines
4127 | ERROR Newlines ${ tok_err(c, "Syntax error in constant", &$1); }$
4130 CType -> Type ${ $0 = $<1; }$
4133 Const -> IDENTIFIER :: CType = Expression ${ {
4137 v = var_decl(c, $1.txt);
4139 struct var *var = new_pos(var, $1);
4140 v->where_decl = var;
4145 v = var_ref(c, $1.txt);
4146 tok_err(c, "error: name already declared", &$1);
4147 type_err(c, "info: this is where '%v' was first declared",
4148 v->where_decl, NULL, 0, NULL);
4152 propagate_types($5, c, &ok, $3, 0);
4157 struct value res = interp_exec(c, $5, &v->type);
4158 global_alloc(c, v->type, v, &res);
4162 ###### print const decls
4167 while (target != 0) {
4169 for (v = context.in_scope; v; v=v->in_scope)
4170 if (v->depth == 0) {
4181 struct value *val = var_value(&context, v);
4182 printf(" %.*s :: ", v->name->name.len, v->name->name.txt);
4183 type_print(v->type, stdout);
4185 if (v->type == Tstr)
4187 print_value(v->type, val);
4188 if (v->type == Tstr)
4196 ### Finally the whole program.
4198 Somewhat reminiscent of Pascal a (current) Ocean program starts with
4199 the keyword "program" and a list of variable names which are assigned
4200 values from command line arguments. Following this is a `block` which
4201 is the code to execute. Unlike Pascal, constants and other
4202 declarations come *before* the program.
4204 As this is the top level, several things are handled a bit
4206 The whole program is not interpreted by `interp_exec` as that isn't
4207 passed the argument list which the program requires. Similarly type
4208 analysis is a bit more interesting at this level.
4213 ###### top level grammar
4215 DeclareProgram -> Program ${ {
4217 type_err(c, "Program defined a second time",
4226 Program -> program OpenScope Varlist ColonBlock Newlines ${
4229 $0->left = reorder_bilist($<Vl);
4231 var_block_close(c, CloseSequential);
4232 if (c->scope_stack && !c->parse_error) abort();
4235 Varlist -> Varlist ArgDecl ${
4244 ArgDecl -> IDENTIFIER ${ {
4245 struct variable *v = var_decl(c, $1.txt);
4252 ###### print binode cases
4254 do_indent(indent, "program");
4255 for (b2 = cast(binode, b->left); b2; b2 = cast(binode, b2->right)) {
4257 print_exec(b2->left, 0, 0);
4263 print_exec(b->right, indent+1, bracket);
4265 do_indent(indent, "}\n");
4268 ###### propagate binode cases
4269 case Program: abort(); // NOTEST
4271 ###### core functions
4273 static int analyse_prog(struct exec *prog, struct parse_context *c)
4275 struct binode *bp = cast(binode, prog);
4279 struct type *argv_type;
4280 struct text argv_type_name = { " argv", 5 };
4285 argv_type = add_type(c, argv_type_name, &array_prototype);
4286 argv_type->array.member = Tstr;
4288 for (b = cast(binode, bp->left); b; b = cast(binode, b->right)) {
4293 v = cast(var, b->left);
4294 argv_type->array.vsize = v->var;
4295 propagate_types(b->left, c, &ok, Tnum, 0);
4298 propagate_types(b->left, c, &ok, argv_type, 0);
4300 default: /* invalid */
4301 propagate_types(b->left, c, &ok, Tnone, 0);
4307 propagate_types(bp->right, c, &ok, Tnone, 0);
4312 /* Make sure everything is still consistent */
4313 propagate_types(bp->right, c, &ok, Tnone, 0);
4320 static void interp_prog(struct parse_context *c, struct exec *prog,
4321 int argc, char **argv)
4323 struct binode *p = cast(binode, prog);
4331 al = cast(binode, p->left);
4333 struct var *v = cast(var, al->left);
4334 struct value *vl = var_value(c, v->var);
4342 if (v->var->type == Tnum) {
4344 mpq_set_ui(argcq, argc, 1);
4345 memcpy(vl, &argcq, sizeof(argcq));
4350 t->prepare_type(c, t, 0);
4351 array_init(v->var->type, vl);
4352 for (i = 0; i < argc; i++) {
4353 struct value *vl2 = vl->array + i * v->var->type->array.member->size;
4356 arg.str.txt = argv[i];
4357 arg.str.len = strlen(argv[i]);
4358 free_value(Tstr, vl2);
4359 dup_value(Tstr, &arg, vl2);
4363 al = cast(binode, al->right);
4365 v = interp_exec(c, p->right, &vtype);
4366 free_value(vtype, &v);
4369 ###### interp binode cases
4370 case Program: abort(); // NOTEST
4372 ## And now to test it out.
4374 Having a language requires having a "hello world" program. I'll
4375 provide a little more than that: a program that prints "Hello world"
4376 finds the GCD of two numbers, prints the first few elements of
4377 Fibonacci, performs a binary search for a number, and a few other
4378 things which will likely grow as the languages grows.
4380 ###### File: oceani.mk
4383 @echo "===== DEMO ====="
4384 ./oceani --section "demo: hello" oceani.mdc 55 33
4390 four ::= 2 + 2 ; five ::= 10/2
4391 const pie ::= "I like Pie";
4392 cake ::= "The cake is"
4401 print "Hello World, what lovely oceans you have!"
4402 print "Are there", five, "?"
4403 print pi, pie, "but", cake
4405 A := $argv[1]; B := $argv[2]
4407 /* When a variable is defined in both branches of an 'if',
4408 * and used afterwards, the variables are merged.
4414 print "Is", A, "bigger than", B,"? ", bigger
4415 /* If a variable is not used after the 'if', no
4416 * merge happens, so types can be different
4419 double:string = "yes"
4420 print A, "is more than twice", B, "?", double
4423 print "double", B, "is", double
4428 if a > 0 and then b > 0:
4434 print "GCD of", A, "and", B,"is", a
4436 print a, "is not positive, cannot calculate GCD"
4438 print b, "is not positive, cannot calculate GCD"
4443 print "Fibonacci:", f1,f2,
4444 then togo = togo - 1
4452 /* Binary search... */
4457 mid := (lo + hi) / 2
4469 print "Yay, I found", target
4471 print "Closest I found was", mid
4476 // "middle square" PRNG. Not particularly good, but one my
4477 // Dad taught me - the first one I ever heard of.
4478 for i:=1; then i = i + 1; while i < size:
4479 n := list[i-1] * list[i-1]
4480 list[i] = (n / 100) % 10 000
4482 print "Before sort:",
4483 for i:=0; then i = i + 1; while i < size:
4487 for i := 1; then i=i+1; while i < size:
4488 for j:=i-1; then j=j-1; while j >= 0:
4489 if list[j] > list[j+1]:
4493 print " After sort:",
4494 for i:=0; then i = i + 1; while i < size:
4498 if 1 == 2 then print "yes"; else print "no"
4502 bob.alive = (bob.name == "Hello")
4503 print "bob", "is" if bob.alive else "isn't", "alive"