1 # Ocean Interpreter - Jamison Creek version
3 Ocean is intended to be a compiled language, so this interpreter is
4 not targeted at being the final product. It is, rather, an intermediate
5 stage and fills that role in two distinct ways.
7 Firstly, it exists as a platform to experiment with the early language
8 design. An interpreter is easy to write and easy to get working, so
9 the barrier for entry is lower if I aim to start with an interpreter.
11 Secondly, the plan for the Ocean compiler is to write it in the
12 [Ocean language](http://ocean-lang.org). To achieve this we naturally
13 need some sort of boot-strap process and this interpreter - written in
14 portable C - will fill that role. It will be used to bootstrap the
17 Two features that are not needed to fill either of these roles are
18 performance and completeness. The interpreter only needs to be fast
19 enough to run small test programs and occasionally to run the compiler
20 on itself. It only needs to be complete enough to test aspects of the
21 design which are developed before the compiler is working, and to run
22 the compiler on itself. Any features not used by the compiler when
23 compiling itself are superfluous. They may be included anyway, but
26 Nonetheless, the interpreter should end up being reasonably complete,
27 and any performance bottlenecks which appear and are easily fixed, will
32 This third version of the interpreter exists to test out some initial
33 ideas relating to types. Particularly it adds arrays (indexed from
34 zero) and simple structures. Basic control flow and variable scoping
35 are already fairly well established, as are basic numerical and
38 Some operators that have only recently been added, and so have not
39 generated all that much experience yet are "and then" and "or else" as
40 short-circuit Boolean operators (which have since been remove), and the
41 "if ... else" trinary operator which can select between two expressions
42 based on a third (which appears syntactically in the middle).
44 The "func" clause currently only allows a "main" function to be
45 declared. That will be extended when proper function support is added.
47 An element that is present purely to make a usable language, and
48 without any expectation that they will remain, is the "print" statement
49 which performs simple output.
51 The current scalar types are "number", "Boolean", and "string".
52 Boolean will likely stay in its current form, the other two might, but
53 could just as easily be changed.
57 Versions of the interpreter which obviously do not support a complete
58 language will be named after creeks and streams. This one is Jamison
61 Once we have something reasonably resembling a complete language, the
62 names of rivers will be used.
63 Early versions of the compiler will be named after seas. Major
64 releases of the compiler will be named after oceans. Hopefully I will
65 be finished once I get to the Pacific Ocean release.
69 As well as parsing and executing a program, the interpreter can print
70 out the program from the parsed internal structure. This is useful
71 for validating the parsing.
72 So the main requirements of the interpreter are:
74 - Parse the program, possibly with tracing,
75 - Analyse the parsed program to ensure consistency,
77 - Execute the "main" function in the program, if no parsing or
78 consistency errors were found.
80 This is all performed by a single C program extracted with
83 There will be two formats for printing the program: a default and one
84 that uses bracketing. So a `--bracket` command line option is needed
85 for that. Normally the first code section found is used, however an
86 alternate section can be requested so that a file (such as this one)
87 can contain multiple programs. This is effected with the `--section`
90 This code must be compiled with `-fplan9-extensions` so that anonymous
91 structures can be used.
93 ###### File: oceani.mk
95 myCFLAGS := -Wall -g -fplan9-extensions
96 CFLAGS := $(filter-out $(myCFLAGS),$(CFLAGS)) $(myCFLAGS)
97 myLDLIBS:= libparser.o libscanner.o libmdcode.o -licuuc
98 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
100 all :: $(LDLIBS) oceani
101 oceani.c oceani.h : oceani.mdc parsergen
102 ./parsergen -o oceani --LALR --tag Parser oceani.mdc
103 oceani.mk: oceani.mdc md2c
106 oceani: oceani.o $(LDLIBS)
107 $(CC) $(CFLAGS) -o oceani oceani.o $(LDLIBS)
109 ###### Parser: header
111 struct parse_context;
114 struct parse_context {
115 struct token_config config;
123 #define container_of(ptr, type, member) ({ \
124 const typeof( ((type *)0)->member ) *__mptr = (ptr); \
125 (type *)( (char *)__mptr - offsetof(type,member) );})
127 #define config2context(_conf) container_of(_conf, struct parse_context, \
130 ###### Parser: reduce
131 struct parse_context *c = config2context(config);
139 #include <sys/mman.h>
158 static char Usage[] =
159 "Usage: oceani --trace --print --noexec --brackets --section=SectionName prog.ocn\n";
160 static const struct option long_options[] = {
161 {"trace", 0, NULL, 't'},
162 {"print", 0, NULL, 'p'},
163 {"noexec", 0, NULL, 'n'},
164 {"brackets", 0, NULL, 'b'},
165 {"section", 1, NULL, 's'},
168 const char *options = "tpnbs";
170 static void pr_err(char *msg) // NOTEST
172 fprintf(stderr, "%s\n", msg); // NOTEST
175 int main(int argc, char *argv[])
180 struct section *s = NULL, *ss;
181 char *section = NULL;
182 struct parse_context context = {
184 .ignored = (1 << TK_mark),
185 .number_chars = ".,_+- ",
190 int doprint=0, dotrace=0, doexec=1, brackets=0;
192 while ((opt = getopt_long(argc, argv, options, long_options, NULL))
195 case 't': dotrace=1; break;
196 case 'p': doprint=1; break;
197 case 'n': doexec=0; break;
198 case 'b': brackets=1; break;
199 case 's': section = optarg; break;
200 default: fprintf(stderr, Usage);
204 if (optind >= argc) {
205 fprintf(stderr, "oceani: no input file given\n");
208 fd = open(argv[optind], O_RDONLY);
210 fprintf(stderr, "oceani: cannot open %s\n", argv[optind]);
213 context.file_name = argv[optind];
214 len = lseek(fd, 0, 2);
215 file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
216 s = code_extract(file, file+len, pr_err);
218 fprintf(stderr, "oceani: could not find any code in %s\n",
223 ## context initialization
226 for (ss = s; ss; ss = ss->next) {
227 struct text sec = ss->section;
228 if (sec.len == strlen(section) &&
229 strncmp(sec.txt, section, sec.len) == 0)
233 fprintf(stderr, "oceani: cannot find section %s\n",
240 fprintf(stderr, "oceani: no code found in requested section\n"); // NOTEST
241 goto cleanup; // NOTEST
244 parse_oceani(ss->code, &context.config, dotrace ? stderr : NULL);
246 resolve_consts(&context);
247 prepare_types(&context);
248 if (!context.parse_error && !analyse_funcs(&context)) {
249 fprintf(stderr, "oceani: type error in program - not running.\n");
250 context.parse_error += 1;
258 if (doexec && !context.parse_error)
259 interp_main(&context, argc - optind, argv + optind);
262 struct section *t = s->next;
267 // FIXME parser should pop scope even on error
268 while (context.scope_depth > 0)
272 ## free context types
273 ## free context storage
274 exit(context.parse_error ? 1 : 0);
279 The four requirements of parse, analyse, print, interpret apply to
280 each language element individually so that is how most of the code
283 Three of the four are fairly self explanatory. The one that requires
284 a little explanation is the analysis step.
286 The current language design does not require the types of variables to
287 be declared, but they must still have a single type. Different
288 operations impose different requirements on the variables, for example
289 addition requires both arguments to be numeric, and assignment
290 requires the variable on the left to have the same type as the
291 expression on the right.
293 Analysis involves propagating these type requirements around and
294 consequently setting the type of each variable. If any requirements
295 are violated (e.g. a string is compared with a number) or if a
296 variable needs to have two different types, then an error is raised
297 and the program will not run.
299 If the same variable is declared in both branchs of an 'if/else', or
300 in all cases of a 'switch' then the multiple instances may be merged
301 into just one variable if the variable is referenced after the
302 conditional statement. When this happens, the types must naturally be
303 consistent across all the branches. When the variable is not used
304 outside the if, the variables in the different branches are distinct
305 and can be of different types.
307 Undeclared names may only appear in "use" statements and "case" expressions.
308 These names are given a type of "label" and a unique value.
309 This allows them to fill the role of a name in an enumerated type, which
310 is useful for testing the `switch` statement.
312 As we will see, the condition part of a `while` statement can return
313 either a Boolean or some other type. This requires that the expected
314 type that gets passed around comprises a type and a flag to indicate
315 that `Tbool` is also permitted.
317 As there are, as yet, no distinct types that are compatible, there
318 isn't much subtlety in the analysis. When we have distinct number
319 types, this will become more interesting.
323 When analysis discovers an inconsistency it needs to report an error;
324 just refusing to run the code ensures that the error doesn't cascade,
325 but by itself it isn't very useful. A clear understanding of the sort
326 of error message that are useful will help guide the process of
329 At a simplistic level, the only sort of error that type analysis can
330 report is that the type of some construct doesn't match a contextual
331 requirement. For example, in `4 + "hello"` the addition provides a
332 contextual requirement for numbers, but `"hello"` is not a number. In
333 this particular example no further information is needed as the types
334 are obvious from local information. When a variable is involved that
335 isn't the case. It may be helpful to explain why the variable has a
336 particular type, by indicating the location where the type was set,
337 whether by declaration or usage.
339 Using a recursive-descent analysis we can easily detect a problem at
340 multiple locations. In "`hello:= "there"; 4 + hello`" the addition
341 will detect that one argument is not a number and the usage of `hello`
342 will detect that a number was wanted, but not provided. In this
343 (early) version of the language, we will generate error reports at
344 multiple locations, so the use of `hello` will report an error and
345 explain were the value was set, and the addition will report an error
346 and say why numbers are needed. To be able to report locations for
347 errors, each language element will need to record a file location
348 (line and column) and each variable will need to record the language
349 element where its type was set. For now we will assume that each line
350 of an error message indicates one location in the file, and up to 2
351 types. So we provide a `printf`-like function which takes a format, a
352 location (a `struct exec` which has not yet been introduced), and 2
353 types. "`%1`" reports the first type, "`%2`" reports the second. We
354 will need a function to print the location, once we know how that is
355 stored. e As will be explained later, there are sometimes extra rules for
356 type matching and they might affect error messages, we need to pass those
359 As well as type errors, we sometimes need to report problems with
360 tokens, which might be unexpected or might name a type that has not
361 been defined. For these we have `tok_err()` which reports an error
362 with a given token. Each of the error functions sets the flag in the
363 context so indicate that parsing failed.
367 static void fput_loc(struct exec *loc, FILE *f);
368 static void type_err(struct parse_context *c,
369 char *fmt, struct exec *loc,
370 struct type *t1, enum val_rules rules, struct type *t2);
371 static void tok_err(struct parse_context *c, char *fmt, struct token *t);
373 ###### core functions
375 static void type_err(struct parse_context *c,
376 char *fmt, struct exec *loc,
377 struct type *t1, enum val_rules rules, struct type *t2)
379 fprintf(stderr, "%s:", c->file_name);
380 fput_loc(loc, stderr);
381 for (; *fmt ; fmt++) {
388 case '%': fputc(*fmt, stderr); break; // NOTEST
389 default: fputc('?', stderr); break; // NOTEST
391 type_print(t1, stderr);
394 type_print(t2, stderr);
403 static void tok_err(struct parse_context *c, char *fmt, struct token *t)
405 fprintf(stderr, "%s:%d:%d: %s: %.*s\n", c->file_name, t->line, t->col, fmt,
406 t->txt.len, t->txt.txt);
410 ## Entities: declared and predeclared.
412 There are various "things" that the language and/or the interpreter
413 needs to know about to parse and execute a program. These include
414 types, variables, values, and executable code. These are all lumped
415 together under the term "entities" (calling them "objects" would be
416 confusing) and introduced here. The following section will present the
417 different specific code elements which comprise or manipulate these
422 Executables can be lots of different things. In many cases an
423 executable is just an operation combined with one or two other
424 executables. This allows for expressions and lists etc. Other times an
425 executable is something quite specific like a constant or variable name.
426 So we define a `struct exec` to be a general executable with a type, and
427 a `struct binode` which is a subclass of `exec`, forms a node in a
428 binary tree, and holds an operation. The simplest operation is "List"
429 which can be used to combine several execs together.
431 There will be other subclasses, and to access these we need to be able
432 to `cast` the `exec` into the various other types. The first field in
433 any `struct exec` is the type from the `exec_types` enum.
436 #define cast(structname, pointer) ({ \
437 const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \
438 if (__mptr && *__mptr != X##structname) abort(); \
439 (struct structname *)( (char *)__mptr);})
441 #define new(structname) ({ \
442 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
443 __ptr->type = X##structname; \
444 __ptr->line = -1; __ptr->column = -1; \
447 #define new_pos(structname, token) ({ \
448 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
449 __ptr->type = X##structname; \
450 __ptr->line = token.line; __ptr->column = token.col; \
459 enum exec_types type;
469 struct exec *left, *right;
474 static int __fput_loc(struct exec *loc, FILE *f)
478 if (loc->line >= 0) {
479 fprintf(f, "%d:%d: ", loc->line, loc->column);
482 if (loc->type == Xbinode)
483 return __fput_loc(cast(binode,loc)->left, f) ||
484 __fput_loc(cast(binode,loc)->right, f); // NOTEST
487 static void fput_loc(struct exec *loc, FILE *f)
489 if (!__fput_loc(loc, f))
490 fprintf(f, "??:??: "); // NOTEST
493 Each different type of `exec` node needs a number of functions defined,
494 a bit like methods. We must be able to free it, print it, analyse it
495 and execute it. Once we have specific `exec` types we will need to
496 parse them too. Let's take this a bit more slowly.
500 The parser generator requires a `free_foo` function for each struct
501 that stores attributes and they will often be `exec`s and subtypes
502 there-of. So we need `free_exec` which can handle all the subtypes,
503 and we need `free_binode`.
507 static void free_binode(struct binode *b)
516 ###### core functions
517 static void free_exec(struct exec *e)
528 static void free_exec(struct exec *e);
530 ###### free exec cases
531 case Xbinode: free_binode(cast(binode, e)); break;
535 Printing an `exec` requires that we know the current indent level for
536 printing line-oriented components. As will become clear later, we
537 also want to know what sort of bracketing to use. It will also be used
538 to sometime print comments after an exec to explain some of the results
543 static void do_indent(int i, char *str)
550 ###### core functions
551 static void print_binode(struct binode *b, int indent, int bracket)
555 case List: abort(); // must be handled by parent NOTEST
556 ## print binode cases
560 static void print_exec(struct exec *e, int indent, int bracket)
566 print_binode(cast(binode, e), indent, bracket); break;
574 static void print_exec(struct exec *e, int indent, int bracket);
578 As discussed, analysis involves propagating type requirements around the
579 program and looking for errors.
581 So `propagate_types` is passed an expected type (being a `struct type`
582 pointer together with some `val_rules` flags) that the `exec` is
583 expected to return, and returns the type that it does return, either of
584 which can be `NULL` signifying "unknown". A `prop_err` flag set is
585 passed by reference. It has `Efail` set when an error is found, and
586 `Eretry` when the type for some element is set via propagation. If
587 any expression cannot be evaluated a compile time, `Eruntime` is set.
588 If the expression can be copied, `Emaycopy` is set.
590 If `Erval` is set, then the value cannot be assigned to because it is
591 a temporary result. If `Erval` is clear but `Econst` is set, then
592 the value can only be assigned once, when the variable is declared.
596 enum val_rules {Rboolok = 1<<0, Rrefok = 1<<1,};
597 enum prop_err {Efail = 1<<0, Eretry = 1<<1, Eruntime = 1<<2,
598 Emaycopy = 1<<3, Erval = 1<<4, Econst = 1<<5};
601 static struct type *propagate_types(struct exec *prog, struct parse_context *c, enum prop_err *perr,
602 struct type *type, enum val_rules rules);
603 ###### core functions
605 static struct type *__propagate_types(struct exec *prog, struct parse_context *c, enum prop_err *perr,
606 enum prop_err *perr_local,
607 struct type *type, enum val_rules rules)
614 switch (prog->type) {
617 struct binode *b = cast(binode, prog);
619 case List: abort(); // NOTEST
620 ## propagate binode cases
624 ## propagate exec cases
629 static struct type *propagate_types(struct exec *prog, struct parse_context *c, enum prop_err *perr,
630 struct type *type, enum val_rules rules)
632 int pre_err = c->parse_error;
633 enum prop_err perr_local = 0;
634 struct type *ret = __propagate_types(prog, c, perr, &perr_local, type, rules);
636 *perr |= perr_local & (Efail | Eretry);
637 if (c->parse_error > pre_err)
644 Interpreting an `exec` doesn't require anything but the `exec`. State
645 is stored in variables and each variable will be directly linked from
646 within the `exec` tree. The exception to this is the `main` function
647 which needs to look at command line arguments. This function will be
648 interpreted separately.
650 Each `exec` can return a value combined with a type in `struct lrval`.
651 The type may be `Tnone` but must be non-NULL. Some `exec`s will return
652 the location of a value, which can be updated, in `lval`. Others will
653 set `lval` to NULL indicating that there is a value of appropriate type
657 static struct value interp_exec(struct parse_context *c, struct exec *e,
658 struct type **typeret);
659 ###### core functions
663 struct value rval, *lval;
666 /* If dest is passed, dtype must give the expected type, and
667 * result can go there, in which case type is returned as NULL.
669 static struct lrval _interp_exec(struct parse_context *c, struct exec *e,
670 struct value *dest, struct type *dtype);
672 static struct value interp_exec(struct parse_context *c, struct exec *e,
673 struct type **typeret)
675 struct lrval ret = _interp_exec(c, e, NULL, NULL);
677 if (!ret.type) abort();
681 dup_value(ret.type, ret.lval, &ret.rval);
685 static struct value *linterp_exec(struct parse_context *c, struct exec *e,
686 struct type **typeret)
688 struct lrval ret = _interp_exec(c, e, NULL, NULL);
690 if (!ret.type) abort();
694 free_value(ret.type, &ret.rval);
698 /* dinterp_exec is used when the destination type is certain and
699 * the value has a place to go.
701 static void dinterp_exec(struct parse_context *c, struct exec *e,
702 struct value *dest, struct type *dtype,
705 struct lrval ret = _interp_exec(c, e, dest, dtype);
709 free_value(dtype, dest);
711 dup_value(dtype, ret.lval, dest);
713 memcpy(dest, &ret.rval, dtype->size);
716 static struct lrval _interp_exec(struct parse_context *c, struct exec *e,
717 struct value *dest, struct type *dtype)
719 /* If the result is copied to dest, ret.type is set to NULL */
721 struct value rv = {}, *lrv = NULL;
724 rvtype = ret.type = Tnone;
734 struct binode *b = cast(binode, e);
735 struct value left, right, *lleft;
736 struct type *ltype, *rtype;
737 ltype = rtype = Tnone;
739 case List: abort(); // NOTEST
740 ## interp binode cases
742 free_value(ltype, &left);
743 free_value(rtype, &right);
753 ## interp exec cleanup
759 Values come in a wide range of types, with more likely to be added.
760 Each type needs to be able to print its own values (for convenience at
761 least) as well as to compare two values, at least for equality and
762 possibly for order. For now, values might need to be duplicated and
763 freed, though eventually such manipulations will be better integrated
766 Rather than requiring every numeric type to support all numeric
767 operations (add, multiply, etc), we allow types to be able to present
768 as one of a few standard types: integer, float, and fraction. The
769 existence of these conversion functions eventually enable types to
770 determine if they are compatible with other types, though such types
771 have not yet been implemented.
773 Named type are stored in a simple linked list. Objects of each type are
774 "values" which are often passed around by value.
776 There are both explicitly named types, and anonymous types. Anonymous
777 cannot be accessed by name, but are used internally and have a name
778 which might be reported in error messages.
785 ## value union fields
793 struct token first_use;
796 void (*init)(struct type *type, struct value *val);
797 int (*prepare_type)(struct parse_context *c, struct type *type, int parse_time);
798 void (*print)(struct type *type, struct value *val, FILE *f);
799 void (*print_type)(struct type *type, FILE *f);
800 int (*cmp_order)(struct type *t1, struct type *t2,
801 struct value *v1, struct value *v2);
802 int (*cmp_eq)(struct type *t1, struct type *t2,
803 struct value *v1, struct value *v2);
804 void (*dup)(struct type *type, struct value *vold, struct value *vnew);
805 int (*test)(struct type *type, struct value *val);
806 void (*free)(struct type *type, struct value *val);
807 void (*free_type)(struct type *t);
808 long long (*to_int)(struct value *v);
809 double (*to_float)(struct value *v);
810 int (*to_mpq)(mpq_t *q, struct value *v);
819 struct type *typelist;
826 static struct type *find_type(struct parse_context *c, struct text s)
828 struct type *t = c->typelist;
830 while (t && (t->anon ||
831 text_cmp(t->name, s) != 0))
836 static struct type *_add_type(struct parse_context *c, struct text s,
837 struct type *proto, int anon)
841 n = calloc(1, sizeof(*n));
848 n->next = c->typelist;
853 static struct type *add_type(struct parse_context *c, struct text s,
856 return _add_type(c, s, proto, 0);
859 static struct type *add_anon_type(struct parse_context *c,
860 struct type *proto, char *name, ...)
866 vasprintf(&t.txt, name, ap);
868 t.len = strlen(t.txt);
869 return _add_type(c, t, proto, 1);
872 static struct type *find_anon_type(struct parse_context *c,
873 struct type *proto, char *name, ...)
875 struct type *t = c->typelist;
880 vasprintf(&nm.txt, name, ap);
882 nm.len = strlen(name);
884 while (t && (!t->anon ||
885 text_cmp(t->name, nm) != 0))
891 return _add_type(c, nm, proto, 1);
894 static void free_type(struct type *t)
896 /* The type is always a reference to something in the
897 * context, so we don't need to free anything.
901 static void free_value(struct type *type, struct value *v)
905 memset(v, 0x5a, type->size);
909 static void type_print(struct type *type, FILE *f)
912 fputs("*unknown*type*", f); // NOTEST
913 else if (type->name.len && !type->anon)
914 fprintf(f, "%.*s", type->name.len, type->name.txt);
915 else if (type->print_type)
916 type->print_type(type, f);
917 else if (type->name.len && type->anon)
918 fprintf(f, "\"%.*s\"", type->name.len, type->name.txt);
920 fputs("*invalid*type*", f); // NOTEST
923 static void val_init(struct type *type, struct value *val)
925 if (type && type->init)
926 type->init(type, val);
929 static void dup_value(struct type *type,
930 struct value *vold, struct value *vnew)
932 if (type && type->dup)
933 type->dup(type, vold, vnew);
936 static int value_cmp(struct type *tl, struct type *tr,
937 struct value *left, struct value *right)
939 if (tl && tl->cmp_order)
940 return tl->cmp_order(tl, tr, left, right);
941 if (tl && tl->cmp_eq)
942 return tl->cmp_eq(tl, tr, left, right);
946 static void print_value(struct type *type, struct value *v, FILE *f)
948 if (type && type->print)
949 type->print(type, v, f);
951 fprintf(f, "*Unknown*"); // NOTEST
954 static void prepare_types(struct parse_context *c)
958 enum { none, some, cannot } progress = none;
963 for (t = c->typelist; t; t = t->next) {
965 tok_err(c, "error: type used but not declared",
967 if (t->size == 0 && t->prepare_type) {
968 if (t->prepare_type(c, t, 1))
970 else if (progress == cannot)
971 tok_err(c, "error: type has recursive definition",
981 progress = cannot; break;
983 progress = none; break;
990 static void free_value(struct type *type, struct value *v);
991 static int type_compat(struct type *require, struct type *have, enum val_rules rules);
992 static void type_print(struct type *type, FILE *f);
993 static void val_init(struct type *type, struct value *v);
994 static void dup_value(struct type *type,
995 struct value *vold, struct value *vnew);
996 static int value_cmp(struct type *tl, struct type *tr,
997 struct value *left, struct value *right);
998 static void print_value(struct type *type, struct value *v, FILE *f);
1000 ###### free context types
1002 while (context.typelist) {
1003 struct type *t = context.typelist;
1005 context.typelist = t->next;
1013 Type can be specified for local variables, for fields in a structure,
1014 for formal parameters to functions, and possibly elsewhere. Different
1015 rules may apply in different contexts. As a minimum, a named type may
1016 always be used. Currently the type of a formal parameter can be
1017 different from types in other contexts, so we have a separate grammar
1023 Type -> IDENTIFIER ${
1024 $0 = find_type(c, $ID.txt);
1026 $0 = add_type(c, $ID.txt, NULL);
1027 $0->first_use = $ID;
1032 FormalType -> Type ${ $0 = $<1; }$
1033 ## formal type grammar
1037 Values of the base types can be numbers, which we represent as
1038 multi-precision fractions, strings, Booleans and labels. When
1039 analysing the program we also need to allow for places where no value
1040 is meaningful (type `Tnone`) and where we don't know what type to
1041 expect yet (type is `NULL`).
1043 Values are never shared, they are always copied when used, and freed
1044 when no longer needed.
1046 When propagating type information around the program, we need to
1047 determine if two types are compatible, where type `NULL` is compatible
1048 with anything. There are two special cases with type compatibility,
1049 both related to the Conditional Statement which will be described
1050 later. In some cases a Boolean can be accepted as well as some other
1051 primary type, and in others any type is acceptable except a label (`Vlabel`).
1052 A separate function encoding these cases will simplify some code later.
1054 ###### type functions
1056 int (*compat)(struct type *this, struct type *other, enum val_rules rules);
1058 ###### ast functions
1060 static int type_compat(struct type *require, struct type *have,
1061 enum val_rules rules)
1063 if ((rules & Rboolok) && have == Tbool)
1065 if (!require || !have)
1068 if (require->compat)
1069 return require->compat(require, have, rules);
1071 return require == have;
1076 #include "parse_string.h"
1077 #include "parse_number.h"
1080 myLDLIBS := libnumber.o libstring.o -lgmp
1081 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
1083 ###### type union fields
1084 enum vtype {Vnone, Vstr, Vnum, Vbool, Vlabel} vtype;
1086 ###### value union fields
1092 ###### ast functions
1093 static void _free_value(struct type *type, struct value *v)
1097 switch (type->vtype) {
1099 case Vstr: free(v->str.txt); break;
1100 case Vnum: mpq_clear(v->num); break;
1106 ###### value functions
1108 static void _val_init(struct type *type, struct value *val)
1110 switch(type->vtype) {
1111 case Vnone: // NOTEST
1114 mpq_init(val->num); break;
1116 val->str.txt = malloc(1);
1123 val->label = 0; // NOTEST
1128 static void _dup_value(struct type *type,
1129 struct value *vold, struct value *vnew)
1131 switch (type->vtype) {
1132 case Vnone: // NOTEST
1135 vnew->label = vold->label; // NOTEST
1138 vnew->bool = vold->bool;
1141 mpq_init(vnew->num);
1142 mpq_set(vnew->num, vold->num);
1145 vnew->str.len = vold->str.len;
1146 vnew->str.txt = malloc(vnew->str.len);
1147 memcpy(vnew->str.txt, vold->str.txt, vnew->str.len);
1152 static int _value_cmp(struct type *tl, struct type *tr,
1153 struct value *left, struct value *right)
1158 switch (tl->vtype) {
1159 case Vlabel: cmp = left->label == right->label ? 0 : 1; break;
1160 case Vnum: cmp = mpq_cmp(left->num, right->num); break;
1161 case Vstr: cmp = text_cmp(left->str, right->str); break;
1162 case Vbool: cmp = left->bool - right->bool; break;
1163 case Vnone: cmp = 0; // NOTEST
1168 static void _print_value(struct type *type, struct value *v, FILE *f)
1170 switch (type->vtype) {
1171 case Vnone: // NOTEST
1172 fprintf(f, "*no-value*"); break; // NOTEST
1173 case Vlabel: // NOTEST
1174 fprintf(f, "*label-%d*", v->label); break; // NOTEST
1176 fprintf(f, "%.*s", v->str.len, v->str.txt); break;
1178 fprintf(f, "%s", v->bool ? "True":"False"); break;
1183 mpf_set_q(fl, v->num);
1184 gmp_fprintf(f, "%.10Fg", fl);
1191 static void _free_value(struct type *type, struct value *v);
1193 static int bool_test(struct type *type, struct value *v)
1198 static struct type base_prototype = {
1200 .print = _print_value,
1201 .cmp_order = _value_cmp,
1202 .cmp_eq = _value_cmp,
1204 .free = _free_value,
1207 static struct type *Tbool, *Tstr, *Tnum, *Tnone, *Tlabel;
1209 ###### ast functions
1210 static struct type *add_base_type(struct parse_context *c, char *n,
1211 enum vtype vt, int size)
1213 struct text txt = { n, strlen(n) };
1216 t = add_type(c, txt, &base_prototype);
1219 t->align = size > sizeof(void*) ? sizeof(void*) : size;
1220 if (t->size & (t->align - 1))
1221 t->size = (t->size | (t->align - 1)) + 1; // NOTEST
1225 ###### context initialization
1227 Tbool = add_base_type(&context, "Boolean", Vbool, sizeof(char));
1228 Tbool->test = bool_test;
1229 Tstr = add_base_type(&context, "string", Vstr, sizeof(struct text));
1230 Tnum = add_base_type(&context, "number", Vnum, sizeof(mpq_t));
1231 Tnone = add_base_type(&context, "none", Vnone, 0);
1232 Tlabel = add_base_type(&context, "label", Vlabel, sizeof(void*));
1236 We have already met values as separate objects. When manifest constants
1237 appear in the program text, that must result in an executable which has
1238 a constant value. So the `val` structure embeds a value in an
1251 ###### ast functions
1252 struct val *new_val(struct type *T, struct token tk)
1254 struct val *v = new_pos(val, tk);
1259 ###### declare terminals
1266 $0 = new_val(Tbool, $1);
1270 $0 = new_val(Tbool, $1);
1275 $0 = new_val(Tnum, $1);
1276 if (number_parse($0->val.num, tail, $1.txt) == 0) {
1277 mpq_init($0->val.num);
1278 tok_err(c, "error: unsupported number format", &$NUM);
1280 tok_err(c, "error: unsupported number suffix", &$1);
1284 $0 = new_val(Tstr, $1);
1285 string_parse(&$1, '\\', &$0->val.str, tail);
1287 tok_err(c, "error: unsupported string suffix",
1292 $0 = new_val(Tstr, $1);
1293 string_parse(&$1, '\\', &$0->val.str, tail);
1295 tok_err(c, "error: unsupported string suffix",
1299 ###### print exec cases
1302 struct val *v = cast(val, e);
1303 if (v->vtype == Tstr)
1305 // FIXME how to ensure numbers have same precision.
1306 print_value(v->vtype, &v->val, stdout);
1307 if (v->vtype == Tstr)
1312 ###### propagate exec cases
1315 struct val *val = cast(val, prog);
1316 if (!type_compat(type, val->vtype, rules))
1317 type_err(c, "error: expected %1 found %2",
1318 prog, type, rules, val->vtype);
1323 ###### interp exec cases
1325 rvtype = cast(val, e)->vtype;
1326 dup_value(rvtype, &cast(val, e)->val, &rv);
1329 ###### ast functions
1330 static void free_val(struct val *v)
1333 free_value(v->vtype, &v->val);
1337 ###### free exec cases
1338 case Xval: free_val(cast(val, e)); break;
1340 ###### ast functions
1341 // Move all nodes from 'b' to 'rv', reversing their order.
1342 // In 'b' 'left' is a list, and 'right' is the last node.
1343 // In 'rv', left' is the first node and 'right' is a list.
1344 static struct binode *reorder_bilist(struct binode *b)
1346 struct binode *rv = NULL;
1349 struct exec *t = b->right;
1353 b = cast(binode, b->left);
1363 Labels are a temporary concept until I implement enums. There are an
1364 anonymous enum which is declared by usage. Thet are only allowed in
1365 `use` statements and corresponding `case` entries. They appear as a
1366 period followed by an identifier. All identifiers that are "used" must
1369 For now, we have a global list of labels, and don't check that all "use"
1381 ###### free exec cases
1385 ###### print exec cases
1387 struct label *l = cast(label, e);
1388 printf(".%.*s", l->name.len, l->name.txt);
1394 struct labels *next;
1398 ###### parse context
1399 struct labels *labels;
1401 ###### ast functions
1402 static int label_lookup(struct parse_context *c, struct text name)
1404 struct labels *l, **lp = &c->labels;
1405 while (*lp && text_cmp((*lp)->name, name) < 0)
1407 if (*lp && text_cmp((*lp)->name, name) == 0)
1408 return (*lp)->value;
1409 l = calloc(1, sizeof(*l));
1412 if (c->next_label == 0)
1414 l->value = c->next_label;
1420 ###### free context storage
1421 while (context.labels) {
1422 struct labels *l = context.labels;
1423 context.labels = l->next;
1427 ###### declare terminals
1431 struct label *l = new_pos(label, $ID);
1435 ###### propagate exec cases
1437 struct label *l = cast(label, prog);
1438 l->value = label_lookup(c, l->name);
1439 if (!type_compat(type, Tlabel, rules))
1440 type_err(c, "error: expected %1 found %2",
1441 prog, type, rules, Tlabel);
1445 ###### interp exec cases
1447 struct label *l = cast(label, e);
1448 rv.label = l->value;
1456 Variables are scoped named values. We store the names in a linked list
1457 of "bindings" sorted in lexical order, and use sequential search and
1464 struct binding *next; // in lexical order
1468 This linked list is stored in the parse context so that "reduce"
1469 functions can find or add variables, and so the analysis phase can
1470 ensure that every variable gets a type.
1472 ###### parse context
1474 struct binding *varlist; // In lexical order
1476 ###### ast functions
1478 static struct binding *find_binding(struct parse_context *c, struct text s)
1480 struct binding **l = &c->varlist;
1485 (cmp = text_cmp((*l)->name, s)) < 0)
1489 n = calloc(1, sizeof(*n));
1496 Each name can be linked to multiple variables defined in different
1497 scopes. Each scope starts where the name is declared and continues
1498 until the end of the containing code block. Scopes of a given name
1499 cannot nest, so a declaration while a name is in-scope is an error.
1501 ###### binding fields
1502 struct variable *var;
1506 struct variable *previous;
1508 struct binding *name;
1509 struct exec *where_decl;// where name was declared
1510 struct exec *where_set; // where type was set
1514 When a scope closes, the values of the variables might need to be freed.
1515 This happens in the context of some `struct exec` and each `exec` will
1516 need to know which variables need to be freed when it completes. To
1517 improve visibility, we add a comment when printing any `exec` that
1518 embodies a scope to list the variables that must be freed when it ends.
1521 struct variable *to_free;
1523 ####### variable fields
1524 struct exec *cleanup_exec;
1525 struct variable *next_free;
1527 ####### interp exec cleanup
1530 for (v = e->to_free; v; v = v->next_free) {
1531 struct value *val = var_value(c, v);
1532 free_value(v->type, val);
1536 ###### print exec extras
1539 do_indent(indent, "/* FREE");
1540 for (v = e->to_free; v; v = v->next_free) {
1541 printf(" %.*s", v->name->name.len, v->name->name.txt);
1542 printf("[%d,%d]", v->scope_start, v->scope_end);
1543 if (v->frame_pos >= 0)
1544 printf("(%d+%d)", v->frame_pos,
1545 v->type ? v->type->size:0);
1550 ###### ast functions
1551 static void variable_unlink_exec(struct variable *v)
1553 struct variable **vp;
1554 if (!v->cleanup_exec)
1556 for (vp = &v->cleanup_exec->to_free;
1557 *vp; vp = &(*vp)->next_free) {
1561 v->cleanup_exec = NULL;
1566 While the naming seems strange, we include local constants in the
1567 definition of variables. A name declared `var := value` can
1568 subsequently be changed, but a name declared `var ::= value` cannot -
1571 ###### variable fields
1574 Scopes in parallel branches can be partially merged. More
1575 specifically, if a given name is declared in both branches of an
1576 if/else then its scope is a candidate for merging. Similarly if
1577 every branch of an exhaustive switch (e.g. has an "else" clause)
1578 declares a given name, then the scopes from the branches are
1579 candidates for merging.
1581 Note that names declared inside a loop (which is only parallel to
1582 itself) are never visible after the loop. Similarly names defined in
1583 scopes which are not parallel, such as those started by `for` and
1584 `switch`, are never visible after the scope. Only variables defined in
1585 both `then` and `else` (including the implicit then after an `if`, and
1586 excluding `then` used with `for`) and in all `case`s and `else` of a
1587 `switch` or `while` can be visible beyond the `if`/`switch`/`while`.
1589 Labels, which are a bit like variables, follow different rules.
1590 Labels are not explicitly declared, but if an undeclared name appears
1591 in a context where a label is legal, that effectively declares the
1592 name as a label. The declaration remains in force (or in scope) at
1593 least to the end of the immediately containing block and conditionally
1594 in any larger containing block which does not declare the name in some
1595 other way. Importantly, the conditional scope extension happens even
1596 if the label is only used in one parallel branch of a conditional --
1597 when used in one branch it is treated as having been declared in all
1600 Merge candidates are tentatively visible beyond the end of the
1601 branching statement which creates them. If the name is used, the
1602 merge is affirmed and they become a single variable visible at the
1603 outer layer. If not - if it is redeclared first - the merge lapses.
1605 To track scopes we have an extra stack, implemented as a linked list,
1606 which roughly parallels the parse stack and which is used exclusively
1607 for scoping. When a new scope is opened, a new frame is pushed and
1608 the child-count of the parent frame is incremented. This child-count
1609 is used to distinguish between the first of a set of parallel scopes,
1610 in which declared variables must not be in scope, and subsequent
1611 branches, whether they may already be conditionally scoped.
1613 We need a total ordering of scopes so we can easily compare to variables
1614 to see if they are concurrently in scope. To achieve this we record a
1615 `scope_count` which is actually a count of both beginnings and endings
1616 of scopes. Then each variable has a record of the scope count where it
1617 enters scope, and where it leaves.
1619 To push a new frame *before* any code in the frame is parsed, we need a
1620 grammar reduction. This is most easily achieved with a grammar
1621 element which derives the empty string, and creates the new scope when
1622 it is recognised. This can be placed, for example, between a keyword
1623 like "if" and the code following it.
1627 struct scope *parent;
1631 ###### parse context
1634 struct scope *scope_stack;
1636 ###### variable fields
1637 int scope_start, scope_end;
1639 ###### ast functions
1640 static void scope_pop(struct parse_context *c)
1642 struct scope *s = c->scope_stack;
1644 c->scope_stack = s->parent;
1646 c->scope_depth -= 1;
1647 c->scope_count += 1;
1650 static void scope_push(struct parse_context *c)
1652 struct scope *s = calloc(1, sizeof(*s));
1654 c->scope_stack->child_count += 1;
1655 s->parent = c->scope_stack;
1657 c->scope_depth += 1;
1658 c->scope_count += 1;
1664 OpenScope -> ${ scope_push(c); }$
1666 Each variable records a scope depth and is in one of four states:
1668 - "in scope". This is the case between the declaration of the
1669 variable and the end of the containing block, and also between
1670 the usage with affirms a merge and the end of that block.
1672 The scope depth is not greater than the current parse context scope
1673 nest depth. When the block of that depth closes, the state will
1674 change. To achieve this, all "in scope" variables are linked
1675 together as a stack in nesting order.
1677 - "pending". The "in scope" block has closed, but other parallel
1678 scopes are still being processed. So far, every parallel block at
1679 the same level that has closed has declared the name.
1681 The scope depth is the depth of the last parallel block that
1682 enclosed the declaration, and that has closed.
1684 - "conditionally in scope". The "in scope" block and all parallel
1685 scopes have closed, and no further mention of the name has been seen.
1686 This state includes a secondary nest depth (`min_depth`) which records
1687 the outermost scope seen since the variable became conditionally in
1688 scope. If a use of the name is found, the variable becomes "in scope"
1689 and that secondary depth becomes the recorded scope depth. If the
1690 name is declared as a new variable, the old variable becomes "out of
1691 scope" and the recorded scope depth stays unchanged.
1693 - "out of scope". The variable is neither in scope nor conditionally
1694 in scope. It is permanently out of scope now and can be removed from
1695 the "in scope" stack. When a variable becomes out-of-scope it is
1696 moved to a separate list (`out_scope`) of variables which have fully
1697 known scope. This will be used at the end of each function to assign
1698 each variable a place in the stack frame.
1700 ###### variable fields
1701 int depth, min_depth;
1702 enum { OutScope, PendingScope, CondScope, InScope } scope;
1703 struct variable *in_scope;
1705 ###### parse context
1707 struct variable *in_scope;
1708 struct variable *out_scope;
1710 All variables with the same name are linked together using the
1711 'previous' link. Those variable that have been affirmatively merged all
1712 have a 'merged' pointer that points to one primary variable - the most
1713 recently declared instance. When merging variables, we need to also
1714 adjust the 'merged' pointer on any other variables that had previously
1715 been merged with the one that will no longer be primary.
1717 A variable that is no longer the most recent instance of a name may
1718 still have "pending" scope, if it might still be merged with most
1719 recent instance. These variables don't really belong in the
1720 "in_scope" list, but are not immediately removed when a new instance
1721 is found. Instead, they are detected and ignored when considering the
1722 list of in_scope names.
1724 The storage of the value of a variable will be described later. For now
1725 we just need to know that when a variable goes out of scope, it might
1726 need to be freed. For this we need to be able to find it, so assume that
1727 `var_value()` will provide that.
1729 ###### variable fields
1730 struct variable *merged;
1732 ###### ast functions
1734 static void variable_merge(struct variable *primary, struct variable *secondary)
1738 primary = primary->merged;
1740 for (v = primary->previous; v; v=v->previous)
1741 if (v == secondary || v == secondary->merged ||
1742 v->merged == secondary ||
1743 v->merged == secondary->merged) {
1744 v->scope = OutScope;
1745 v->merged = primary;
1746 if (v->scope_start < primary->scope_start)
1747 primary->scope_start = v->scope_start;
1748 if (v->scope_end > primary->scope_end)
1749 primary->scope_end = v->scope_end; // NOTEST
1750 variable_unlink_exec(v);
1754 ###### forward decls
1755 static struct value *var_value(struct parse_context *c, struct variable *v);
1757 ###### free global vars
1759 while (context.varlist) {
1760 struct binding *b = context.varlist;
1761 struct variable *v = b->var;
1762 context.varlist = b->next;
1765 struct variable *next = v->previous;
1767 if (v->global && v->frame_pos >= 0) {
1768 free_value(v->type, var_value(&context, v));
1769 if (v->depth == 0 && v->type->free == function_free)
1770 // This is a function constant
1771 free_exec(v->where_decl);
1778 #### Manipulating Bindings
1780 When a name is conditionally visible, a new declaration discards the old
1781 binding - the condition lapses. Similarly when we reach the end of a
1782 function (outermost non-global scope) any conditional scope must lapse.
1783 Conversely a usage of the name affirms the visibility and extends it to
1784 the end of the containing block - i.e. the block that contains both the
1785 original declaration and the latest usage. This is determined from
1786 `min_depth`. When a conditionally visible variable gets affirmed like
1787 this, it is also merged with other conditionally visible variables with
1790 When we parse a variable declaration we either report an error if the
1791 name is currently bound, or create a new variable at the current nest
1792 depth if the name is unbound or bound to a conditionally scoped or
1793 pending-scope variable. If the previous variable was conditionally
1794 scoped, it and its homonyms becomes out-of-scope.
1796 When we parse a variable reference (including non-declarative assignment
1797 "foo = bar") we report an error if the name is not bound or is bound to
1798 a pending-scope variable; update the scope if the name is bound to a
1799 conditionally scoped variable; or just proceed normally if the named
1800 variable is in scope.
1802 When we exit a scope, any variables bound at this level are either
1803 marked out of scope or pending-scoped, depending on whether the scope
1804 was sequential or parallel. Here a "parallel" scope means the "then"
1805 or "else" part of a conditional, or any "case" or "else" branch of a
1806 switch. Other scopes are "sequential".
1808 When exiting a parallel scope we check if there are any variables that
1809 were previously pending and are still visible. If there are, then
1810 they weren't redeclared in the most recent scope, so they cannot be
1811 merged and must become out-of-scope. If it is not the first of
1812 parallel scopes (based on `child_count`), we check that there was a
1813 previous binding that is still pending-scope. If there isn't, the new
1814 variable must now be out-of-scope.
1816 When exiting a sequential scope that immediately enclosed parallel
1817 scopes, we need to resolve any pending-scope variables. If there was
1818 no `else` clause, and we cannot determine that the `switch` was exhaustive,
1819 we need to mark all pending-scope variable as out-of-scope. Otherwise
1820 all pending-scope variables become conditionally scoped.
1823 enum closetype { CloseSequential, CloseFunction, CloseParallel, CloseElse };
1825 ###### ast functions
1827 static struct variable *var_decl(struct parse_context *c, struct text s)
1829 struct binding *b = find_binding(c, s);
1830 struct variable *v = b->var;
1832 switch (v ? v->scope : OutScope) {
1834 /* Caller will report the error */
1838 v && v->scope == CondScope;
1840 v->scope = OutScope;
1844 v = calloc(1, sizeof(*v));
1845 v->previous = b->var;
1849 v->min_depth = v->depth = c->scope_depth;
1851 v->in_scope = c->in_scope;
1852 v->scope_start = c->scope_count;
1858 static struct variable *var_ref(struct parse_context *c, struct text s)
1860 struct binding *b = find_binding(c, s);
1861 struct variable *v = b->var;
1862 struct variable *v2;
1864 switch (v ? v->scope : OutScope) {
1867 /* Caller will report the error */
1870 /* All CondScope variables of this name need to be merged
1871 * and become InScope
1873 v->depth = v->min_depth;
1875 for (v2 = v->previous;
1876 v2 && v2->scope == CondScope;
1878 variable_merge(v, v2);
1886 static int var_refile(struct parse_context *c, struct variable *v)
1888 /* Variable just went out of scope. Add it to the out_scope
1889 * list, sorted by ->scope_start
1891 struct variable **vp = &c->out_scope;
1892 while ((*vp) && (*vp)->scope_start < v->scope_start)
1893 vp = &(*vp)->in_scope;
1899 static void var_block_close(struct parse_context *c, enum closetype ct,
1902 /* Close off all variables that are in_scope.
1903 * Some variables in c->scope may already be not-in-scope,
1904 * such as when a PendingScope variable is hidden by a new
1905 * variable with the same name.
1906 * So we check for v->name->var != v and drop them.
1907 * If we choose to make a variable OutScope, we drop it
1910 struct variable *v, **vp, *v2;
1913 for (vp = &c->in_scope;
1914 (v = *vp) && v->min_depth > c->scope_depth;
1915 (v->scope == OutScope || v->name->var != v)
1916 ? (*vp = v->in_scope, var_refile(c, v))
1917 : ( vp = &v->in_scope, 0)) {
1918 v->min_depth = c->scope_depth;
1919 if (v->name->var != v)
1920 /* This is still in scope, but we haven't just
1924 v->min_depth = c->scope_depth;
1925 if (v->scope == InScope)
1926 v->scope_end = c->scope_count;
1927 if (v->scope == InScope && e && !v->global) {
1928 /* This variable gets cleaned up when 'e' finishes */
1929 variable_unlink_exec(v);
1930 v->cleanup_exec = e;
1931 v->next_free = e->to_free;
1936 case CloseParallel: /* handle PendingScope */
1940 if (c->scope_stack->child_count == 1)
1941 /* first among parallel branches */
1942 v->scope = PendingScope;
1943 else if (v->previous &&
1944 v->previous->scope == PendingScope)
1945 /* all previous branches used name */
1946 v->scope = PendingScope;
1948 v->scope = OutScope;
1949 if (ct == CloseElse) {
1950 /* All Pending variables with this name
1951 * are now Conditional */
1953 v2 && v2->scope == PendingScope;
1955 v2->scope = CondScope;
1959 /* Not possible as it would require
1960 * parallel scope to be nested immediately
1961 * in a parallel scope, and that never
1965 /* Not possible as we already tested for
1972 if (v->scope == CondScope)
1973 /* Condition cannot continue past end of function */
1976 case CloseSequential:
1979 v->scope = OutScope;
1982 /* There was no 'else', so we can only become
1983 * conditional if we know the cases were exhaustive,
1984 * and that doesn't mean anything yet.
1985 * So only labels become conditional..
1988 v2 && v2->scope == PendingScope;
1990 v2->scope = OutScope;
1993 case OutScope: break;
2002 The value of a variable is store separately from the variable, on an
2003 analogue of a stack frame. There are (currently) two frames that can be
2004 active. A global frame which currently only stores constants, and a
2005 stacked frame which stores local variables. Each variable knows if it
2006 is global or not, and what its index into the frame is.
2008 Values in the global frame are known immediately they are relevant, so
2009 the frame needs to be reallocated as it grows so it can store those
2010 values. The local frame doesn't get values until the interpreted phase
2011 is started, so there is no need to allocate until the size is known.
2013 We initialize the `frame_pos` to an impossible value, so that we can
2014 tell if it was set or not later.
2016 ###### variable fields
2020 ###### variable init
2023 ###### parse context
2025 short global_size, global_alloc;
2027 void *global, *local;
2029 ###### forward decls
2030 static struct value *global_alloc(struct parse_context *c, struct type *t,
2031 struct variable *v, struct value *init);
2033 ###### ast functions
2035 static struct value *var_value(struct parse_context *c, struct variable *v)
2038 if (!c->local || !v->type)
2039 return NULL; // NOTEST
2040 if (v->frame_pos + v->type->size > c->local_size) {
2041 printf("INVALID frame_pos\n"); // NOTEST
2044 return c->local + v->frame_pos;
2046 if (c->global_size > c->global_alloc) {
2047 int old = c->global_alloc;
2048 c->global_alloc = (c->global_size | 1023) + 1024;
2049 c->global = realloc(c->global, c->global_alloc);
2050 memset(c->global + old, 0, c->global_alloc - old);
2052 return c->global + v->frame_pos;
2055 static struct value *global_alloc(struct parse_context *c, struct type *t,
2056 struct variable *v, struct value *init)
2059 struct variable scratch;
2061 if (t->prepare_type)
2062 t->prepare_type(c, t, 1); // NOTEST
2064 if (c->global_size & (t->align - 1))
2065 c->global_size = (c->global_size + t->align) & ~(t->align-1);
2070 v->frame_pos = c->global_size;
2072 c->global_size += v->type->size;
2073 ret = var_value(c, v);
2075 memcpy(ret, init, t->size);
2077 val_init(t, ret); // NOTEST
2081 As global values are found -- struct field initializers, labels etc --
2082 `global_alloc()` is called to record the value in the global frame.
2084 When the program is fully parsed, each function is analysed, we need to
2085 walk the list of variables local to that function and assign them an
2086 offset in the stack frame. For this we have `scope_finalize()`.
2088 We keep the stack from dense by re-using space for between variables
2089 that are not in scope at the same time. The `out_scope` list is sorted
2090 by `scope_start` and as we process a varible, we move it to an FIFO
2091 stack. For each variable we consider, we first discard any from the
2092 stack anything that went out of scope before the new variable came in.
2093 Then we place the new variable just after the one at the top of the
2096 ###### ast functions
2098 static void scope_finalize(struct parse_context *c, struct type *ft)
2100 int size = ft->function.local_size;
2101 struct variable *next = ft->function.scope;
2102 struct variable *done = NULL;
2105 struct variable *v = next;
2106 struct type *t = v->type;
2113 if (v->frame_pos >= 0)
2115 while (done && done->scope_end < v->scope_start)
2116 done = done->in_scope;
2118 pos = done->frame_pos + done->type->size;
2120 pos = ft->function.local_size;
2121 if (pos & (t->align - 1))
2122 pos = (pos + t->align) & ~(t->align-1);
2124 if (size < pos + v->type->size)
2125 size = pos + v->type->size;
2129 c->out_scope = NULL;
2130 ft->function.local_size = size;
2133 ###### free context storage
2134 free(context.global);
2136 #### Variables as executables
2138 Just as we used a `val` to wrap a value into an `exec`, we similarly
2139 need a `var` to wrap a `variable` into an exec. While each `val`
2140 contained a copy of the value, each `var` holds a link to the variable
2141 because it really is the same variable no matter where it appears.
2142 When a variable is used, we need to remember to follow the `->merged`
2143 link to find the primary instance.
2145 When a variable is declared, it may or may not be given an explicit
2146 type. We need to record which so that we can report the parsed code
2155 struct variable *var;
2158 ###### variable fields
2166 VariableDecl -> IDENTIFIER : ${ {
2167 struct variable *v = var_decl(c, $1.txt);
2168 $0 = new_pos(var, $1);
2173 v = var_ref(c, $1.txt);
2175 type_err(c, "error: variable '%v' redeclared",
2177 type_err(c, "info: this is where '%v' was first declared",
2178 v->where_decl, NULL, 0, NULL);
2181 | IDENTIFIER :: ${ {
2182 struct variable *v = var_decl(c, $1.txt);
2183 $0 = new_pos(var, $1);
2189 v = var_ref(c, $1.txt);
2191 type_err(c, "error: variable '%v' redeclared",
2193 type_err(c, "info: this is where '%v' was first declared",
2194 v->where_decl, NULL, 0, NULL);
2197 | IDENTIFIER : Type ${ {
2198 struct variable *v = var_decl(c, $1.txt);
2199 $0 = new_pos(var, $1);
2205 v->explicit_type = 1;
2207 v = var_ref(c, $1.txt);
2209 type_err(c, "error: variable '%v' redeclared",
2211 type_err(c, "info: this is where '%v' was first declared",
2212 v->where_decl, NULL, 0, NULL);
2215 | IDENTIFIER :: Type ${ {
2216 struct variable *v = var_decl(c, $1.txt);
2217 $0 = new_pos(var, $1);
2224 v->explicit_type = 1;
2226 v = var_ref(c, $1.txt);
2228 type_err(c, "error: variable '%v' redeclared",
2230 type_err(c, "info: this is where '%v' was first declared",
2231 v->where_decl, NULL, 0, NULL);
2236 Variable -> IDENTIFIER ${ {
2237 struct variable *v = var_ref(c, $1.txt);
2238 $0 = new_pos(var, $1);
2240 /* This might be a global const or a label
2241 * Allocate a var with impossible type Tnone,
2242 * which will be adjusted when we find out what it is,
2243 * or will trigger an error.
2245 v = var_decl(c, $1.txt);
2252 cast(var, $0)->var = v;
2255 ###### print exec cases
2258 struct var *v = cast(var, e);
2260 struct binding *b = v->var->name;
2261 printf("%.*s", b->name.len, b->name.txt);
2268 if (loc && loc->type == Xvar) {
2269 struct var *v = cast(var, loc);
2271 struct binding *b = v->var->name;
2272 fprintf(stderr, "%.*s", b->name.len, b->name.txt);
2274 fputs("???", stderr); // NOTEST
2276 fputs("NOTVAR", stderr); // NOTEST
2279 ###### propagate exec cases
2283 struct var *var = cast(var, prog);
2284 struct variable *v = var->var;
2286 type_err(c, "%d:BUG: no variable!!", prog, NULL, 0, NULL); // NOTEST
2287 return Tnone; // NOTEST
2290 if (v->type == Tnone && v->where_decl == prog)
2291 type_err(c, "error: variable used but not declared: %v",
2292 prog, NULL, 0, NULL);
2293 if (v->type == NULL) {
2294 if (type && !(*perr & Efail)) {
2296 v->where_set = prog;
2299 } else if (!type_compat(type, v->type, rules)) {
2300 type_err(c, "error: expected %1 but variable '%v' is %2", prog,
2301 type, rules, v->type);
2302 type_err(c, "info: this is where '%v' was set to %1", v->where_set,
2303 v->type, rules, NULL);
2305 if (!v->global || v->frame_pos < 0)
2312 ###### interp exec cases
2315 struct var *var = cast(var, e);
2316 struct variable *v = var->var;
2319 lrv = var_value(c, v);
2324 ###### ast functions
2326 static void free_var(struct var *v)
2331 ###### free exec cases
2332 case Xvar: free_var(cast(var, e)); break;
2337 Now that we have the shape of the interpreter in place we can add some
2338 complex types and connected them in to the data structures and the
2339 different phases of parse, analyse, print, interpret.
2341 Being "complex" the language will naturally have syntax to access
2342 specifics of objects of these types. These will fit into the grammar as
2343 "Terms" which are the things that are combined with various operators to
2344 form an "Expression". Where a Term is formed by some operation on another
2345 Term, the subordinate Term will always come first, so for example a
2346 member of an array will be expressed as the Term for the array followed
2347 by an index in square brackets. The strict rule of using postfix
2348 operations makes precedence irrelevant within terms. To provide a place
2349 to put the grammar for terms of each type, we will start out by
2350 introducing the "Term" grammar production, with contains at least a
2351 simple "Value" (to be explained later).
2353 We also take this opportunity to introduce the "ExpressionsList" which
2354 is a simple comma-separated list of expressions - it may be used in
2357 ###### declare terminals
2362 Term -> Value ${ $0 = $<1; }$
2363 | Variable ${ $0 = $<1; }$
2367 ExpressionList -> ExpressionList , Expression ${
2380 Thus far the complex types we have are arrays and structs.
2384 Arrays can be declared by giving a size and a type, as `[size]type' so
2385 `freq:[26]number` declares `freq` to be an array of 26 numbers. The
2386 size can be either a literal number, or a named constant. Some day an
2387 arbitrary expression will be supported.
2389 As a formal parameter to a function, the array can be declared with a
2390 new variable as the size: `name:[size::number]string`. The `size`
2391 variable is set to the size of the array and must be a constant. As
2392 `number` is the only supported type, it can be left out:
2393 `name:[size::]string`.
2395 Arrays cannot be assigned. When pointers are introduced we will also
2396 introduce array slices which can refer to part or all of an array -
2397 the assignment syntax will create a slice. For now, an array can only
2398 ever be referenced by the name it is declared with. It is likely that
2399 a "`copy`" primitive will eventually be define which can be used to
2400 make a copy of an array with controllable recursive depth.
2402 For now we have two sorts of array, those with fixed size either because
2403 it is given as a literal number or because it is a struct member (which
2404 cannot have a runtime-changing size), and those with a size that is
2405 determined at runtime - local variables with a const size. The former
2406 have their size calculated at parse time, the latter at run time.
2408 For the latter type, the `size` field of the type is the size of a
2409 pointer, and the array is reallocated every time it comes into scope.
2411 We differentiate struct fields with a const size from local variables
2412 with a const size by whether they are prepared at parse time or not.
2414 ###### type union fields
2417 int unspec; // size is unspecified - vsize must be set.
2420 struct variable *vsize;
2421 struct type *member;
2424 ###### value union fields
2425 void *array; // used if not static_size
2427 ###### value functions
2429 static int array_prepare_type(struct parse_context *c, struct type *type,
2432 struct value *vsize;
2434 if (type->array.static_size)
2435 return 1; // NOTEST - guard against reentry
2436 if (type->array.unspec && parse_time)
2437 return 1; // NOTEST - unspec is still incomplete
2438 if (parse_time && type->array.vsize && !type->array.vsize->global)
2439 return 1; // NOTEST - should be impossible
2441 if (type->array.vsize) {
2442 vsize = var_value(c, type->array.vsize);
2444 return 1; // NOTEST - should be impossible
2446 mpz_tdiv_q(q, mpq_numref(vsize->num), mpq_denref(vsize->num));
2447 type->array.size = mpz_get_si(q);
2452 if (type->array.member->size <= 0)
2453 return 0; // NOTEST - error caught before here
2455 type->array.static_size = 1;
2456 type->size = type->array.size * type->array.member->size;
2457 type->align = type->array.member->align;
2462 static void array_init(struct type *type, struct value *val)
2465 void *ptr = val->ptr;
2469 if (!type->array.static_size) {
2470 val->array = calloc(type->array.size,
2471 type->array.member->size);
2474 for (i = 0; i < type->array.size; i++) {
2476 v = (void*)ptr + i * type->array.member->size;
2477 val_init(type->array.member, v);
2481 static void array_free(struct type *type, struct value *val)
2484 void *ptr = val->ptr;
2486 if (!type->array.static_size)
2488 for (i = 0; i < type->array.size; i++) {
2490 v = (void*)ptr + i * type->array.member->size;
2491 free_value(type->array.member, v);
2493 if (!type->array.static_size)
2497 static int array_compat(struct type *require, struct type *have,
2498 enum val_rules rules)
2500 if (have->compat != require->compat)
2502 /* Both are arrays, so we can look at details */
2503 if (!type_compat(require->array.member, have->array.member, 0))
2505 if (have->array.unspec && require->array.unspec &&
2506 have->array.size != require->array.size)
2508 if (have->array.unspec || require->array.unspec)
2510 if (require->array.vsize == NULL && have->array.vsize == NULL)
2511 return require->array.size == have->array.size;
2513 return require->array.vsize == have->array.vsize;
2516 static void array_print_type(struct type *type, FILE *f)
2519 if (type->array.vsize) {
2520 struct binding *b = type->array.vsize->name;
2521 fprintf(f, "%.*s%s]", b->name.len, b->name.txt,
2522 type->array.unspec ? "::" : "");
2523 } else if (type->array.size)
2524 fprintf(f, "%d]", type->array.size);
2527 type_print(type->array.member, f);
2530 static struct type array_prototype = {
2532 .prepare_type = array_prepare_type,
2533 .print_type = array_print_type,
2534 .compat = array_compat,
2536 .size = sizeof(void*),
2537 .align = sizeof(void*),
2540 ###### declare terminals
2545 | [ NUMBER ] Type ${ {
2551 if (number_parse(num, tail, $2.txt) == 0)
2552 tok_err(c, "error: unrecognised number", &$2);
2554 tok_err(c, "error: unsupported number suffix", &$2);
2557 elements = mpz_get_ui(mpq_numref(num));
2558 if (mpz_cmp_ui(mpq_denref(num), 1) != 0) {
2559 tok_err(c, "error: array size must be an integer",
2561 } else if (mpz_cmp_ui(mpq_numref(num), 1UL << 30) >= 0)
2562 tok_err(c, "error: array size is too large",
2567 $0 = t = add_anon_type(c, &array_prototype, "array[%d]", elements );
2568 t->array.size = elements;
2569 t->array.member = $<4;
2570 t->array.vsize = NULL;
2573 | [ IDENTIFIER ] Type ${ {
2574 struct variable *v = var_ref(c, $2.txt);
2577 tok_err(c, "error: name undeclared", &$2);
2578 else if (!v->constant)
2579 tok_err(c, "error: array size must be a constant", &$2);
2581 $0 = add_anon_type(c, &array_prototype, "array[%.*s]", $2.txt.len, $2.txt.txt);
2582 $0->array.member = $<4;
2584 $0->array.vsize = v;
2587 ###### formal type grammar
2590 $0 = add_anon_type(c, &array_prototype, "array[]");
2591 $0->array.member = $<Type;
2593 $0->array.unspec = 1;
2594 $0->array.vsize = NULL;
2602 | Term [ Expression ] ${ {
2603 struct binode *b = new(binode);
2611 struct binode *b = new(binode);
2617 ###### print binode cases
2619 print_exec(b->left, -1, bracket);
2621 print_exec(b->right, -1, bracket);
2626 print_exec(b->left, -1, bracket);
2630 ###### propagate binode cases
2632 /* left must be an array, right must be a number,
2633 * result is the member type of the array
2635 propagate_types(b->right, c, perr_local, Tnum, 0);
2636 t = propagate_types(b->left, c, perr, NULL, 0);
2637 if (!t || t->compat != array_compat) {
2638 type_err(c, "error: %1 cannot be indexed", prog, t, 0, NULL);
2641 if (!type_compat(type, t->array.member, rules)) {
2642 type_err(c, "error: have %1 but need %2", prog,
2643 t->array.member, rules, type);
2645 return t->array.member;
2650 /* left must be an array, result is a number
2652 t = propagate_types(b->left, c, perr, NULL, 0);
2653 if (!t || t->compat != array_compat) {
2654 type_err(c, "error: %1 cannot provide length", prog, t, 0, NULL);
2657 if (!type_compat(type, Tnum, rules))
2658 type_err(c, "error: have %1 but need %2", prog,
2663 ###### interp binode cases
2669 lleft = linterp_exec(c, b->left, <ype);
2670 right = interp_exec(c, b->right, &rtype);
2672 mpz_tdiv_q(q, mpq_numref(right.num), mpq_denref(right.num));
2676 if (ltype->array.static_size)
2679 ptr = *(void**)lleft;
2680 rvtype = ltype->array.member;
2681 if (i >= 0 && i < ltype->array.size)
2682 lrv = ptr + i * rvtype->size;
2684 val_init(ltype->array.member, &rv); // UNSAFE
2689 lleft = linterp_exec(c, b->left, <ype);
2690 mpq_set_ui(rv.num, ltype->array.size, 1);
2698 A `struct` is a data-type that contains one or more other data-types.
2699 It differs from an array in that each member can be of a different
2700 type, and they are accessed by name rather than by number. Thus you
2701 cannot choose an element by calculation, you need to know what you
2704 The language makes no promises about how a given structure will be
2705 stored in memory - it is free to rearrange fields to suit whatever
2706 criteria seems important.
2708 Structs are declared separately from program code - they cannot be
2709 declared in-line in a variable declaration like arrays can. A struct
2710 is given a name and this name is used to identify the type - the name
2711 is not prefixed by the word `struct` as it would be in C.
2713 Structs are only treated as the same if they have the same name.
2714 Simply having the same fields in the same order is not enough. This
2715 might change once we can create structure initializers from a list of
2718 Each component datum is identified much like a variable is declared,
2719 with a name, one or two colons, and a type. The type cannot be omitted
2720 as there is no opportunity to deduce the type from usage. An initial
2721 value can be given following an equals sign, so
2723 ##### Example: a struct type
2729 would declare a type called "complex" which has two number fields,
2730 each initialised to zero.
2732 Struct will need to be declared separately from the code that uses
2733 them, so we will need to be able to print out the declaration of a
2734 struct when reprinting the whole program. So a `print_type_decl` type
2735 function will be needed.
2737 ###### type union fields
2746 } *fields; // This is created when field_list is analysed.
2748 struct fieldlist *prev;
2751 } *field_list; // This is created during parsing
2754 ###### type functions
2755 void (*print_type_decl)(struct type *type, FILE *f);
2756 struct type *(*fieldref)(struct type *t, struct parse_context *c,
2757 struct fieldref *f, struct value **vp);
2759 ###### value functions
2761 static void structure_init(struct type *type, struct value *val)
2765 for (i = 0; i < type->structure.nfields; i++) {
2767 v = (void*) val->ptr + type->structure.fields[i].offset;
2768 if (type->structure.fields[i].init)
2769 dup_value(type->structure.fields[i].type,
2770 type->structure.fields[i].init,
2773 val_init(type->structure.fields[i].type, v);
2777 static void structure_free(struct type *type, struct value *val)
2781 for (i = 0; i < type->structure.nfields; i++) {
2783 v = (void*)val->ptr + type->structure.fields[i].offset;
2784 free_value(type->structure.fields[i].type, v);
2788 static void free_fieldlist(struct fieldlist *f)
2792 free_fieldlist(f->prev);
2797 static void structure_free_type(struct type *t)
2800 for (i = 0; i < t->structure.nfields; i++)
2801 if (t->structure.fields[i].init) {
2802 free_value(t->structure.fields[i].type,
2803 t->structure.fields[i].init);
2805 free(t->structure.fields);
2806 free_fieldlist(t->structure.field_list);
2809 static int structure_prepare_type(struct parse_context *c,
2810 struct type *t, int parse_time)
2813 struct fieldlist *f;
2815 if (!parse_time || t->structure.fields)
2818 for (f = t->structure.field_list; f; f=f->prev) {
2822 if (f->f.type->size <= 0)
2824 if (f->f.type->prepare_type)
2825 f->f.type->prepare_type(c, f->f.type, parse_time);
2827 if (f->init == NULL)
2831 propagate_types(f->init, c, &perr, f->f.type, 0);
2832 } while (perr & Eretry);
2834 c->parse_error += 1; // NOTEST
2837 t->structure.nfields = cnt;
2838 t->structure.fields = calloc(cnt, sizeof(struct field));
2839 f = t->structure.field_list;
2841 int a = f->f.type->align;
2843 t->structure.fields[cnt] = f->f;
2844 if (t->size & (a-1))
2845 t->size = (t->size | (a-1)) + 1;
2846 t->structure.fields[cnt].offset = t->size;
2847 t->size += ((f->f.type->size - 1) | (a-1)) + 1;
2851 if (f->init && !c->parse_error) {
2852 struct value vl = interp_exec(c, f->init, NULL);
2853 t->structure.fields[cnt].init =
2854 global_alloc(c, f->f.type, NULL, &vl);
2862 static int find_struct_index(struct type *type, struct text field)
2865 for (i = 0; i < type->structure.nfields; i++)
2866 if (text_cmp(type->structure.fields[i].name, field) == 0)
2868 return IndexInvalid;
2871 static struct type *structure_fieldref(struct type *t, struct parse_context *c,
2872 struct fieldref *f, struct value **vp)
2874 if (f->index == IndexUnknown) {
2875 f->index = find_struct_index(t, f->name);
2877 type_err(c, "error: cannot find requested field in %1",
2878 f->left, t, 0, NULL);
2883 struct value *v = *vp;
2884 v = (void*)v->ptr + t->structure.fields[f->index].offset;
2887 return t->structure.fields[f->index].type;
2890 static struct type structure_prototype = {
2891 .init = structure_init,
2892 .free = structure_free,
2893 .free_type = structure_free_type,
2894 .print_type_decl = structure_print_type,
2895 .prepare_type = structure_prepare_type,
2896 .fieldref = structure_fieldref,
2909 enum { IndexUnknown = -1, IndexInvalid = -2 };
2911 ###### free exec cases
2913 free_exec(cast(fieldref, e)->left);
2917 ###### declare terminals
2922 | Term . IDENTIFIER ${ {
2923 struct fieldref *fr = new_pos(fieldref, $2);
2926 fr->index = IndexUnknown;
2930 ###### print exec cases
2934 struct fieldref *f = cast(fieldref, e);
2935 print_exec(f->left, -1, bracket);
2936 printf(".%.*s", f->name.len, f->name.txt);
2940 ###### propagate exec cases
2944 struct fieldref *f = cast(fieldref, prog);
2945 struct type *st = propagate_types(f->left, c, perr, NULL, 0);
2947 if (!st || !st->fieldref)
2948 type_err(c, "error: field reference on %1 is not supported",
2949 f->left, st, 0, NULL);
2951 t = st->fieldref(st, c, f, NULL);
2952 if (t && !type_compat(type, t, rules))
2953 type_err(c, "error: have %1 but need %2", prog,
2960 ###### interp exec cases
2963 struct fieldref *f = cast(fieldref, e);
2965 struct value *lleft = linterp_exec(c, f->left, <ype);
2967 rvtype = ltype->fieldref(ltype, c, f, &lrv);
2971 ###### top level grammar
2973 StructName -> IDENTIFIER ${ {
2974 struct type *t = find_type(c, $ID.txt);
2976 if (t && t->size >= 0) {
2977 tok_err(c, "error: type already declared", &$ID);
2978 tok_err(c, "info: this is location of declartion", &t->first_use);
2982 t = add_type(c, $ID.txt, NULL);
2987 DeclareStruct -> struct StructName FieldBlock Newlines ${ {
2988 struct type *t = $<SN;
2989 struct type tmp = *t;
2991 *t = structure_prototype;
2994 t->first_use = tmp.first_use;
2996 t->structure.field_list = $<FB;
3000 FieldBlock -> { IN OptNL FieldLines OUT OptNL } ${ $0 = $<FL; }$
3001 | { SimpleFieldList } ${ $0 = $<SFL; }$
3002 | IN OptNL FieldLines OUT ${ $0 = $<FL; }$
3003 | SimpleFieldList EOL ${ $0 = $<SFL; }$
3005 FieldLines -> SimpleFieldList Newlines ${ $0 = $<SFL; }$
3006 | FieldLines SimpleFieldList Newlines ${ {
3007 struct fieldlist *f = $<SFL;
3018 SimpleFieldList -> Field ${ $0 = $<F; }$
3019 | SimpleFieldList ; Field ${
3023 | SimpleFieldList ; ${
3026 | ERROR ${ tok_err(c, "Syntax error in struct field", &$1); }$
3028 Field -> IDENTIFIER : Type = Expression ${ {
3029 $0 = calloc(1, sizeof(struct fieldlist));
3030 $0->f.name = $ID.txt;
3031 $0->f.type = $<Type;
3035 | IDENTIFIER : Type ${
3036 $0 = calloc(1, sizeof(struct fieldlist));
3037 $0->f.name = $ID.txt;
3038 $0->f.type = $<Type;
3041 ###### forward decls
3042 static void structure_print_type(struct type *t, FILE *f);
3044 ###### value functions
3045 static void structure_print_type(struct type *t, FILE *f)
3049 fprintf(f, "struct %.*s\n", t->name.len, t->name.txt);
3051 for (i = 0; i < t->structure.nfields; i++) {
3052 struct field *fl = t->structure.fields + i;
3053 fprintf(f, " %.*s : ", fl->name.len, fl->name.txt);
3054 type_print(fl->type, f);
3055 if (fl->type->print && fl->init) {
3057 if (fl->type == Tstr)
3059 print_value(fl->type, fl->init, f);
3060 if (fl->type == Tstr)
3067 ###### print type decls
3072 while (target != 0) {
3074 for (t = context.typelist; t ; t=t->next)
3075 if (!t->anon && t->print_type_decl &&
3085 t->print_type_decl(t, stdout);
3093 References, or pointers, are values that refer to another value. They
3094 can only refer to a `struct`, though as a struct can embed anything they
3095 can effectively refer to anything.
3097 References are potentially dangerous as they might refer to some
3098 variable which no longer exists - either because a stack frame
3099 containing it has been discarded or because the value was allocated on
3100 the heap and has now been free. Ocean does not yet provide any
3101 protection against these problems. It will in due course.
3103 With references comes the opportunity and the need to explicitly
3104 allocate values on the "heap" and to free them. We currently provide
3105 fairly basic support for this.
3107 Reference make use of the `@` symbol in various ways. A type that starts
3108 with `@` is a reference to whatever follows. A reference value
3109 followed by an `@` acts as the referred value, though the `@` is often
3110 not needed. Finally, an expression that starts with `@` is a special
3111 reference related expression. Some examples might help.
3113 ##### Example: Reference examples
3120 bar.number = 23; bar.string = "hello"
3131 Obviously this is very contrived. `ref` is a reference to a `foo` which
3132 is initially set to refer to the value stored in `bar` - no extra syntax
3133 is needed to "Take the address of" `bar` - the fact that `ref` is a
3134 reference means that only the address make sense.
3136 When `ref.a` is accessed, that is whatever value is stored in `bar.a`.
3137 The same syntax is used for accessing fields both in structs and in
3138 references to structs. It would be correct to use `ref@.a`, but not
3141 `@new()` creates an object of whatever type is needed for the program
3142 to by type-correct. In future iterations of Ocean, arguments a
3143 constructor will access arguments, so the the syntax now looks like a
3144 function call. `@free` can be assigned any reference that was returned
3145 by `@new()`, and it will be freed. `@nil` is a value of whatever
3146 reference type is appropriate, and is stable and never the address of
3147 anything in the heap or on the stack. A reference can be assigned
3148 `@nil` or compared against that value.
3150 ###### declare terminals
3153 ###### type union fields
3156 struct type *referent;
3159 ###### value union fields
3162 ###### value functions
3164 static void reference_print_type(struct type *t, FILE *f)
3167 type_print(t->reference.referent, f);
3170 static int reference_cmp(struct type *tl, struct type *tr,
3171 struct value *left, struct value *right)
3173 return left->ref == right->ref ? 0 : 1;
3176 static void reference_dup(struct type *t,
3177 struct value *vold, struct value *vnew)
3179 vnew->ref = vold->ref;
3182 static void reference_free(struct type *t, struct value *v)
3184 /* Nothing to do here */
3187 static int reference_compat(struct type *require, struct type *have,
3188 enum val_rules rules)
3191 if (require->reference.referent == have)
3193 if (have->compat != require->compat)
3195 if (have->reference.referent != require->reference.referent)
3200 static int reference_test(struct type *type, struct value *val)
3202 return val->ref != NULL;
3205 static struct type *reference_fieldref(struct type *t, struct parse_context *c,
3206 struct fieldref *f, struct value **vp)
3208 struct type *rt = t->reference.referent;
3213 return rt->fieldref(rt, c, f, vp);
3215 type_err(c, "error: field reference on %1 is not supported",
3216 f->left, rt, 0, NULL);
3220 static struct type reference_prototype = {
3221 .print_type = reference_print_type,
3222 .cmp_eq = reference_cmp,
3223 .dup = reference_dup,
3224 .test = reference_test,
3225 .free = reference_free,
3226 .compat = reference_compat,
3227 .fieldref = reference_fieldref,
3228 .size = sizeof(void*),
3229 .align = sizeof(void*),
3235 struct type *t = find_type(c, $ID.txt);
3237 t = add_type(c, $ID.txt, NULL);
3240 $0 = find_anon_type(c, &reference_prototype, "@%.*s",
3241 $ID.txt.len, $ID.txt.txt);
3242 $0->reference.referent = t;
3245 ###### core functions
3246 static int text_is(struct text t, char *s)
3248 return (strlen(s) == t.len &&
3249 strncmp(s, t.txt, t.len) == 0);
3258 enum ref_func { RefNew, RefFree, RefNil } action;
3259 struct type *reftype;
3263 ###### SimpleStatement Grammar
3265 | @ IDENTIFIER = Expression ${ {
3266 struct ref *r = new_pos(ref, $ID);
3268 if (!text_is($ID.txt, "free"))
3269 tok_err(c, "error: only \"@free\" makes sense here",
3273 r->action = RefFree;
3277 ###### expression grammar
3278 | @ IDENTIFIER ( ) ${
3279 // Only 'new' valid here
3280 if (!text_is($ID.txt, "new")) {
3281 tok_err(c, "error: Only reference function is \"@new()\"",
3284 struct ref *r = new_pos(ref,$ID);
3290 // Only 'nil' valid here
3291 if (!text_is($ID.txt, "nil")) {
3292 tok_err(c, "error: Only reference value is \"@nil\"",
3295 struct ref *r = new_pos(ref,$ID);
3301 ###### print exec cases
3303 struct ref *r = cast(ref, e);
3304 switch (r->action) {
3306 printf("@new()"); break;
3308 printf("@nil"); break;
3310 do_indent(indent, "@free = ");
3311 print_exec(r->right, indent, bracket);
3317 ###### propagate exec cases
3319 struct ref *r = cast(ref, prog);
3320 switch (r->action) {
3322 if (type && type->free != reference_free) {
3323 type_err(c, "error: @new() can only be used with references, not %1",
3324 prog, type, 0, NULL);
3327 if (type && !r->reftype) {
3334 if (type && type->free != reference_free)
3335 type_err(c, "error: @nil can only be used with reference, not %1",
3336 prog, type, 0, NULL);
3337 if (type && !r->reftype) {
3344 t = propagate_types(r->right, c, perr_local, NULL, 0);
3345 if (t && t->free != reference_free)
3346 type_err(c, "error: @free can only be assigned a reference, not %1",
3355 ###### interp exec cases
3357 struct ref *r = cast(ref, e);
3358 switch (r->action) {
3361 rv.ref = calloc(1, r->reftype->reference.referent->size);
3362 rvtype = r->reftype;
3366 rvtype = r->reftype;
3369 rv = interp_exec(c, r->right, &rvtype);
3370 free_value(rvtype->reference.referent, rv.ref);
3378 ###### free exec cases
3380 struct ref *r = cast(ref, e);
3381 free_exec(r->right);
3386 ###### Expressions: dereference
3394 struct binode *b = new(binode);
3400 ###### print binode cases
3402 print_exec(b->left, -1, bracket);
3406 print_exec(b->left, -1, bracket);
3409 ###### propagate binode cases
3411 /* left must be a reference, and we return what it refers to */
3412 /* FIXME how can I pass the expected type down? */
3413 t = propagate_types(b->left, c, perr, NULL, 0);
3415 if (!t || t->free != reference_free)
3416 type_err(c, "error: Cannot dereference %1", b, t, 0, NULL);
3418 return t->reference.referent;
3422 /* left must be lval, we create reference to it */
3423 if (!type || type->free != reference_free)
3424 t = propagate_types(b->left, c, perr, type, 0); // NOTEST impossible
3426 t = propagate_types(b->left, c, perr,
3427 type->reference.referent, 0);
3429 t = find_anon_type(c, &reference_prototype, "@%.*s",
3430 t->name.len, t->name.txt);
3433 ###### interp binode cases
3435 left = interp_exec(c, b->left, <ype);
3437 rvtype = ltype->reference.referent;
3441 rv.ref = linterp_exec(c, b->left, &rvtype);
3442 rvtype = find_anon_type(c, &reference_prototype, "@%.*s",
3443 rvtype->name.len, rvtype->name.txt);
3449 A function is a chunk of code which can be passed parameters and can
3450 return results. Each function has a type which includes the set of
3451 parameters and the return value. As yet these types cannot be declared
3452 separately from the function itself.
3454 The parameters can be specified either in parentheses as a ';' separated
3457 ##### Example: function 1
3459 func main(av:[ac::number]string; env:[envc::number]string)
3462 or as an indented list of one parameter per line (though each line can
3463 be a ';' separated list)
3465 ##### Example: function 2
3468 argv:[argc::number]string
3469 env:[envc::number]string
3473 In the first case a return type can follow the parentheses after a colon,
3474 in the second it is given on a line starting with the word `return`.
3476 ##### Example: functions that return
3478 func add(a:number; b:number): number
3488 Rather than returning a type, the function can specify a set of local
3489 variables to return as a struct. The values of these variables when the
3490 function exits will be provided to the caller. For this the return type
3491 is replaced with a block of result declarations, either in parentheses
3492 or bracketed by `return` and `do`.
3494 ##### Example: functions returning multiple variables
3496 func to_cartesian(rho:number; theta:number):(x:number; y:number)
3509 For constructing the lists we use a `List` binode, which will be
3510 further detailed when Expression Lists are introduced.
3512 ###### type union fields
3515 struct binode *params;
3516 struct type *return_type;
3517 struct variable *scope;
3518 int inline_result; // return value is at start of 'local'
3522 ###### value union fields
3523 struct exec *function;
3525 ###### type functions
3526 void (*check_args)(struct parse_context *c, enum prop_err *perr,
3527 struct type *require, struct exec *args);
3529 ###### value functions
3531 static void function_free(struct type *type, struct value *val)
3533 free_exec(val->function);
3534 val->function = NULL;
3537 static int function_compat(struct type *require, struct type *have,
3538 enum val_rules rules)
3540 // FIXME can I do anything here yet?
3544 static struct exec *take_addr(struct exec *e)
3546 struct binode *rv = new(binode);
3552 static void function_check_args(struct parse_context *c, enum prop_err *perr,
3553 struct type *require, struct exec *args)
3555 /* This should be 'compat', but we don't have a 'tuple' type to
3556 * hold the type of 'args'
3558 struct binode *arg = cast(binode, args);
3559 struct binode *param = require->function.params;
3562 struct var *pv = cast(var, param->left);
3563 struct type *t = pv->var->type, *t2;
3565 type_err(c, "error: insufficient arguments to function.",
3566 args, NULL, 0, NULL);
3570 t2 = propagate_types(arg->left, c, perr, t, Rrefok);
3571 if (t->free == reference_free &&
3572 t->reference.referent == t2 &&
3574 arg->left = take_addr(arg->left);
3575 } else if (!(*perr & Efail) && !type_compat(t2, t, 0)) {
3576 type_err(c, "error: cannot pass rval when reference expected",
3577 arg->left, NULL, 0, NULL);
3579 param = cast(binode, param->right);
3580 arg = cast(binode, arg->right);
3583 type_err(c, "error: too many arguments to function.",
3584 args, NULL, 0, NULL);
3587 static void function_print(struct type *type, struct value *val, FILE *f)
3590 print_exec(val->function, 1, 0);
3593 static void function_print_type_decl(struct type *type, FILE *f)
3597 for (b = type->function.params; b; b = cast(binode, b->right)) {
3598 struct variable *v = cast(var, b->left)->var;
3599 fprintf(f, "%.*s%s", v->name->name.len, v->name->name.txt,
3600 v->constant ? "::" : ":");
3601 type_print(v->type, f);
3606 if (type->function.return_type != Tnone) {
3608 if (type->function.inline_result) {
3610 struct type *t = type->function.return_type;
3612 for (i = 0; i < t->structure.nfields; i++) {
3613 struct field *fl = t->structure.fields + i;
3616 fprintf(f, "%.*s:", fl->name.len, fl->name.txt);
3617 type_print(fl->type, f);
3621 type_print(type->function.return_type, f);
3625 static void function_free_type(struct type *t)
3627 free_exec(t->function.params);
3630 static struct type function_prototype = {
3631 .size = sizeof(void*),
3632 .align = sizeof(void*),
3633 .free = function_free,
3634 .compat = function_compat,
3635 .check_args = function_check_args,
3636 .print = function_print,
3637 .print_type_decl = function_print_type_decl,
3638 .free_type = function_free_type,
3641 ###### declare terminals
3648 FuncName -> IDENTIFIER ${ {
3649 struct variable *v = var_decl(c, $1.txt);
3650 struct var *e = new_pos(var, $1);
3657 v = var_ref(c, $1.txt);
3659 type_err(c, "error: function '%v' redeclared",
3661 type_err(c, "info: this is where '%v' was first declared",
3662 v->where_decl, NULL, 0, NULL);
3668 Args -> ArgsLine NEWLINE ${ $0 = $<AL; }$
3669 | Args ArgsLine NEWLINE ${ {
3670 struct binode *b = $<AL;
3671 struct binode **bp = &b;
3673 bp = (struct binode **)&(*bp)->left;
3678 ArgsLine -> ${ $0 = NULL; }$
3679 | Varlist ${ $0 = $<1; }$
3680 | Varlist ; ${ $0 = $<1; }$
3682 Varlist -> Varlist ; ArgDecl ${
3683 $0 = new_pos(binode, $2);
3696 ArgDecl -> IDENTIFIER : FormalType ${ {
3697 struct variable *v = var_decl(c, $ID.txt);
3698 $0 = new_pos(var, $ID);
3705 ##### Function calls
3707 A function call can appear either as an expression or as a statement.
3708 We use a new 'Funcall' binode type to link the function with a list of
3709 arguments, form with the 'List' nodes.
3711 We have already seen the "Term" which is how a function call can appear
3712 in an expression. To parse a function call into a statement we include
3713 it in the "SimpleStatement Grammar" which will be described later.
3719 | Term ( ExpressionList ) ${ {
3720 struct binode *b = new(binode);
3723 b->right = reorder_bilist($<EL);
3727 struct binode *b = new(binode);
3734 ###### SimpleStatement Grammar
3736 | Term ( ExpressionList ) ${ {
3737 struct binode *b = new(binode);
3740 b->right = reorder_bilist($<EL);
3744 ###### print binode cases
3747 do_indent(indent, "");
3748 print_exec(b->left, -1, bracket);
3750 for (b = cast(binode, b->right); b; b = cast(binode, b->right)) {
3753 print_exec(b->left, -1, bracket);
3763 ###### propagate binode cases
3766 /* Every arg must match formal parameter, and result
3767 * is return type of function
3769 struct binode *args = cast(binode, b->right);
3770 struct var *v = cast(var, b->left);
3772 if (!v->var->type || v->var->type->check_args == NULL) {
3773 type_err(c, "error: attempt to call a non-function.",
3774 prog, NULL, 0, NULL);
3778 v->var->type->check_args(c, perr_local, v->var->type, args);
3779 if (v->var->type->function.inline_result)
3782 return v->var->type->function.return_type;
3785 ###### interp binode cases
3788 struct var *v = cast(var, b->left);
3789 struct type *t = v->var->type;
3790 void *oldlocal = c->local;
3791 int old_size = c->local_size;
3792 void *local = calloc(1, t->function.local_size);
3793 struct value *fbody = var_value(c, v->var);
3794 struct binode *arg = cast(binode, b->right);
3795 struct binode *param = t->function.params;
3798 struct var *pv = cast(var, param->left);
3799 struct type *vtype = NULL;
3800 struct value val = interp_exec(c, arg->left, &vtype);
3802 c->local = local; c->local_size = t->function.local_size;
3803 lval = var_value(c, pv->var);
3804 c->local = oldlocal; c->local_size = old_size;
3805 memcpy(lval, &val, vtype->size);
3806 param = cast(binode, param->right);
3807 arg = cast(binode, arg->right);
3809 c->local = local; c->local_size = t->function.local_size;
3810 if (t->function.inline_result && dtype) {
3811 _interp_exec(c, fbody->function, NULL, NULL);
3812 memcpy(dest, local, dtype->size);
3813 rvtype = ret.type = NULL;
3815 rv = interp_exec(c, fbody->function, &rvtype);
3816 c->local = oldlocal; c->local_size = old_size;
3821 ## Complex executables: statements and expressions
3823 Now that we have types and values and variables and most of the basic
3824 Terms which provide access to these, we can explore the more complex
3825 code that combine all of these to get useful work done. Specifically
3826 statements and expressions.
3828 Expressions are various combinations of Terms. We will use operator
3829 precedence to ensure correct parsing. The simplest Expression is just a
3830 Term - others will follow.
3835 Expression -> Term ${ $0 = $<Term; }$
3836 ## expression grammar
3838 ### Expressions: Conditional
3840 Our first user of the `binode` will be conditional expressions, which
3841 is a bit odd as they actually have three components. That will be
3842 handled by having 2 binodes for each expression. The conditional
3843 expression is the lowest precedence operator which is why we define it
3844 first - to start the precedence list.
3846 Conditional expressions are of the form "value `if` condition `else`
3847 other_value". They associate to the right, so everything to the right
3848 of `else` is part of an else value, while only a higher-precedence to
3849 the left of `if` is the if values. Between `if` and `else` there is no
3850 room for ambiguity, so a full conditional expression is allowed in
3856 ###### declare terminals
3860 ###### expression grammar
3862 | Expression if Expression else Expression $$ifelse ${ {
3863 struct binode *b1 = new(binode);
3864 struct binode *b2 = new(binode);
3874 ###### print binode cases
3877 b2 = cast(binode, b->right);
3878 if (bracket) printf("(");
3879 print_exec(b2->left, -1, bracket);
3881 print_exec(b->left, -1, bracket);
3883 print_exec(b2->right, -1, bracket);
3884 if (bracket) printf(")");
3887 ###### propagate binode cases
3890 /* cond must be Tbool, others must match */
3891 struct binode *b2 = cast(binode, b->right);
3894 propagate_types(b->left, c, perr_local, Tbool, 0);
3895 t = propagate_types(b2->left, c, perr, type, 0);
3896 t2 = propagate_types(b2->right, c, perr, type ?: t, 0);
3900 ###### interp binode cases
3903 struct binode *b2 = cast(binode, b->right);
3904 left = interp_exec(c, b->left, <ype);
3906 rv = interp_exec(c, b2->left, &rvtype);
3908 rv = interp_exec(c, b2->right, &rvtype);
3912 ### Expressions: Boolean
3914 The next class of expressions to use the `binode` will be Boolean
3915 expressions. `and` and `or` are short-circuit operators that don't
3916 evaluate the second expression if not necessary.
3923 ###### declare terminals
3928 ###### expression grammar
3929 | Expression or Expression ${ {
3930 struct binode *b = new(binode);
3936 | Expression and Expression ${ {
3937 struct binode *b = new(binode);
3943 | not Expression ${ {
3944 struct binode *b = new(binode);
3950 ###### print binode cases
3952 if (bracket) printf("(");
3953 print_exec(b->left, -1, bracket);
3955 print_exec(b->right, -1, bracket);
3956 if (bracket) printf(")");
3959 if (bracket) printf("(");
3960 print_exec(b->left, -1, bracket);
3962 print_exec(b->right, -1, bracket);
3963 if (bracket) printf(")");
3966 if (bracket) printf("(");
3968 print_exec(b->right, -1, bracket);
3969 if (bracket) printf(")");
3972 ###### propagate binode cases
3976 /* both must be Tbool, result is Tbool */
3977 propagate_types(b->left, c, perr, Tbool, 0);
3978 propagate_types(b->right, c, perr, Tbool, 0);
3979 if (type && type != Tbool)
3980 type_err(c, "error: %1 operation found where %2 expected", prog,
3985 ###### interp binode cases
3987 rv = interp_exec(c, b->left, &rvtype);
3989 rv = interp_exec(c, b->right, NULL);
3992 rv = interp_exec(c, b->left, &rvtype);
3994 rv = interp_exec(c, b->right, NULL);
3997 rv = interp_exec(c, b->right, &rvtype);
4001 ### Expressions: Comparison
4003 Of slightly higher precedence that Boolean expressions are Comparisons.
4004 A comparison takes arguments of any comparable type, but the two types
4007 To simplify the parsing we introduce an `eop` which can record an
4008 expression operator, and the `CMPop` non-terminal will match one of them.
4015 ###### ast functions
4016 static void free_eop(struct eop *e)
4030 ###### declare terminals
4031 $LEFT < > <= >= == != CMPop
4033 ###### expression grammar
4034 | Expression CMPop Expression ${ {
4035 struct binode *b = new(binode);
4045 CMPop -> < ${ $0.op = Less; }$
4046 | > ${ $0.op = Gtr; }$
4047 | <= ${ $0.op = LessEq; }$
4048 | >= ${ $0.op = GtrEq; }$
4049 | == ${ $0.op = Eql; }$
4050 | != ${ $0.op = NEql; }$
4052 ###### print binode cases
4060 if (bracket) printf("(");
4061 print_exec(b->left, -1, bracket);
4063 case Less: printf(" < "); break;
4064 case LessEq: printf(" <= "); break;
4065 case Gtr: printf(" > "); break;
4066 case GtrEq: printf(" >= "); break;
4067 case Eql: printf(" == "); break;
4068 case NEql: printf(" != "); break;
4069 default: abort(); // NOTEST
4071 print_exec(b->right, -1, bracket);
4072 if (bracket) printf(")");
4075 ###### propagate binode cases
4082 /* Both must match but not be labels, result is Tbool */
4083 t = propagate_types(b->left, c, perr, NULL, 0);
4085 propagate_types(b->right, c, perr, t, 0);
4087 t = propagate_types(b->right, c, perr, NULL, 0); // NOTEST
4089 t = propagate_types(b->left, c, perr, t, 0); // NOTEST
4091 if (!type_compat(type, Tbool, 0))
4092 type_err(c, "error: Comparison returns %1 but %2 expected", prog,
4093 Tbool, rules, type);
4097 ###### interp binode cases
4106 left = interp_exec(c, b->left, <ype);
4107 right = interp_exec(c, b->right, &rtype);
4108 cmp = value_cmp(ltype, rtype, &left, &right);
4111 case Less: rv.bool = cmp < 0; break;
4112 case LessEq: rv.bool = cmp <= 0; break;
4113 case Gtr: rv.bool = cmp > 0; break;
4114 case GtrEq: rv.bool = cmp >= 0; break;
4115 case Eql: rv.bool = cmp == 0; break;
4116 case NEql: rv.bool = cmp != 0; break;
4117 default: rv.bool = 0; break; // NOTEST
4122 ### Expressions: Arithmetic etc.
4124 The remaining expressions with the highest precedence are arithmetic,
4125 string concatenation, string conversion, and testing. String concatenation
4126 (`++`) has the same precedence as multiplication and division, but lower
4129 Testing comes in two forms. A single question mark (`?`) is a uniary
4130 operator which converts come types into Boolean. The general meaning is
4131 "is this a value value" and there will be more uses as the language
4132 develops. A double questionmark (`??`) is a binary operator (Choose),
4133 with same precedence as multiplication, which returns the LHS if it
4134 tests successfully, else returns the RHS.
4136 String conversion is a temporary feature until I get a better type
4137 system. `$` is a prefix operator which expects a string and returns
4140 `+` and `-` are both infix and prefix operations (where they are
4141 absolute value and negation). These have different operator names.
4143 We also have a 'Bracket' operator which records where parentheses were
4144 found. This makes it easy to reproduce these when printing. Possibly I
4145 should only insert brackets were needed for precedence. Putting
4146 parentheses around an expression converts it into a Term,
4152 Absolute, Negate, Test,
4156 ###### declare terminals
4158 $LEFT * / % ++ ?? Top
4162 ###### expression grammar
4163 | Expression Eop Expression ${ {
4164 struct binode *b = new(binode);
4171 | Expression Top Expression ${ {
4172 struct binode *b = new(binode);
4179 | Uop Expression ${ {
4180 struct binode *b = new(binode);
4188 | ( Expression ) ${ {
4189 struct binode *b = new_pos(binode, $1);
4198 Eop -> + ${ $0.op = Plus; }$
4199 | - ${ $0.op = Minus; }$
4201 Uop -> + ${ $0.op = Absolute; }$
4202 | - ${ $0.op = Negate; }$
4203 | $ ${ $0.op = StringConv; }$
4204 | ? ${ $0.op = Test; }$
4206 Top -> * ${ $0.op = Times; }$
4207 | / ${ $0.op = Divide; }$
4208 | % ${ $0.op = Rem; }$
4209 | ++ ${ $0.op = Concat; }$
4210 | ?? ${ $0.op = Choose; }$
4212 ###### print binode cases
4220 if (bracket) printf("(");
4221 print_exec(b->left, indent, bracket);
4223 case Plus: fputs(" + ", stdout); break;
4224 case Minus: fputs(" - ", stdout); break;
4225 case Times: fputs(" * ", stdout); break;
4226 case Divide: fputs(" / ", stdout); break;
4227 case Rem: fputs(" % ", stdout); break;
4228 case Concat: fputs(" ++ ", stdout); break;
4229 case Choose: fputs(" ?? ", stdout); break;
4230 default: abort(); // NOTEST
4232 print_exec(b->right, indent, bracket);
4233 if (bracket) printf(")");
4239 if (bracket) printf("(");
4241 case Absolute: fputs("+", stdout); break;
4242 case Negate: fputs("-", stdout); break;
4243 case StringConv: fputs("$", stdout); break;
4244 case Test: fputs("?", stdout); break;
4245 default: abort(); // NOTEST
4247 print_exec(b->right, indent, bracket);
4248 if (bracket) printf(")");
4251 /* Avoid double brackets... */
4252 if (!bracket) printf("(");
4253 print_exec(b->right, indent, bracket);
4254 if (!bracket) printf(")");
4257 ###### propagate binode cases
4263 /* both must be numbers, result is Tnum */
4266 /* as propagate_types ignores a NULL,
4267 * unary ops fit here too */
4268 propagate_types(b->left, c, perr, Tnum, 0);
4269 propagate_types(b->right, c, perr, Tnum, 0);
4270 if (!type_compat(type, Tnum, 0))
4271 type_err(c, "error: Arithmetic returns %1 but %2 expected", prog,
4277 /* both must be Tstr, result is Tstr */
4278 propagate_types(b->left, c, perr, Tstr, 0);
4279 propagate_types(b->right, c, perr, Tstr, 0);
4280 if (!type_compat(type, Tstr, 0))
4281 type_err(c, "error: Concat returns %1 but %2 expected", prog,
4287 /* op must be string, result is number */
4288 propagate_types(b->left, c, perr, Tstr, 0);
4289 if (!type_compat(type, Tnum, 0))
4291 "error: Can only convert string to number, not %1",
4292 prog, type, 0, NULL);
4297 /* LHS must support ->test, result is Tbool */
4298 t = propagate_types(b->right, c, perr, NULL, 0);
4300 type_err(c, "error: '?' requires a testable value, not %1",
4306 /* LHS and RHS must match and are returned. Must support
4309 t = propagate_types(b->left, c, perr, type, rules);
4310 t = propagate_types(b->right, c, perr, t, rules);
4311 if (t && t->test == NULL)
4312 type_err(c, "error: \"??\" requires a testable value, not %1",
4318 return propagate_types(b->right, c, perr, type, rules);
4320 ###### interp binode cases
4323 rv = interp_exec(c, b->left, &rvtype);
4324 right = interp_exec(c, b->right, &rtype);
4325 mpq_add(rv.num, rv.num, right.num);
4328 rv = interp_exec(c, b->left, &rvtype);
4329 right = interp_exec(c, b->right, &rtype);
4330 mpq_sub(rv.num, rv.num, right.num);
4333 rv = interp_exec(c, b->left, &rvtype);
4334 right = interp_exec(c, b->right, &rtype);
4335 mpq_mul(rv.num, rv.num, right.num);
4338 rv = interp_exec(c, b->left, &rvtype);
4339 right = interp_exec(c, b->right, &rtype);
4340 mpq_div(rv.num, rv.num, right.num);
4345 left = interp_exec(c, b->left, <ype);
4346 right = interp_exec(c, b->right, &rtype);
4347 mpz_init(l); mpz_init(r); mpz_init(rem);
4348 mpz_tdiv_q(l, mpq_numref(left.num), mpq_denref(left.num));
4349 mpz_tdiv_q(r, mpq_numref(right.num), mpq_denref(right.num));
4350 mpz_tdiv_r(rem, l, r);
4351 val_init(Tnum, &rv);
4352 mpq_set_z(rv.num, rem);
4353 mpz_clear(r); mpz_clear(l); mpz_clear(rem);
4358 rv = interp_exec(c, b->right, &rvtype);
4359 mpq_neg(rv.num, rv.num);
4362 rv = interp_exec(c, b->right, &rvtype);
4363 mpq_abs(rv.num, rv.num);
4366 rv = interp_exec(c, b->right, &rvtype);
4369 left = interp_exec(c, b->left, <ype);
4370 right = interp_exec(c, b->right, &rtype);
4372 rv.str = text_join(left.str, right.str);
4375 right = interp_exec(c, b->right, &rvtype);
4379 struct text tx = right.str;
4382 if (tx.txt[0] == '-') {
4387 if (number_parse(rv.num, tail, tx) == 0)
4390 mpq_neg(rv.num, rv.num);
4392 printf("Unsupported suffix: %.*s\n", tx.len, tx.txt);
4396 right = interp_exec(c, b->right, &rtype);
4398 rv.bool = !!rtype->test(rtype, &right);
4401 left = interp_exec(c, b->left, <ype);
4402 if (ltype->test(ltype, &left)) {
4407 rv = interp_exec(c, b->right, &rvtype);
4410 ###### value functions
4412 static struct text text_join(struct text a, struct text b)
4415 rv.len = a.len + b.len;
4416 rv.txt = malloc(rv.len);
4417 memcpy(rv.txt, a.txt, a.len);
4418 memcpy(rv.txt+a.len, b.txt, b.len);
4422 ### Blocks, Statements, and Statement lists.
4424 Now that we have expressions out of the way we need to turn to
4425 statements. There are simple statements and more complex statements.
4426 Simple statements do not contain (syntactic) newlines, complex statements do.
4428 Statements often come in sequences and we have corresponding simple
4429 statement lists and complex statement lists.
4430 The former comprise only simple statements separated by semicolons.
4431 The later comprise complex statements and simple statement lists. They are
4432 separated by newlines. Thus the semicolon is only used to separate
4433 simple statements on the one line. This may be overly restrictive,
4434 but I'm not sure I ever want a complex statement to share a line with
4437 Note that a simple statement list can still use multiple lines if
4438 subsequent lines are indented, so
4440 ###### Example: wrapped simple statement list
4445 is a single simple statement list. This might allow room for
4446 confusion, so I'm not set on it yet.
4448 A simple statement list needs no extra syntax. A complex statement
4449 list has two syntactic forms. It can be enclosed in braces (much like
4450 C blocks), or it can be introduced by an indent and continue until an
4451 unindented newline (much like Python blocks). With this extra syntax
4452 it is referred to as a block.
4454 Note that a block does not have to include any newlines if it only
4455 contains simple statements. So both of:
4457 if condition: a=b; d=f
4459 if condition { a=b; print f }
4463 In either case the list is constructed from a `binode` list with
4464 `Block` as the operator. When parsing the list it is most convenient
4465 to append to the end, so a list is a list and a statement. When using
4466 the list it is more convenient to consider a list to be a statement
4467 and a list. So we need a function to re-order a list.
4468 `reorder_bilist` serves this purpose.
4470 The only stand-alone statement we introduce at this stage is `pass`
4471 which does nothing and is represented as a `NULL` pointer in a `Block`
4472 list. Other stand-alone statements will follow once the infrastructure
4475 As many statements will use binodes, we declare a binode pointer 'b' in
4476 the common header for all reductions to use.
4478 ###### Parser: reduce
4489 Block -> { IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
4490 | { SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
4491 | SimpleStatements ; ${ $0 = reorder_bilist($<SS); }$
4492 | SimpleStatements EOL ${ $0 = reorder_bilist($<SS);
4494 | IN OptNL Statementlist OUT ${ $0 = $<Sl; }$
4496 OpenBlock -> OpenScope { IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
4497 | OpenScope { SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
4498 | OpenScope SimpleStatements ; ${ $0 = reorder_bilist($<SS); }$
4499 | OpenScope SimpleStatements EOL ${ $0 = reorder_bilist($<SS); }$
4500 | IN OpenScope OptNL Statementlist OUT ${ $0 = $<Sl; }$
4502 UseBlock -> { IN OpenScope OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
4503 | { OpenScope SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
4504 | IN OpenScope OptNL Statementlist OUT ${ $0 = $<Sl; }$
4506 ColonBlock -> { IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
4507 | { SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
4508 | : SimpleStatements ; ${ $0 = reorder_bilist($<SS); }$
4509 | : SimpleStatements EOL ${ $0 = reorder_bilist($<SS); }$
4510 | : IN OptNL Statementlist OUT ${ $0 = $<Sl; }$
4512 Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<CS); }$
4514 ComplexStatements -> ComplexStatements ComplexStatement ${
4516 $0 = $<1; // NOTEST - impossible
4524 | ComplexStatement ${
4526 $0 = NULL; // NOTEST - impossible
4536 ComplexStatement -> SimpleStatements Newlines ${
4537 $0 = reorder_bilist($<SS);
4539 | SimpleStatements ; Newlines ${
4540 $0 = reorder_bilist($<SS);
4542 ## ComplexStatement Grammar
4545 SimpleStatements -> SimpleStatements ; SimpleStatement ${
4551 | SimpleStatement ${
4560 SimpleStatement -> pass ${ $0 = NULL; }$
4561 | ERROR ${ tok_err(c, "Syntax error in statement", &$1); }$
4562 ## SimpleStatement Grammar
4564 ###### print binode cases
4566 // block, one per line
4567 if (b->left == NULL)
4568 do_indent(indent, "pass\n");
4570 print_exec(b->left, indent, bracket);
4572 print_exec(b->right, indent, bracket);
4575 ###### propagate binode cases
4578 /* If any statement returns something other than Tnone
4579 * or Tbool then all such must return same type.
4580 * As each statement may be Tnone or something else,
4581 * we must always pass NULL (unknown) down, otherwise an incorrect
4582 * error might occur. We never return Tnone unless it is
4587 for (e = b; e; e = cast(binode, e->right)) {
4588 *perr |= *perr_local;
4590 t = propagate_types(e->left, c, perr_local, NULL, rules);
4591 if ((rules & Rboolok) && (t == Tbool || t == Tnone))
4593 if (t == Tnone && e->right)
4594 /* Only the final statement *must* return a value
4602 type_err(c, "error: expected %1, found %2",
4603 e->left, type, rules, t);
4609 ###### interp binode cases
4611 while (rvtype == Tnone &&
4614 rv = interp_exec(c, b->left, &rvtype);
4615 b = cast(binode, b->right);
4619 ### The Print statement
4621 `print` is a simple statement that takes a comma-separated list of
4622 expressions and prints the values separated by spaces and terminated
4623 by a newline. No control of formatting is possible.
4625 `print` uses `ExpressionList` to collect the expressions and stores them
4626 on the left side of a `Print` binode unlessthere is a trailing comma
4627 when the list is stored on the `right` side and no trailing newline is
4633 ##### declare terminals
4636 ###### SimpleStatement Grammar
4638 | print ExpressionList ${
4639 $0 = b = new_pos(binode, $1);
4642 b->left = reorder_bilist($<EL);
4644 | print ExpressionList , ${ {
4645 $0 = b = new_pos(binode, $1);
4647 b->right = reorder_bilist($<EL);
4651 $0 = b = new_pos(binode, $1);
4657 ###### print binode cases
4660 do_indent(indent, "print");
4661 b2 = cast(binode, b->left ?: b->right);
4664 print_exec(b2->left, -1, bracket);
4667 b2 = cast(binode, b2->right);
4675 ###### propagate binode cases
4678 /* don't care but all must be consistent */
4680 b = cast(binode, b->left);
4682 b = cast(binode, b->right);
4684 propagate_types(b->left, c, perr_local, NULL, 0);
4685 b = cast(binode, b->right);
4689 ###### interp binode cases
4693 struct binode *b2 = cast(binode, b->left);
4695 b2 = cast(binode, b->right);
4696 for (; b2; b2 = cast(binode, b2->right)) {
4697 left = interp_exec(c, b2->left, <ype);
4698 print_value(ltype, &left, stdout);
4699 free_value(ltype, &left);
4703 if (b->right == NULL)
4709 ###### Assignment statement
4711 An assignment will assign a value to a variable, providing it hasn't
4712 been declared as a constant. The analysis phase ensures that the type
4713 will be correct so the interpreter just needs to perform the
4714 calculation. There is a form of assignment which declares a new
4715 variable as well as assigning a value. If a name is used before
4716 it is declared, it is assumed to be a global constant which are allowed to
4717 be declared at any time.
4723 ###### declare terminals
4726 ###### SimpleStatement Grammar
4727 | Term = Expression ${
4728 $0 = b= new(binode);
4733 | VariableDecl = Expression ${
4734 $0 = b= new(binode);
4741 if ($1->var->where_set == NULL) {
4743 "Variable declared with no type or value: %v",
4747 $0 = b = new(binode);
4754 ###### print binode cases
4757 do_indent(indent, "");
4758 print_exec(b->left, -1, bracket);
4760 print_exec(b->right, -1, bracket);
4767 struct variable *v = cast(var, b->left)->var;
4768 do_indent(indent, "");
4769 print_exec(b->left, -1, bracket);
4770 if (cast(var, b->left)->var->constant) {
4772 if (v->explicit_type) {
4773 type_print(v->type, stdout);
4778 if (v->explicit_type) {
4779 type_print(v->type, stdout);
4785 print_exec(b->right, -1, bracket);
4792 ###### propagate binode cases
4796 /* Both must match, or left may be ref and right an lval
4797 * Type must support 'dup',
4798 * For Assign, left must not be constant.
4801 *perr &= ~(Erval | Econst);
4802 t = propagate_types(b->left, c, perr, NULL, 0);
4807 struct type *t2 = propagate_types(b->right, c, perr_local,
4809 if (!t2 || t2 == t || (*perr_local & Efail))
4810 ; // No more effort needed
4811 else if (t->free == reference_free &&
4812 t->reference.referent == t2 &&
4813 !(*perr_local & Erval))
4814 b->right = take_addr(b->right);
4815 else if (t->free == reference_free &&
4816 t->reference.referent == t2 &&
4817 (*perr_local & Erval))
4818 type_err(c, "error: Cannot assign an rval to a reference.",
4821 t = propagate_types(b->right, c, perr_local, NULL, 0);
4823 propagate_types(b->left, c, perr, t, 0);
4826 type_err(c, "error: cannot assign to an rval", b,
4828 else if (b->op == Assign && (*perr & Econst)) {
4829 type_err(c, "error: Cannot assign to a constant: %v",
4830 b->left, NULL, 0, NULL);
4831 if (b->left->type == Xvar) {
4832 struct var *var = cast(var, b->left);
4833 struct variable *v = var->var;
4834 type_err(c, "info: name was defined as a constant here",
4835 v->where_decl, NULL, 0, NULL);
4838 if (t && t->dup == NULL && !(*perr_local & Emaycopy))
4839 type_err(c, "error: cannot assign value of type %1", b, t, 0, NULL);
4840 if (b->left->type == Xvar && (*perr_local & Efail))
4841 type_err(c, "info: variable '%v' was set as %1 here.",
4842 cast(var, b->left)->var->where_set, t, rules, NULL);
4847 ###### interp binode cases
4850 lleft = linterp_exec(c, b->left, <ype);
4852 dinterp_exec(c, b->right, lleft, ltype, 1);
4858 struct variable *v = cast(var, b->left)->var;
4861 val = var_value(c, v);
4862 if (v->type->prepare_type)
4863 v->type->prepare_type(c, v->type, 0);
4865 val_init(v->type, val);
4867 dinterp_exec(c, b->right, val, v->type, 0);
4871 ### The `use` statement
4873 The `use` statement is the last "simple" statement. It is needed when a
4874 statement block can return a value. This includes the body of a
4875 function which has a return type, and the "condition" code blocks in
4876 `if`, `while`, and `switch` statements.
4881 ###### declare terminals
4884 ###### SimpleStatement Grammar
4886 $0 = b = new_pos(binode, $1);
4891 ###### print binode cases
4894 do_indent(indent, "use ");
4895 print_exec(b->right, -1, bracket);
4900 ###### propagate binode cases
4903 /* result matches value */
4904 return propagate_types(b->right, c, perr, type, 0);
4906 ###### interp binode cases
4909 rv = interp_exec(c, b->right, &rvtype);
4912 ### The Conditional Statement
4914 This is the biggy and currently the only complex statement. This
4915 subsumes `if`, `while`, `do/while`, `switch`, and some parts of `for`.
4916 It is comprised of a number of parts, all of which are optional though
4917 set combinations apply. Each part is (usually) a key word (`then` is
4918 sometimes optional) followed by either an expression or a code block,
4919 except the `casepart` which is a "key word and an expression" followed
4920 by a code block. The code-block option is valid for all parts and,
4921 where an expression is also allowed, the code block can use the `use`
4922 statement to report a value. If the code block does not report a value
4923 the effect is similar to reporting `True`.
4925 The `else` and `case` parts, as well as `then` when combined with
4926 `if`, can contain a `use` statement which will apply to some
4927 containing conditional statement. `for` parts, `do` parts and `then`
4928 parts used with `for` can never contain a `use`, except in some
4929 subordinate conditional statement.
4931 If there is a `forpart`, it is executed first, only once.
4932 If there is a `dopart`, then it is executed repeatedly providing
4933 always that the `condpart` or `cond`, if present, does not return a non-True
4934 value. `condpart` can fail to return any value if it simply executes
4935 to completion. This is treated the same as returning `True`.
4937 If there is a `thenpart` it will be executed whenever the `condpart`
4938 or `cond` returns True (or does not return any value), but this will happen
4939 *after* `dopart` (when present).
4941 If `elsepart` is present it will be executed at most once when the
4942 condition returns `False` or some value that isn't `True` and isn't
4943 matched by any `casepart`. If there are any `casepart`s, they will be
4944 executed when the condition returns a matching value.
4946 The particular sorts of values allowed in case parts has not yet been
4947 determined in the language design, so nothing is prohibited.
4949 The various blocks in this complex statement potentially provide scope
4950 for variables as described earlier. Each such block must include the
4951 "OpenScope" nonterminal before parsing the block, and must call
4952 `var_block_close()` when closing the block.
4954 The code following "`if`", "`switch`" and "`for`" does not get its own
4955 scope, but is in a scope covering the whole statement, so names
4956 declared there cannot be redeclared elsewhere. Similarly the
4957 condition following "`while`" is in a scope the covers the body
4958 ("`do`" part) of the loop, and which does not allow conditional scope
4959 extension. Code following "`then`" (both looping and non-looping),
4960 "`else`" and "`case`" each get their own local scope.
4962 The type requirements on the code block in a `whilepart` are quite
4963 unusal. It is allowed to return a value of some identifiable type, in
4964 which case the loop aborts and an appropriate `casepart` is run, or it
4965 can return a Boolean, in which case the loop either continues to the
4966 `dopart` (on `True`) or aborts and runs the `elsepart` (on `False`).
4967 This is different both from the `ifpart` code block which is expected to
4968 return a Boolean, or the `switchpart` code block which is expected to
4969 return the same type as the casepart values. The correct analysis of
4970 the type of the `whilepart` code block is the reason for the
4971 `Rboolok` flag which is passed to `propagate_types()`.
4973 The `cond_statement` cannot fit into a `binode` so a new `exec` is
4974 defined. As there are two scopes which cover multiple parts - one for
4975 the whole statement and one for "while" and "do" - and as we will use
4976 the 'struct exec' to track scopes, we actually need two new types of
4977 exec. One is a `binode` for the looping part, the rest is the
4978 `cond_statement`. The `cond_statement` will use an auxilliary `struct
4979 casepart` to track a list of case parts.
4990 struct exec *action;
4991 struct casepart *next;
4993 struct cond_statement {
4995 struct exec *forpart, *condpart, *thenpart, *elsepart;
4996 struct binode *looppart;
4997 struct casepart *casepart;
5000 ###### ast functions
5002 static void free_casepart(struct casepart *cp)
5006 free_exec(cp->value);
5007 free_exec(cp->action);
5014 static void free_cond_statement(struct cond_statement *s)
5018 free_exec(s->forpart);
5019 free_exec(s->condpart);
5020 free_exec(s->looppart);
5021 free_exec(s->thenpart);
5022 free_exec(s->elsepart);
5023 free_casepart(s->casepart);
5027 ###### free exec cases
5028 case Xcond_statement: free_cond_statement(cast(cond_statement, e)); break;
5030 ###### ComplexStatement Grammar
5031 | CondStatement ${ $0 = $<1; }$
5033 ###### declare terminals
5034 $TERM for then while do
5041 // A CondStatement must end with EOL, as does CondSuffix and
5043 // ForPart, ThenPart, SwitchPart, CasePart are non-empty and
5044 // may or may not end with EOL
5045 // WhilePart and IfPart include an appropriate Suffix
5047 // ForPart, SwitchPart, and IfPart open scopes, o we have to close
5048 // them. WhilePart opens and closes its own scope.
5049 CondStatement -> ForPart OptNL ThenPart OptNL WhilePart CondSuffix ${
5052 $0->thenpart = $<TP;
5053 $0->looppart = $<WP;
5054 var_block_close(c, CloseSequential, $0);
5056 | ForPart OptNL WhilePart CondSuffix ${
5059 $0->looppart = $<WP;
5060 var_block_close(c, CloseSequential, $0);
5062 | WhilePart CondSuffix ${
5064 $0->looppart = $<WP;
5066 | SwitchPart OptNL CasePart CondSuffix ${
5068 $0->condpart = $<SP;
5069 $CP->next = $0->casepart;
5070 $0->casepart = $<CP;
5071 var_block_close(c, CloseSequential, $0);
5073 | SwitchPart : IN OptNL CasePart CondSuffix OUT Newlines ${
5075 $0->condpart = $<SP;
5076 $CP->next = $0->casepart;
5077 $0->casepart = $<CP;
5078 var_block_close(c, CloseSequential, $0);
5080 | IfPart IfSuffix ${
5082 $0->condpart = $IP.condpart; $IP.condpart = NULL;
5083 $0->thenpart = $IP.thenpart; $IP.thenpart = NULL;
5084 // This is where we close an "if" statement
5085 var_block_close(c, CloseSequential, $0);
5088 CondSuffix -> IfSuffix ${
5091 | Newlines CasePart CondSuffix ${
5093 $CP->next = $0->casepart;
5094 $0->casepart = $<CP;
5096 | CasePart CondSuffix ${
5098 $CP->next = $0->casepart;
5099 $0->casepart = $<CP;
5102 IfSuffix -> Newlines ${ $0 = new(cond_statement); }$
5103 | Newlines ElsePart ${ $0 = $<EP; }$
5104 | ElsePart ${$0 = $<EP; }$
5106 ElsePart -> else OpenBlock Newlines ${
5107 $0 = new(cond_statement);
5108 $0->elsepart = $<OB;
5109 var_block_close(c, CloseElse, $0->elsepart);
5111 | else OpenScope CondStatement ${
5112 $0 = new(cond_statement);
5113 $0->elsepart = $<CS;
5114 var_block_close(c, CloseElse, $0->elsepart);
5118 CasePart -> case Expression OpenScope ColonBlock ${
5119 $0 = calloc(1,sizeof(struct casepart));
5122 var_block_close(c, CloseParallel, $0->action);
5126 // These scopes are closed in CondStatement
5127 ForPart -> for OpenBlock ${
5131 ThenPart -> then OpenBlock ${
5133 var_block_close(c, CloseSequential, $0);
5137 // This scope is closed in CondStatement
5138 WhilePart -> while UseBlock OptNL do OpenBlock ${
5143 var_block_close(c, CloseSequential, $0->right);
5144 var_block_close(c, CloseSequential, $0);
5146 | while OpenScope Expression OpenScope ColonBlock ${
5151 var_block_close(c, CloseSequential, $0->right);
5152 var_block_close(c, CloseSequential, $0);
5156 IfPart -> if UseBlock OptNL then OpenBlock ${
5159 var_block_close(c, CloseParallel, $0.thenpart);
5161 | if OpenScope Expression OpenScope ColonBlock ${
5164 var_block_close(c, CloseParallel, $0.thenpart);
5166 | if OpenScope Expression OpenScope OptNL then Block ${
5169 var_block_close(c, CloseParallel, $0.thenpart);
5173 // This scope is closed in CondStatement
5174 SwitchPart -> switch OpenScope Expression ${
5177 | switch UseBlock ${
5181 ###### print binode cases
5183 if (b->left && b->left->type == Xbinode &&
5184 cast(binode, b->left)->op == Block) {
5186 do_indent(indent, "while {\n");
5188 do_indent(indent, "while\n");
5189 print_exec(b->left, indent+1, bracket);
5191 do_indent(indent, "} do {\n");
5193 do_indent(indent, "do\n");
5194 print_exec(b->right, indent+1, bracket);
5196 do_indent(indent, "}\n");
5198 do_indent(indent, "while ");
5199 print_exec(b->left, 0, bracket);
5204 print_exec(b->right, indent+1, bracket);
5206 do_indent(indent, "}\n");
5210 ###### print exec cases
5212 case Xcond_statement:
5214 struct cond_statement *cs = cast(cond_statement, e);
5215 struct casepart *cp;
5217 do_indent(indent, "for");
5218 if (bracket) printf(" {\n"); else printf("\n");
5219 print_exec(cs->forpart, indent+1, bracket);
5222 do_indent(indent, "} then {\n");
5224 do_indent(indent, "then\n");
5225 print_exec(cs->thenpart, indent+1, bracket);
5227 if (bracket) do_indent(indent, "}\n");
5230 print_exec(cs->looppart, indent, bracket);
5234 do_indent(indent, "switch");
5236 do_indent(indent, "if");
5237 if (cs->condpart && cs->condpart->type == Xbinode &&
5238 cast(binode, cs->condpart)->op == Block) {
5243 print_exec(cs->condpart, indent+1, bracket);
5245 do_indent(indent, "}\n");
5247 do_indent(indent, "then\n");
5248 print_exec(cs->thenpart, indent+1, bracket);
5252 print_exec(cs->condpart, 0, bracket);
5258 print_exec(cs->thenpart, indent+1, bracket);
5260 do_indent(indent, "}\n");
5265 for (cp = cs->casepart; cp; cp = cp->next) {
5266 do_indent(indent, "case ");
5267 print_exec(cp->value, -1, 0);
5272 print_exec(cp->action, indent+1, bracket);
5274 do_indent(indent, "}\n");
5277 do_indent(indent, "else");
5282 print_exec(cs->elsepart, indent+1, bracket);
5284 do_indent(indent, "}\n");
5289 ###### propagate binode cases
5291 propagate_types(b->right, c, perr_local, Tnone, 0);
5292 return propagate_types(b->left, c, perr, type, rules);
5294 ###### propagate exec cases
5295 case Xcond_statement:
5297 // forpart and looppart->right must return Tnone
5298 // thenpart must return Tnone if there is a loopart,
5299 // otherwise it is like elsepart.
5301 // be bool if there is no casepart
5302 // match casepart->values if there is a switchpart
5303 // either be bool or match casepart->value if there
5305 // elsepart and casepart->action must match the return type
5306 // expected of this statement.
5307 struct cond_statement *cs = cast(cond_statement, prog);
5308 struct casepart *cp;
5310 t = propagate_types(cs->forpart, c, perr, Tnone, 0);
5313 t = propagate_types(cs->thenpart, c, perr, Tnone, 0);
5315 if (cs->casepart == NULL) {
5316 propagate_types(cs->condpart, c, perr, Tbool, 0);
5317 propagate_types(cs->looppart, c, perr, Tbool, 0);
5319 /* Condpart must match case values, with bool permitted */
5321 for (cp = cs->casepart;
5322 cp && !t; cp = cp->next)
5323 t = propagate_types(cp->value, c, perr, NULL, 0);
5324 if (!t && cs->condpart)
5325 t = propagate_types(cs->condpart, c, perr, NULL, Rboolok); // NOTEST
5326 if (!t && cs->looppart)
5327 t = propagate_types(cs->looppart, c, perr, NULL, Rboolok); // NOTEST
5328 // Now we have a type (I hope) push it down
5330 for (cp = cs->casepart; cp; cp = cp->next)
5331 propagate_types(cp->value, c, perr, t, 0);
5332 propagate_types(cs->condpart, c, perr, t, Rboolok);
5333 propagate_types(cs->looppart, c, perr, t, Rboolok);
5336 // (if)then, else, and case parts must return expected type.
5337 if (!cs->looppart && !type)
5338 type = propagate_types(cs->thenpart, c, perr, NULL, rules);
5340 type = propagate_types(cs->elsepart, c, perr, NULL, rules);
5341 for (cp = cs->casepart;
5343 cp = cp->next) // NOTEST
5344 type = propagate_types(cp->action, c, perr, NULL, rules); // NOTEST
5347 propagate_types(cs->thenpart, c, perr, type, rules);
5348 propagate_types(cs->elsepart, c, perr, type, rules);
5349 for (cp = cs->casepart; cp ; cp = cp->next)
5350 propagate_types(cp->action, c, perr, type, rules);
5356 ###### interp binode cases
5358 // This just performs one iterration of the loop
5359 rv = interp_exec(c, b->left, &rvtype);
5360 if (rvtype == Tnone ||
5361 (rvtype == Tbool && rv.bool != 0))
5362 // rvtype is Tnone or Tbool, doesn't need to be freed
5363 interp_exec(c, b->right, NULL);
5366 ###### interp exec cases
5367 case Xcond_statement:
5369 struct value v, cnd;
5370 struct type *vtype, *cndtype;
5371 struct casepart *cp;
5372 struct cond_statement *cs = cast(cond_statement, e);
5375 interp_exec(c, cs->forpart, NULL);
5377 while ((cnd = interp_exec(c, cs->looppart, &cndtype)),
5378 cndtype == Tnone || (cndtype == Tbool && cnd.bool != 0))
5379 interp_exec(c, cs->thenpart, NULL);
5381 cnd = interp_exec(c, cs->condpart, &cndtype);
5382 if ((cndtype == Tnone ||
5383 (cndtype == Tbool && cnd.bool != 0))) {
5384 // cnd is Tnone or Tbool, doesn't need to be freed
5385 rv = interp_exec(c, cs->thenpart, &rvtype);
5386 // skip else (and cases)
5390 for (cp = cs->casepart; cp; cp = cp->next) {
5391 v = interp_exec(c, cp->value, &vtype);
5392 if (value_cmp(cndtype, vtype, &v, &cnd) == 0) {
5393 free_value(vtype, &v);
5394 free_value(cndtype, &cnd);
5395 rv = interp_exec(c, cp->action, &rvtype);
5398 free_value(vtype, &v);
5400 free_value(cndtype, &cnd);
5402 rv = interp_exec(c, cs->elsepart, &rvtype);
5409 ### Top level structure
5411 All the language elements so far can be used in various places. Now
5412 it is time to clarify what those places are.
5414 At the top level of a file there will be a number of declarations.
5415 Many of the things that can be declared haven't been described yet,
5416 such as functions, procedures, imports, and probably more.
5417 For now there are two sorts of things that can appear at the top
5418 level. They are predefined constants, `struct` types, and the `main`
5419 function. While the syntax will allow the `main` function to appear
5420 multiple times, that will trigger an error if it is actually attempted.
5422 The various declarations do not return anything. They store the
5423 various declarations in the parse context.
5425 ###### Parser: grammar
5428 Ocean -> OptNL DeclarationList
5430 ## declare terminals
5438 DeclarationList -> Declaration
5439 | DeclarationList Declaration
5441 Declaration -> ERROR Newlines ${
5442 tok_err(c, // NOTEST
5443 "error: unhandled parse error", &$1);
5449 ## top level grammar
5453 ### The `const` section
5455 As well as being defined in with the code that uses them, constants can
5456 be declared at the top level. These have full-file scope, so they are
5457 always `InScope`, even before(!) they have been declared. The value of
5458 a top level constant can be given as an expression, and this is
5459 evaluated after parsing and before execution.
5461 A function call can be used to evaluate a constant, but it will not have
5462 access to any program state, once such statement becomes meaningful.
5463 e.g. arguments and filesystem will not be visible.
5465 Constants are defined in a section that starts with the reserved word
5466 `const` and then has a block with a list of assignment statements.
5467 For syntactic consistency, these must use the double-colon syntax to
5468 make it clear that they are constants. Type can also be given: if
5469 not, the type will be determined during analysis, as with other
5472 ###### parse context
5473 struct binode *constlist;
5475 ###### top level grammar
5479 DeclareConstant -> const { IN OptNL ConstList OUT OptNL } Newlines
5480 | const { SimpleConstList } Newlines
5481 | const IN OptNL ConstList OUT Newlines
5482 | const SimpleConstList Newlines
5484 ConstList -> ConstList SimpleConstLine
5487 SimpleConstList -> SimpleConstList ; Const
5491 SimpleConstLine -> SimpleConstList Newlines
5492 | ERROR Newlines ${ tok_err(c, "Syntax error in constant", &$1); }$
5495 CType -> Type ${ $0 = $<1; }$
5499 Const -> IDENTIFIER :: CType = Expression ${ {
5501 struct binode *bl, *bv;
5502 struct var *var = new_pos(var, $ID);
5504 v = var_decl(c, $ID.txt);
5506 v->where_decl = var;
5512 v = var_ref(c, $1.txt);
5513 if (v->type == Tnone) {
5514 v->where_decl = var;
5520 tok_err(c, "error: name already declared", &$1);
5521 type_err(c, "info: this is where '%v' was first declared",
5522 v->where_decl, NULL, 0, NULL);
5534 bl->left = c->constlist;
5539 ###### core functions
5540 static void resolve_consts(struct parse_context *c)
5544 enum { none, some, cannot } progress = none;
5546 c->constlist = reorder_bilist(c->constlist);
5549 for (b = cast(binode, c->constlist); b;
5550 b = cast(binode, b->right)) {
5552 struct binode *vb = cast(binode, b->left);
5553 struct var *v = cast(var, vb->left);
5554 if (v->var->frame_pos >= 0)
5558 propagate_types(vb->right, c, &perr,
5560 } while (perr & Eretry);
5562 c->parse_error += 1;
5563 else if (!(perr & Eruntime)) {
5565 struct value res = interp_exec(
5566 c, vb->right, &v->var->type);
5567 global_alloc(c, v->var->type, v->var, &res);
5569 if (progress == cannot)
5570 type_err(c, "error: const %v cannot be resolved.",
5580 progress = cannot; break;
5582 progress = none; break;
5587 ###### print const decls
5592 for (b = cast(binode, context.constlist); b;
5593 b = cast(binode, b->right)) {
5594 struct binode *vb = cast(binode, b->left);
5595 struct var *vr = cast(var, vb->left);
5596 struct variable *v = vr->var;
5602 printf(" %.*s :: ", v->name->name.len, v->name->name.txt);
5603 type_print(v->type, stdout);
5605 print_exec(vb->right, -1, 0);
5610 ###### free const decls
5611 free_binode(context.constlist);
5613 ### Function declarations
5615 The code in an Ocean program is all stored in function declarations.
5616 One of the functions must be named `main` and it must accept an array of
5617 strings as a parameter - the command line arguments.
5619 As this is the top level, several things are handled a bit differently.
5620 The function is not interpreted by `interp_exec` as that isn't passed
5621 the argument list which the program requires. Similarly type analysis
5622 is a bit more interesting at this level.
5624 ###### ast functions
5626 static struct type *handle_results(struct parse_context *c,
5627 struct binode *results)
5629 /* Create a 'struct' type from the results list, which
5630 * is a list for 'struct var'
5632 struct type *t = add_anon_type(c, &structure_prototype,
5637 for (b = results; b; b = cast(binode, b->right))
5639 t->structure.nfields = cnt;
5640 t->structure.fields = calloc(cnt, sizeof(struct field));
5642 for (b = results; b; b = cast(binode, b->right)) {
5643 struct var *v = cast(var, b->left);
5644 struct field *f = &t->structure.fields[cnt++];
5645 int a = v->var->type->align;
5646 f->name = v->var->name->name;
5647 f->type = v->var->type;
5649 f->offset = t->size;
5650 v->var->frame_pos = f->offset;
5651 t->size += ((f->type->size - 1) | (a-1)) + 1;
5654 variable_unlink_exec(v->var);
5656 free_binode(results);
5660 static struct variable *declare_function(struct parse_context *c,
5661 struct variable *name,
5662 struct binode *args,
5664 struct binode *results,
5668 struct value fn = {.function = code};
5670 var_block_close(c, CloseFunction, code);
5671 t = add_anon_type(c, &function_prototype,
5672 "func %.*s", name->name->name.len,
5673 name->name->name.txt);
5675 t->function.params = reorder_bilist(args);
5677 ret = handle_results(c, reorder_bilist(results));
5678 t->function.inline_result = 1;
5679 t->function.local_size = ret->size;
5681 t->function.return_type = ret;
5682 global_alloc(c, t, name, &fn);
5683 name->type->function.scope = c->out_scope;
5688 var_block_close(c, CloseFunction, NULL);
5690 c->out_scope = NULL;
5694 ###### declare terminals
5697 ###### top level grammar
5700 DeclareFunction -> func FuncName ( OpenScope ArgsLine ) Block Newlines ${
5701 $0 = declare_function(c, $<FN, $<Ar, Tnone, NULL, $<Bl);
5703 | func FuncName IN OpenScope Args OUT OptNL do Block Newlines ${
5704 $0 = declare_function(c, $<FN, $<Ar, Tnone, NULL, $<Bl);
5706 | func FuncName NEWLINE OpenScope OptNL do Block Newlines ${
5707 $0 = declare_function(c, $<FN, NULL, Tnone, NULL, $<Bl);
5709 | func FuncName ( OpenScope ArgsLine ) : Type Block Newlines ${
5710 $0 = declare_function(c, $<FN, $<Ar, $<Ty, NULL, $<Bl);
5712 | func FuncName ( OpenScope ArgsLine ) : ( ArgsLine ) Block Newlines ${
5713 $0 = declare_function(c, $<FN, $<AL, NULL, $<AL2, $<Bl);
5715 | func FuncName IN OpenScope Args OUT OptNL return Type Newlines do Block Newlines ${
5716 $0 = declare_function(c, $<FN, $<Ar, $<Ty, NULL, $<Bl);
5718 | func FuncName NEWLINE OpenScope return Type Newlines do Block Newlines ${
5719 $0 = declare_function(c, $<FN, NULL, $<Ty, NULL, $<Bl);
5721 | func FuncName IN OpenScope Args OUT OptNL return IN Args OUT OptNL do Block Newlines ${
5722 $0 = declare_function(c, $<FN, $<Ar, NULL, $<Ar2, $<Bl);
5724 | func FuncName NEWLINE OpenScope return IN Args OUT OptNL do Block Newlines ${
5725 $0 = declare_function(c, $<FN, NULL, NULL, $<Ar, $<Bl);
5728 ###### print func decls
5733 while (target != 0) {
5735 for (v = context.in_scope; v; v=v->in_scope)
5736 if (v->depth == 0 && v->type && v->type->check_args) {
5745 struct value *val = var_value(&context, v);
5746 printf("func %.*s", v->name->name.len, v->name->name.txt);
5747 v->type->print_type_decl(v->type, stdout);
5750 print_exec(val->function, 1, brackets);
5753 print_value(v->type, val, stdout);
5755 printf("/* frame size %d */\n", v->type->function.local_size);
5761 ###### core functions
5763 static int analyse_funcs(struct parse_context *c)
5767 for (v = c->in_scope; v; v = v->in_scope) {
5771 if (v->depth != 0 || !v->type || !v->type->check_args)
5773 ret = v->type->function.inline_result ?
5774 Tnone : v->type->function.return_type;
5775 val = var_value(c, v);
5778 propagate_types(val->function, c, &perr, ret, 0);
5779 } while (!(perr & Efail) && (perr & Eretry));
5780 if (!(perr & Efail))
5781 /* Make sure everything is still consistent */
5782 propagate_types(val->function, c, &perr, ret, 0);
5785 if (!v->type->function.inline_result &&
5786 !v->type->function.return_type->dup) {
5787 type_err(c, "error: function cannot return value of type %1",
5788 v->where_decl, v->type->function.return_type, 0, NULL);
5791 scope_finalize(c, v->type);
5796 static int analyse_main(struct type *type, struct parse_context *c)
5798 struct binode *bp = type->function.params;
5802 struct type *argv_type;
5804 argv_type = add_anon_type(c, &array_prototype, "argv");
5805 argv_type->array.member = Tstr;
5806 argv_type->array.unspec = 1;
5808 for (b = bp; b; b = cast(binode, b->right)) {
5812 propagate_types(b->left, c, &perr, argv_type, 0);
5814 default: /* invalid */ // NOTEST
5815 propagate_types(b->left, c, &perr, Tnone, 0); // NOTEST
5818 c->parse_error += 1;
5821 return !c->parse_error;
5824 static void interp_main(struct parse_context *c, int argc, char **argv)
5826 struct value *progp = NULL;
5827 struct text main_name = { "main", 4 };
5828 struct variable *mainv;
5834 mainv = var_ref(c, main_name);
5836 progp = var_value(c, mainv);
5837 if (!progp || !progp->function) {
5838 fprintf(stderr, "oceani: no main function found.\n");
5839 c->parse_error += 1;
5842 if (!analyse_main(mainv->type, c)) {
5843 fprintf(stderr, "oceani: main has wrong type.\n");
5844 c->parse_error += 1;
5847 al = mainv->type->function.params;
5849 c->local_size = mainv->type->function.local_size;
5850 c->local = calloc(1, c->local_size);
5852 struct var *v = cast(var, al->left);
5853 struct value *vl = var_value(c, v->var);
5861 t->array.size = argc;
5862 t->prepare_type(c, t, 0);
5863 array_init(v->var->type, vl);
5864 for (i = 0; i < argc; i++) {
5865 struct value *vl2 = vl->array + i * v->var->type->array.member->size;
5867 arg.str.txt = argv[i];
5868 arg.str.len = strlen(argv[i]);
5869 free_value(Tstr, vl2);
5870 dup_value(Tstr, &arg, vl2);
5874 al = cast(binode, al->right);
5876 v = interp_exec(c, progp->function, &vtype);
5877 free_value(vtype, &v);
5882 ###### ast functions
5883 void free_variable(struct variable *v)
5887 ## And now to test it out.
5889 Having a language requires having a "hello world" program. I'll
5890 provide a little more than that: a program that prints "Hello world"
5891 finds the GCD of two numbers, prints the first few elements of
5892 Fibonacci, performs a binary search for a number, and a few other
5893 things which will likely grow as the languages grows.
5895 ###### File: oceani.mk
5898 @echo "===== DEMO ====="
5899 ./oceani --section "demo: hello" oceani.mdc 55 33
5905 four ::= 2 + 2 ; five ::= 10/2
5906 const pie ::= "I like Pie";
5907 cake ::= "The cake is"
5915 func main(argv:[]string)
5916 print "Hello World, what lovely oceans you have!"
5917 print "Are there", five, "?"
5918 print pi, pie, "but", cake
5920 A := $argv[1]; B := $argv[2]
5922 /* When a variable is defined in both branches of an 'if',
5923 * and used afterwards, the variables are merged.
5929 print "Is", A, "bigger than", B,"? ", bigger
5930 /* If a variable is not used after the 'if', no
5931 * merge happens, so types can be different
5934 double:string = "yes"
5935 print A, "is more than twice", B, "?", double
5938 print "double", B, "is", double
5949 print "GCD of", A, "and", B,"is", a
5951 print a, "is not positive, cannot calculate GCD"
5953 print b, "is not positive, cannot calculate GCD"
5958 print "Fibonacci:", f1,f2,
5959 then togo = togo - 1
5967 /* Binary search... */
5972 mid := (lo + hi) / 2
5985 print "Yay, I found", target
5987 print "Closest I found was", lo
5992 // "middle square" PRNG. Not particularly good, but one my
5993 // Dad taught me - the first one I ever heard of.
5994 for i:=1; then i = i + 1; while i < size:
5995 n := list[i-1] * list[i-1]
5996 list[i] = (n / 100) % 10 000
5998 print "Before sort:",
5999 for i:=0; then i = i + 1; while i < size:
6003 for i := 1; then i=i+1; while i < size:
6004 for j:=i-1; then j=j-1; while j >= 0:
6005 if list[j] > list[j+1]:
6009 print " After sort:",
6010 for i:=0; then i = i + 1; while i < size:
6014 if 1 == 2 then print "yes"; else print "no"
6018 bob.alive = (bob.name == "Hello")
6019 print "bob", "is" if bob.alive else "isn't", "alive"