1 # Ocean Interpreter - Stoney Creek version
3 Ocean is intended to be a compiled language, so this interpreter is
4 not targeted at being the final product. It is, rather, an intermediate
5 stage and fills that role in two distinct ways.
7 Firstly, it exists as a platform to experiment with the early language
8 design. An interpreter is easy to write and easy to get working, so
9 the barrier for entry is lower if I aim to start with an interpreter.
11 Secondly, the plan for the Ocean compiler is to write it in the
12 [Ocean language](http://ocean-lang.org). To achieve this we naturally
13 need some sort of boot-strap process and this interpreter - written in
14 portable C - will fill that role. It will be used to bootstrap the
17 Two features that are not needed to fill either of these roles are
18 performance and completeness. The interpreter only needs to be fast
19 enough to run small test programs and occasionally to run the compiler
20 on itself. It only needs to be complete enough to test aspects of the
21 design which are developed before the compiler is working, and to run
22 the compiler on itself. Any features not used by the compiler when
23 compiling itself are superfluous. They may be included anyway, but
26 Nonetheless, the interpreter should end up being reasonably complete,
27 and any performance bottlenecks which appear and are easily fixed, will
32 This second version of the interpreter exists to test out the
33 structured statement providing conditions and iteration, and simple
34 variable scoping. Clearly we need some minimal other functionality so
35 that values can be tested and instructions iterated over. All that
36 functionality is clearly not normative at this stage (not that
37 anything is **really** normative yet) and will change, so early test
38 code will certainly break in later versions.
40 The under-test parts of the language are:
42 - conditional/looping structured statements
43 - the `use` statement which is needed for that
44 - Variable binding using ":=" and "::=", and assignment using "=".
46 Elements which are present to make a usable language are:
48 - "blocks" of multiple statements.
49 - `pass`: a statement which does nothing.
50 - expressions: `+`, `-`, `*`, `/`, `%` can apply to numbers and `++` can
51 catenate strings. `and`, `or`, `not` manipulate Booleans, and
52 normal comparison operators can work on all three types.
53 - `print`: will print the values in a list of expressions.
54 - `program`: is given a list of identifiers to initialize from
59 Versions of the interpreter which obviously do not support a complete
60 language will be named after creeks and streams. This one is Stoney
63 Once we have something reasonably resembling a complete language, the
64 names of rivers will be used.
65 Early versions of the compiler will be named after seas. Major
66 releases of the compiler will be named after oceans. Hopefully I will
67 be finished once I get to the Pacific Ocean release.
71 As well as parsing and executing a program, the interpreter can print
72 out the program from the parsed internal structure. This is useful
73 for validating the parsing.
74 So the main requirements of the interpreter are:
76 - Parse the program, possibly with tracing,
77 - Analyse the parsed program to ensure consistency,
79 - Execute the program.
81 This is all performed by a single C program extracted with
84 There will be two formats for printing the program: a default and one
85 that uses bracketing. So a `--bracket` command line option is needed
86 for that. Normally the first code section found is used, however an
87 alternate section can be requested so that a file (such as this one)
88 can contain multiple programs This is effected with the `--section`
91 This code must be compiled with `-fplan9-extensions` so that anonymous
92 structures can be used.
94 ###### File: oceani.mk
96 myCFLAGS := -Wall -g -fplan9-extensions
97 CFLAGS := $(filter-out $(myCFLAGS),$(CFLAGS)) $(myCFLAGS)
98 myLDLIBS:= libparser.o libscanner.o libmdcode.o -licuuc
99 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
101 all :: $(LDLIBS) oceani
102 oceani.c oceani.h : oceani.mdc parsergen
103 ./parsergen -o oceani --LALR --tag Parser oceani.mdc
104 oceani.mk: oceani.mdc md2c
107 oceani: oceani.o $(LDLIBS)
108 $(CC) $(CFLAGS) -o oceani oceani.o $(LDLIBS)
110 ###### Parser: header
113 struct parse_context {
114 struct token_config config;
123 #define container_of(ptr, type, member) ({ \
124 const typeof( ((type *)0)->member ) *__mptr = (ptr); \
125 (type *)( (char *)__mptr - offsetof(type,member) );})
127 #define config2context(_conf) container_of(_conf, struct parse_context, \
136 #include <sys/mman.h>
155 static char Usage[] = "Usage: oceani --trace --print --noexec --brackets"
156 "--section=SectionName prog.ocn\n";
157 static const struct option long_options[] = {
158 {"trace", 0, NULL, 't'},
159 {"print", 0, NULL, 'p'},
160 {"noexec", 0, NULL, 'n'},
161 {"brackets", 0, NULL, 'b'},
162 {"section", 1, NULL, 's'},
165 const char *options = "tpnbs";
166 int main(int argc, char *argv[])
172 char *section = NULL;
173 struct parse_context context = {
175 .ignored = (1 << TK_line_comment)
176 | (1 << TK_block_comment),
177 .number_chars = ".,_+-",
182 int doprint=0, dotrace=0, doexec=1, brackets=0;
184 while ((opt = getopt_long(argc, argv, options, long_options, NULL))
187 case 't': dotrace=1; break;
188 case 'p': doprint=1; break;
189 case 'n': doexec=0; break;
190 case 'b': brackets=1; break;
191 case 's': section = optarg; break;
192 default: fprintf(stderr, Usage);
196 if (optind >= argc) {
197 fprintf(stderr, "oceani: no input file given\n");
200 fd = open(argv[optind], O_RDONLY);
202 fprintf(stderr, "oceani: cannot open %s\n", argv[optind]);
205 context.file_name = argv[optind];
206 len = lseek(fd, 0, 2);
207 file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
208 s = code_extract(file, file+len, NULL);
210 fprintf(stderr, "oceani: could not find any code in %s\n",
215 ## context initialization
219 for (ss = s; ss; ss = ss->next) {
220 struct text sec = ss->section;
221 if (sec.len == strlen(section) &&
222 strncmp(sec.txt, section, sec.len) == 0)
226 parse_oceani(ss->code, &context.config,
227 dotrace ? stderr : NULL);
229 fprintf(stderr, "oceani: cannot find section %s\n",
234 parse_oceani(s->code, &context.config,
235 dotrace ? stderr : NULL);
237 fprintf(stderr, "oceani: no program found.\n");
238 context.parse_error = 1;
240 if (context.prog && doprint) {
242 print_exec(context.prog, 0, brackets);
244 if (context.prog && doexec && !context.parse_error) {
245 if (!analyse_prog(context.prog, &context)) {
246 fprintf(stderr, "oceani: type error in program - not running.\n");
249 interp_prog(context.prog, argv+optind+1);
252 free_exec(context.prog);
255 struct section *t = s->next;
261 ## free context types
262 exit(context.parse_error ? 1 : 0);
267 The four requirements of parse, analyse, print, interpret apply to
268 each language element individually so that is how most of the code
271 Three of the four are fairly self explanatory. The one that requires
272 a little explanation is the analysis step.
274 The current language design does not require the types of variables to
275 be declared, but they must still have a single type. Different
276 operations impose different requirements on the variables, for example
277 addition requires both arguments to be numeric, and assignment
278 requires the variable on the left to have the same type as the
279 expression on the right.
281 Analysis involves propagating these type requirements around and
282 consequently setting the type of each variable. If any requirements
283 are violated (e.g. a string is compared with a number) or if a
284 variable needs to have two different types, then an error is raised
285 and the program will not run.
287 If the same variable is declared in both branchs of an 'if/else', or
288 in all cases of a 'switch' then the multiple instances may be merged
289 into just one variable if the variable is references after the
290 conditional statement. When this happens, the types must naturally be
291 consistent across all the branches. When the variable is not used
292 outside the if, the variables in the different branches are distinct
293 and can be of different types.
295 Determining the types of all variables early is important for
296 processing command line arguments. These can be assigned to any type
297 of variable, but we must first know the correct type so any required
298 conversion can happen. If a variable is associated with a command
299 line argument but no type can be interpreted (e.g. the variable is
300 only ever used in a `print` statement), then the type is set to
303 Undeclared names may only appear in "use" statements and "case" expressions.
304 These names are given a type of "label" and a unique value.
305 This allows them to fill the role of a name in an enumerated type, which
306 is useful for testing the `switch` statement.
308 As we will see, the condition part of a `while` statement can return
309 either a Boolean or some other type. This requires that the expect
310 type that gets passed around comprises a type (`enum vtype`) and a
311 flag to indicate that `Vbool` is also permitted.
313 As there are, as yet, no distinct types that are compatible, there
314 isn't much subtlety in the analysis. When we have distinct number
315 types, this will become more interesting.
319 When analysis discovers an inconsistency it needs to report an error;
320 just refusing to run the code ensures that the error doesn't cascade,
321 but by itself it isn't very useful. A clear understand of the sort of
322 error message that are useful will help guide the process of analysis.
324 At a simplistic level, the only sort of error that type analysis can
325 report is that the type of some construct doesn't match a contextual
326 requirement. For example, in `4 + "hello"` the addition provides a
327 contextual requirement for numbers, but `"hello"` is not a number. In
328 this particular example no further information is needed as the types
329 are obvious from local information. When a variable is involved that
330 isn't the case. It may be helpful to explain why the variable has a
331 particular type, by indicating the location where the type was set,
332 whether by declaration or usage.
334 Using a recursive-descent analysis we can easily detect a problem at
335 multiple locations. In "`hello:= "there"; 4 + hello`" the addition
336 will detect that one argument is not a number and the usage of `hello`
337 will detect that a number was wanted, but not provided. In this
338 (early) version of the language, we will generate error reports at
339 multiple locations, so the use of `hello` will report an error and
340 explain were the value was set, and the addition will report an error
341 and say why numbers are needed. To be able to report locations for
342 errors, each language element will need to record a file location
343 (line and column) and each variable will need to record the language
344 element where its type was set. For now we will assume that each line
345 of an error message indicates one location in the file, and up to 2
346 types. So we provide a `printf`-like function which takes a format, a
347 language (a `struct exec` which has not yet been introduced), and 2
348 types. "`%1`" reports the first type, "`%2`" reports the second. We
349 will need a function to print the location, once we know how that is
350 stored. As will be explained later, there are sometimes extra rules for
351 type matching and they might affect error messages, we need to pass those
354 As well as type errors, we sometimes need to report problems with
355 tokens, which might be unexpected or might name a type that has not
356 been defined. For these we have `tok_err()` which reports an error
357 with a given token. Each of the error functions sets the flag in the
358 context so indicate that parsing failed.
362 static void fput_loc(struct exec *loc, FILE *f);
364 ###### core functions
366 static void type_err(struct parse_context *c,
367 char *fmt, struct exec *loc,
368 struct type *t1, int rules, struct type *t2)
370 fprintf(stderr, "%s:", c->file_name);
371 fput_loc(loc, stderr);
372 for (; *fmt ; fmt++) {
379 case '%': fputc(*fmt, stderr); break; // NOTEST
380 default: fputc('?', stderr); break; // NOTEST
382 type_print(t1, stderr);
385 type_print(t2, stderr);
394 static void tok_err(struct parse_context *c, char *fmt, struct token *t)
396 fprintf(stderr, "%s:%d:%d: %s: %.*s\n", c->file_name, t->line, t->col, fmt,
397 t->txt.len, t->txt.txt);
403 One last introductory step before detailing the language elements and
404 providing their four requirements is to establish the data structures
405 to store these elements.
407 There are two key objects that we need to work with: executable
408 elements which comprise the program, and values which the program
409 works with. Between these are the variables in their various scopes
410 which hold the values, and types which classify the values stored and
411 manipulatd by executables.
415 Values come in a wide range of types, with more likely to be added.
416 Each type needs to be able to parse and print its own values (for
417 convenience at least) as well as to compare two values, at least for
418 equality and possibly for order. For now, values might need to be
419 duplicated and freed, though eventually such manipulations will be
420 better integrated into the language.
422 Rather than requiring every numeric type to support all numeric
423 operations (add, multiple, etc), we allow types to be able to present
424 as one of a few standard types: integer, float, and fraction. The
425 existance of these conversion functions enable types to determine if
426 they are compatible with other types.
428 Named type are stored in a simple linked list. Objects of each type are "values"
429 which are often passed around by value.
436 ## value union fields
443 struct value (*init)(struct type *type);
444 struct value (*prepare)(struct type *type);
445 struct value (*parse)(struct type *type, char *str);
446 void (*print)(struct value val);
447 void (*print_type)(struct type *type, FILE *f);
448 int (*cmp_order)(struct value v1, struct value v2);
449 int (*cmp_eq)(struct value v1, struct value v2);
450 struct value (*dup)(struct value val);
451 void (*free)(struct value val);
452 int (*compat)(struct type *this, struct type *other);
453 long long (*to_int)(struct value *v);
454 double (*to_float)(struct value *v);
455 int (*to_mpq)(mpq_t *q, struct value *v);
463 struct type *typelist;
467 static struct type *find_type(struct parse_context *c, struct text s)
469 struct type *l = c->typelist;
472 text_cmp(l->name, s) != 0)
477 static struct type *add_type(struct parse_context *c, struct text s,
482 n = calloc(1, sizeof(*n));
485 n->next = c->typelist;
490 static void free_type(struct type *t)
492 /* The type is always a reference to something in the
493 * context, so we don't need to free anything.
497 static void free_value(struct value v)
503 static int type_compat(struct type *require, struct type *have, int rules)
505 if ((rules & Rboolok) && have == Tbool)
507 if ((rules & Rnolabel) && have == Tlabel)
509 if (!require || !have)
513 return require->compat(require, have);
515 return require == have;
518 static void type_print(struct type *type, FILE *f)
521 fputs("*unknown*type*", f);
522 else if (type->name.len)
523 fprintf(f, "%.*s", type->name.len, type->name.txt);
524 else if (type->print_type)
525 type->print_type(type, f);
527 fputs("*invalid*type*", f); // NOTEST
530 static struct value val_prepare(struct type *type)
535 return type->prepare(type);
540 static struct value val_init(struct type *type)
545 return type->init(type);
550 static struct value dup_value(struct value v)
553 return v.type->dup(v);
557 static int value_cmp(struct value left, struct value right)
559 if (left.type && left.type->cmp_order)
560 return left.type->cmp_order(left, right);
561 if (left.type && left.type->cmp_eq)
562 return left.type->cmp_eq(left, right);
566 static void print_value(struct value v)
568 if (v.type && v.type->print)
571 printf("*Unknown*"); // NOTEST
574 static struct value parse_value(struct type *type, char *arg)
578 if (type && type->parse)
579 return type->parse(type, arg);
580 rv.type = NULL; // NOTEST
586 static void free_value(struct value v);
587 static int type_compat(struct type *require, struct type *have, int rules);
588 static void type_print(struct type *type, FILE *f);
589 static struct value val_init(struct type *type);
590 static struct value dup_value(struct value v);
591 static int value_cmp(struct value left, struct value right);
592 static void print_value(struct value v);
593 static struct value parse_value(struct type *type, char *arg);
595 ###### free context types
597 while (context.typelist) {
598 struct type *t = context.typelist;
600 context.typelist = t->next;
606 Values of the base types can be numbers, which we represent as
607 multi-precision fractions, strings, Booleans and labels. When
608 analysing the program we also need to allow for places where no value
609 is meaningful (type `Tnone`) and where we don't know what type to
610 expect yet (type is `NULL`).
612 Values are never shared, they are always copied when used, and freed
613 when no longer needed.
615 When propagating type information around the program, we need to
616 determine if two types are compatible, where type `NULL` is compatible
617 with anything. There are two special cases with type compatibility,
618 both related to the Conditional Statement which will be described
619 later. In some cases a Boolean can be accepted as well as some other
620 primary type, and in others any type is acceptable except a label (`Vlabel`).
621 A separate function encode these cases will simplify some code later.
623 When assigning command line arguments to variables, we need to be able
624 to parse each type from a string.
632 myLDLIBS := libnumber.o libstring.o -lgmp
633 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
635 ###### type union fields
636 enum vtype {Vnone, Vstr, Vnum, Vbool, Vlabel} vtype;
638 ###### value union fields
645 static void _free_value(struct value v)
647 switch (v.type->vtype) {
649 case Vstr: free(v.str.txt); break;
650 case Vnum: mpq_clear(v.num); break;
656 ###### value functions
658 static struct value _val_prepare(struct type *type)
663 switch(type->vtype) {
667 memset(&rv.num, 0, sizeof(rv.num));
683 static struct value _val_init(struct type *type)
688 switch(type->vtype) {
689 case Vnone: // NOTEST
692 mpq_init(rv.num); break;
694 rv.str.txt = malloc(1);
700 case Vlabel: // NOTEST
701 rv.label = NULL; // NOTEST
707 static struct value _dup_value(struct value v)
711 switch (rv.type->vtype) {
712 case Vnone: // NOTEST
722 mpq_set(rv.num, v.num);
725 rv.str.len = v.str.len;
726 rv.str.txt = malloc(rv.str.len);
727 memcpy(rv.str.txt, v.str.txt, v.str.len);
733 static int _value_cmp(struct value left, struct value right)
736 if (left.type != right.type)
737 return left.type - right.type; // NOTEST
738 switch (left.type->vtype) {
739 case Vlabel: cmp = left.label == right.label ? 0 : 1; break;
740 case Vnum: cmp = mpq_cmp(left.num, right.num); break;
741 case Vstr: cmp = text_cmp(left.str, right.str); break;
742 case Vbool: cmp = left.bool - right.bool; break;
743 case Vnone: cmp = 0; // NOTEST
748 static void _print_value(struct value v)
750 switch (v.type->vtype) {
751 case Vnone: // NOTEST
752 printf("*no-value*"); break; // NOTEST
753 case Vlabel: // NOTEST
754 printf("*label-%p*", v.label); break; // NOTEST
756 printf("%.*s", v.str.len, v.str.txt); break;
758 printf("%s", v.bool ? "True":"False"); break;
763 mpf_set_q(fl, v.num);
764 gmp_printf("%Fg", fl);
771 static struct value _parse_value(struct type *type, char *arg)
779 switch(type->vtype) {
780 case Vlabel: // NOTEST
781 case Vnone: // NOTEST
782 val.type = NULL; // NOTEST
785 val.str.len = strlen(arg);
786 val.str.txt = malloc(val.str.len);
787 memcpy(val.str.txt, arg, val.str.len);
794 tx.txt = arg; tx.len = strlen(tx.txt);
795 if (number_parse(val.num, tail, tx) == 0)
798 mpq_neg(val.num, val.num);
800 printf("Unsupported suffix: %s\n", arg);
805 if (strcasecmp(arg, "true") == 0 ||
806 strcmp(arg, "1") == 0)
808 else if (strcasecmp(arg, "false") == 0 ||
809 strcmp(arg, "0") == 0)
812 printf("Bad bool: %s\n", arg);
820 static void _free_value(struct value v);
822 static struct type base_prototype = {
824 .prepare = _val_prepare,
825 .parse = _parse_value,
826 .print = _print_value,
827 .cmp_order = _value_cmp,
828 .cmp_eq = _value_cmp,
833 static struct type *Tbool, *Tstr, *Tnum, *Tnone, *Tlabel;
836 static struct type *add_base_type(struct parse_context *c, char *n, enum vtype vt)
838 struct text txt = { n, strlen(n) };
841 t = add_type(c, txt, &base_prototype);
846 ###### context initialization
848 Tbool = add_base_type(&context, "Boolean", Vbool);
849 Tstr = add_base_type(&context, "string", Vstr);
850 Tnum = add_base_type(&context, "number", Vnum);
851 Tnone = add_base_type(&context, "none", Vnone);
852 Tlabel = add_base_type(&context, "label", Vlabel);
856 Variables are scoped named values. We store the names in a linked
857 list of "bindings" sorted lexically, and use sequential search and
864 struct binding *next; // in lexical order
868 This linked list is stored in the parse context so that "reduce"
869 functions can find or add variables, and so the analysis phase can
870 ensure that every variable gets a type.
874 struct binding *varlist; // In lexical order
878 static struct binding *find_binding(struct parse_context *c, struct text s)
880 struct binding **l = &c->varlist;
885 (cmp = text_cmp((*l)->name, s)) < 0)
889 n = calloc(1, sizeof(*n));
896 Each name can be linked to multiple variables defined in different
897 scopes. Each scope starts where the name is declared and continues
898 until the end of the containing code block. Scopes of a given name
899 cannot nest, so a declaration while a name is in-scope is an error.
901 ###### binding fields
902 struct variable *var;
906 struct variable *previous;
908 struct binding *name;
909 struct exec *where_decl;// where name was declared
910 struct exec *where_set; // where type was set
914 While the naming seems strange, we include local constants in the
915 definition of variables. A name declared `var := value` can
916 subsequently be changed, but a name declared `var ::= value` cannot -
919 ###### variable fields
922 Scopes in parallel branches can be partially merged. More
923 specifically, if a given name is declared in both branches of an
924 if/else then its scope is a candidate for merging. Similarly if
925 every branch of an exhaustive switch (e.g. has an "else" clause)
926 declares a given name, then the scopes from the branches are
927 candidates for merging.
929 Note that names declared inside a loop (which is only parallel to
930 itself) are never visible after the loop. Similarly names defined in
931 scopes which are not parallel, such as those started by `for` and
932 `switch`, are never visible after the scope. Only variables defined in
933 both `then` and `else` (including the implicit then after an `if`, and
934 excluding `then` used with `for`) and in all `case`s and `else` of a
935 `switch` or `while` can be visible beyond the `if`/`switch`/`while`.
937 Labels, which are a bit like variables, follow different rules.
938 Labels are not explicitly declared, but if an undeclared name appears
939 in a context where a label is legal, that effectively declares the
940 name as a label. The declaration remains in force (or in scope) at
941 least to the end of the immediately containing block and conditionally
942 in any larger containing block which does not declare the name in some
943 other way. Importantly, the conditional scope extension happens even
944 if the label is only used in one parallel branch of a conditional --
945 when used in one branch it is treated as having been declared in all
948 Merge candidates are tentatively visible beyond the end of the
949 branching statement which creates them. If the name is used, the
950 merge is affirmed and they become a single variable visible at the
951 outer layer. If not - if it is redeclared first - the merge lapses.
953 To track scopes we have an extra stack, implemented as a linked list,
954 which roughly parallels the parse stack and which is used exclusively
955 for scoping. When a new scope is opened, a new frame is pushed and
956 the child-count of the parent frame is incremented. This child-count
957 is used to distinguish between the first of a set of parallel scopes,
958 in which declared variables must not be in scope, and subsequent
959 branches, whether they must already be conditionally scoped.
961 To push a new frame *before* any code in the frame is parsed, we need a
962 grammar reduction. This is most easily achieved with a grammar
963 element which derives the empty string, and creates the new scope when
964 it is recognized. This can be placed, for example, between a keyword
965 like "if" and the code following it.
969 struct scope *parent;
975 struct scope *scope_stack;
978 static void scope_pop(struct parse_context *c)
980 struct scope *s = c->scope_stack;
982 c->scope_stack = s->parent;
987 static void scope_push(struct parse_context *c)
989 struct scope *s = calloc(1, sizeof(*s));
991 c->scope_stack->child_count += 1;
992 s->parent = c->scope_stack;
1000 OpenScope -> ${ scope_push(config2context(config)); }$
1003 Each variable records a scope depth and is in one of four states:
1005 - "in scope". This is the case between the declaration of the
1006 variable and the end of the containing block, and also between
1007 the usage with affirms a merge and the end of that block.
1009 The scope depth is not greater than the current parse context scope
1010 nest depth. When the block of that depth closes, the state will
1011 change. To achieve this, all "in scope" variables are linked
1012 together as a stack in nesting order.
1014 - "pending". The "in scope" block has closed, but other parallel
1015 scopes are still being processed. So far, every parallel block at
1016 the same level that has closed has declared the name.
1018 The scope depth is the depth of the last parallel block that
1019 enclosed the declaration, and that has closed.
1021 - "conditionally in scope". The "in scope" block and all parallel
1022 scopes have closed, and no further mention of the name has been
1023 seen. This state includes a secondary nest depth which records the
1024 outermost scope seen since the variable became conditionally in
1025 scope. If a use of the name is found, the variable becomes "in
1026 scope" and that secondary depth becomes the recorded scope depth.
1027 If the name is declared as a new variable, the old variable becomes
1028 "out of scope" and the recorded scope depth stays unchanged.
1030 - "out of scope". The variable is neither in scope nor conditionally
1031 in scope. It is permanently out of scope now and can be removed from
1032 the "in scope" stack.
1035 ###### variable fields
1036 int depth, min_depth;
1037 enum { OutScope, PendingScope, CondScope, InScope } scope;
1038 struct variable *in_scope;
1040 ###### parse context
1042 struct variable *in_scope;
1044 All variables with the same name are linked together using the
1045 'previous' link. Those variable that have
1046 been affirmatively merged all have a 'merged' pointer that points to
1047 one primary variable - the most recently declared instance. When
1048 merging variables, we need to also adjust the 'merged' pointer on any
1049 other variables that had previously been merged with the one that will
1050 no longer be primary.
1052 ###### variable fields
1053 struct variable *merged;
1055 ###### ast functions
1057 static void variable_merge(struct variable *primary, struct variable *secondary)
1061 if (primary->merged)
1063 primary = primary->merged;
1065 for (v = primary->previous; v; v=v->previous)
1066 if (v == secondary || v == secondary->merged ||
1067 v->merged == secondary ||
1068 (v->merged && v->merged == secondary->merged)) {
1069 v->scope = OutScope;
1070 v->merged = primary;
1074 ###### free context vars
1076 while (context.varlist) {
1077 struct binding *b = context.varlist;
1078 struct variable *v = b->var;
1079 context.varlist = b->next;
1082 struct variable *t = v;
1086 if (t->min_depth == 0)
1087 free_exec(t->where_decl);
1092 #### Manipulating Bindings
1094 When a name is conditionally visible, a new declaration discards the
1095 old binding - the condition lapses. Conversely a usage of the name
1096 affirms the visibility and extends it to the end of the containing
1097 block - i.e. the block that contains both the original declaration and
1098 the latest usage. This is determined from `min_depth`. When a
1099 conditionally visible variable gets affirmed like this, it is also
1100 merged with other conditionally visible variables with the same name.
1102 When we parse a variable declaration we either signal an error if the
1103 name is currently bound, or create a new variable at the current nest
1104 depth if the name is unbound or bound to a conditionally scoped or
1105 pending-scope variable. If the previous variable was conditionally
1106 scoped, it and its homonyms becomes out-of-scope.
1108 When we parse a variable reference (including non-declarative
1109 assignment) we signal an error if the name is not bound or is bound to
1110 a pending-scope variable; update the scope if the name is bound to a
1111 conditionally scoped variable; or just proceed normally if the named
1112 variable is in scope.
1114 When we exit a scope, any variables bound at this level are either
1115 marked out of scope or pending-scoped, depending on whether the
1116 scope was sequential or parallel.
1118 When exiting a parallel scope we check if there are any variables that
1119 were previously pending and are still visible. If there are, then
1120 there weren't redeclared in the most recent scope, so they cannot be
1121 merged and must become out-of-scope. If it is not the first of
1122 parallel scopes (based on `child_count`), we check that there was a
1123 previous binding that is still pending-scope. If there isn't, the new
1124 variable must now be out-of-scope.
1126 When exiting a sequential scope that immediately enclosed parallel
1127 scopes, we need to resolve any pending-scope variables. If there was
1128 no `else` clause, and we cannot determine that the `switch` was exhaustive,
1129 we need to mark all pending-scope variable as out-of-scope. Otherwise
1130 all pending-scope variables become conditionally scoped.
1133 enum closetype { CloseSequential, CloseParallel, CloseElse };
1135 ###### ast functions
1137 static struct variable *var_decl(struct parse_context *c, struct text s)
1139 struct binding *b = find_binding(c, s);
1140 struct variable *v = b->var;
1142 switch (v ? v->scope : OutScope) {
1144 /* Caller will report the error */
1148 v && v->scope == CondScope;
1150 v->scope = OutScope;
1154 v = calloc(1, sizeof(*v));
1155 v->previous = b->var;
1158 v->min_depth = v->depth = c->scope_depth;
1160 v->in_scope = c->in_scope;
1162 v->val = val_prepare(NULL);
1166 static struct variable *var_ref(struct parse_context *c, struct text s)
1168 struct binding *b = find_binding(c, s);
1169 struct variable *v = b->var;
1170 struct variable *v2;
1172 switch (v ? v->scope : OutScope) {
1175 /* Caller will report the error */
1178 /* All CondScope variables of this name need to be merged
1179 * and become InScope
1181 v->depth = v->min_depth;
1183 for (v2 = v->previous;
1184 v2 && v2->scope == CondScope;
1186 variable_merge(v, v2);
1194 static void var_block_close(struct parse_context *c, enum closetype ct)
1196 /* Close off all variables that are in_scope */
1197 struct variable *v, **vp, *v2;
1200 for (vp = &c->in_scope;
1201 v = *vp, v && v->depth > c->scope_depth && v->min_depth > c->scope_depth;
1203 if (v->name->var == v) switch (ct) {
1205 case CloseParallel: /* handle PendingScope */
1209 if (c->scope_stack->child_count == 1)
1210 v->scope = PendingScope;
1211 else if (v->previous &&
1212 v->previous->scope == PendingScope)
1213 v->scope = PendingScope;
1214 else if (v->val.type == Tlabel)
1215 v->scope = PendingScope;
1216 else if (v->name->var == v)
1217 v->scope = OutScope;
1218 if (ct == CloseElse) {
1219 /* All Pending variables with this name
1220 * are now Conditional */
1222 v2 && v2->scope == PendingScope;
1224 v2->scope = CondScope;
1229 v2 && v2->scope == PendingScope;
1231 if (v2->val.type != Tlabel)
1232 v2->scope = OutScope;
1234 case OutScope: break;
1237 case CloseSequential:
1238 if (v->val.type == Tlabel)
1239 v->scope = PendingScope;
1242 v->scope = OutScope;
1245 /* There was no 'else', so we can only become
1246 * conditional if we know the cases were exhaustive,
1247 * and that doesn't mean anything yet.
1248 * So only labels become conditional..
1251 v2 && v2->scope == PendingScope;
1253 if (v2->val.type == Tlabel) {
1254 v2->scope = CondScope;
1255 v2->min_depth = c->scope_depth;
1257 v2->scope = OutScope;
1260 case OutScope: break;
1264 if (v->scope == OutScope || v->name->var != v)
1273 Executables can be lots of different things. In many cases an
1274 executable is just an operation combined with one or two other
1275 executables. This allows for expressions and lists etc. Other times
1276 an executable is something quite specific like a constant or variable
1277 name. So we define a `struct exec` to be a general executable with a
1278 type, and a `struct binode` which is a subclass of `exec`, forms a
1279 node in a binary tree, and holds an operation. There will be other
1280 subclasses, and to access these we need to be able to `cast` the
1281 `exec` into the various other types.
1284 #define cast(structname, pointer) ({ \
1285 const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \
1286 if (__mptr && *__mptr != X##structname) abort(); \
1287 (struct structname *)( (char *)__mptr);})
1289 #define new(structname) ({ \
1290 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1291 __ptr->type = X##structname; \
1292 __ptr->line = -1; __ptr->column = -1; \
1295 #define new_pos(structname, token) ({ \
1296 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1297 __ptr->type = X##structname; \
1298 __ptr->line = token.line; __ptr->column = token.col; \
1307 enum exec_types type;
1315 struct exec *left, *right;
1318 ###### ast functions
1320 static int __fput_loc(struct exec *loc, FILE *f)
1324 if (loc->line >= 0) {
1325 fprintf(f, "%d:%d: ", loc->line, loc->column);
1328 if (loc->type == Xbinode)
1329 return __fput_loc(cast(binode,loc)->left, f) ||
1330 __fput_loc(cast(binode,loc)->right, f);
1333 static void fput_loc(struct exec *loc, FILE *f)
1335 if (!__fput_loc(loc, f))
1336 fprintf(f, "??:??: "); // NOTEST
1339 Each different type of `exec` node needs a number of functions
1340 defined, a bit like methods. We must be able to be able to free it,
1341 print it, analyse it and execute it. Once we have specific `exec`
1342 types we will need to parse them too. Let's take this a bit more
1347 The parser generator requires a `free_foo` function for each struct
1348 that stores attributes and they will be `exec`s and subtypes there-of.
1349 So we need `free_exec` which can handle all the subtypes, and we need
1352 ###### ast functions
1354 static void free_binode(struct binode *b)
1359 free_exec(b->right);
1363 ###### core functions
1364 static void free_exec(struct exec *e)
1373 ###### forward decls
1375 static void free_exec(struct exec *e);
1377 ###### free exec cases
1378 case Xbinode: free_binode(cast(binode, e)); break;
1382 Printing an `exec` requires that we know the current indent level for
1383 printing line-oriented components. As will become clear later, we
1384 also want to know what sort of bracketing to use.
1386 ###### ast functions
1388 static void do_indent(int i, char *str)
1395 ###### core functions
1396 static void print_binode(struct binode *b, int indent, int bracket)
1400 ## print binode cases
1404 static void print_exec(struct exec *e, int indent, int bracket)
1410 print_binode(cast(binode, e), indent, bracket); break;
1415 ###### forward decls
1417 static void print_exec(struct exec *e, int indent, int bracket);
1421 As discussed, analysis involves propagating type requirements around
1422 the program and looking for errors.
1424 So `propagate_types` is passed an expected type (being a `struct type`
1425 pointer together with some `val_rules` flags) that the `exec` is
1426 expected to return, and returns the type that it does return, either
1427 of which can be `NULL` signifying "unknown". An `ok` flag is passed
1428 by reference. It is set to `0` when an error is found, and `2` when
1429 any change is made. If it remains unchanged at `1`, then no more
1430 propagation is needed.
1434 enum val_rules {Rnolabel = 1<<0, Rboolok = 1<<1, Rnoconstant = 2<<1};
1438 if (rules & Rnolabel)
1439 fputs(" (labels not permitted)", stderr);
1442 ###### core functions
1444 static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1445 struct type *type, int rules)
1452 switch (prog->type) {
1455 struct binode *b = cast(binode, prog);
1457 ## propagate binode cases
1461 ## propagate exec cases
1468 Interpreting an `exec` doesn't require anything but the `exec`. State
1469 is stored in variables and each variable will be directly linked from
1470 within the `exec` tree. The exception to this is the whole `program`
1471 which needs to look at command line arguments. The `program` will be
1472 interpreted separately.
1474 Each `exec` can return a value, which may be `Tnone` but must be non-NULL;
1476 ###### core functions
1479 struct value val, *lval;
1482 static struct lrval _interp_exec(struct exec *e);
1484 static struct value interp_exec(struct exec *e)
1486 struct lrval ret = _interp_exec(e);
1489 return dup_value(*ret.lval);
1494 static struct value *linterp_exec(struct exec *e)
1496 struct lrval ret = _interp_exec(e);
1501 static struct lrval _interp_exec(struct exec *e)
1504 struct value rv, *lrv = NULL;
1515 struct binode *b = cast(binode, e);
1516 struct value left, right, *lleft;
1517 left.type = right.type = Tnone;
1519 ## interp binode cases
1521 free_value(left); free_value(right);
1524 ## interp exec cases
1533 Now that we have the shape of the interpreter in place we can add some
1534 complex types and connected them in to the data structures and the
1535 different phases of parse, analyse, print, interpret.
1537 For now, just arrays.
1541 Arrays can be declared by giving a size and a type, as `[size]type' so
1542 `freq:[26]number` declares `freq` to be an array of 26 numbers. The
1543 size can be an arbitrary expression which is evaluated when the name
1546 Arrays cannot be assigned. When pointers are introduced we will also
1547 introduce array slices which can refer to part or all of an array -
1548 the assignment syntax will create a slice. For now, an array can only
1549 ever be referenced by the name it is declared with. It is likely that
1550 a "`copy`" primitive will eventually be define which can be used to
1551 make a copy of an array with controllable depth.
1553 ###### type union fields
1557 struct variable *vsize;
1558 struct type *member;
1561 ###### value union fields
1563 struct value *elmnts;
1566 ###### value functions
1568 static struct value array_prepare(struct type *type)
1573 ret.array.elmnts = NULL;
1577 static struct value array_init(struct type *type)
1583 if (type->array.vsize) {
1586 mpz_tdiv_q(q, mpq_numref(type->array.vsize->val.num),
1587 mpq_denref(type->array.vsize->val.num));
1588 type->array.size = mpz_get_si(q);
1591 ret.array.elmnts = calloc(type->array.size,
1592 sizeof(ret.array.elmnts[0]));
1593 for (i = 0; ret.array.elmnts && i < type->array.size; i++)
1594 ret.array.elmnts[i] = val_init(type->array.member);
1598 static void array_free(struct value val)
1602 if (val.array.elmnts)
1603 for (i = 0; i < val.type->array.size; i++)
1604 free_value(val.array.elmnts[i]);
1605 free(val.array.elmnts);
1608 static int array_compat(struct type *require, struct type *have)
1610 if (have->compat != require->compat)
1612 /* Both are arrays, so we can look at details */
1613 if (!type_compat(require->array.member, have->array.member, 0))
1615 if (require->array.vsize == NULL && have->array.vsize == NULL)
1616 return require->array.size == have->array.size;
1618 return require->array.vsize == have->array.vsize;
1621 static void array_print_type(struct type *type, FILE *f)
1624 if (type->array.vsize) {
1625 struct binding *b = type->array.vsize->name;
1626 fprintf(f, "%.*s]", b->name.len, b->name.txt);
1628 fprintf(f, "%d]", type->array.size);
1629 type_print(type->array.member, f);
1632 static struct type array_prototype = {
1633 .prepare = array_prepare,
1635 .print_type = array_print_type,
1636 .compat = array_compat,
1642 | [ NUMBER ] Type ${
1643 $0 = calloc(1, sizeof(struct type));
1644 *($0) = array_prototype;
1645 $0->array.member = $<4;
1646 $0->array.vsize = NULL;
1648 struct parse_context *c = config2context(config);
1651 if (number_parse(num, tail, $2.txt) == 0)
1652 tok_err(c, "error: unrecognised number", &$2);
1654 tok_err(c, "error: unsupported number suffix", &$2);
1656 $0->array.size = mpz_get_ui(mpq_numref(num));
1657 if (mpz_cmp_ui(mpq_denref(num), 1) != 0) {
1658 tok_err(c, "error: array size must be an integer",
1660 } else if (mpz_cmp_ui(mpq_numref(num), 1UL << 30) >= 0)
1661 tok_err(c, "error: array size is too large",
1665 $0->next= c->anon_typelist;
1666 c->anon_typelist = $0;
1670 | [ IDENTIFIER ] Type ${ {
1671 struct parse_context *c = config2context(config);
1672 struct variable *v = var_ref(c, $2.txt);
1675 tok_err(config2context(config), "error: name undeclared", &$2);
1676 else if (!v->constant)
1677 tok_err(config2context(config), "error: array size must be a constant", &$2);
1679 $0 = calloc(1, sizeof(struct type));
1680 *($0) = array_prototype;
1681 $0->array.member = $<4;
1683 $0->array.vsize = v;
1684 $0->next= c->anon_typelist;
1685 c->anon_typelist = $0;
1688 ###### parse context
1690 struct type *anon_typelist;
1692 ###### free context types
1694 while (context.anon_typelist) {
1695 struct type *t = context.anon_typelist;
1697 context.anon_typelist = t->next;
1704 ###### variable grammar
1706 | Variable [ Expression ] ${ {
1707 struct binode *b = new(binode);
1714 ###### print binode cases
1716 print_exec(b->left, -1, 0);
1718 print_exec(b->right, -1, 0);
1722 ###### propagate binode cases
1724 /* left must be an array, right must be a number,
1725 * result is the member type of the array
1727 propagate_types(b->right, c, ok, Tnum, 0);
1728 t = propagate_types(b->left, c, ok, NULL, rules & Rnoconstant);
1729 if (!t || t->compat != array_compat) {
1730 type_err(c, "error: %1 cannot be indexed", prog, t, 0, NULL);
1734 if (!type_compat(type, t->array.member, rules)) {
1735 type_err(c, "error: have %1 but need %2", prog,
1736 t->array.member, rules, type);
1739 return t->array.member;
1743 ###### interp binode cases
1748 lleft = linterp_exec(b->left);
1749 right = interp_exec(b->right);
1751 mpz_tdiv_q(q, mpq_numref(right.num), mpq_denref(right.num));
1755 if (i >= 0 && i < lleft->type->array.size)
1756 lrv = &lleft->array.elmnts[i];
1758 rv = val_init(lleft->type->array.member);
1762 ## Language elements
1764 Each language element needs to be parsed, printed, analysed,
1765 interpreted, and freed. There are several, so let's just start with
1766 the easy ones and work our way up.
1770 We have already met values as separate objects. When manifest
1771 constants appear in the program text, that must result in an executable
1772 which has a constant value. So the `val` structure embeds a value in
1788 $0 = new_pos(val, $1);
1789 $0->val.type = Tbool;
1793 $0 = new_pos(val, $1);
1794 $0->val.type = Tbool;
1798 $0 = new_pos(val, $1);
1799 $0->val.type = Tnum;
1802 if (number_parse($0->val.num, tail, $1.txt) == 0)
1803 mpq_init($0->val.num);
1805 tok_err(config2context(config), "error: unsupported number suffix",
1810 $0 = new_pos(val, $1);
1811 $0->val.type = Tstr;
1814 string_parse(&$1, '\\', &$0->val.str, tail);
1816 tok_err(config2context(config), "error: unsupported string suffix",
1821 $0 = new_pos(val, $1);
1822 $0->val.type = Tstr;
1825 string_parse(&$1, '\\', &$0->val.str, tail);
1827 tok_err(config2context(config), "error: unsupported string suffix",
1832 ###### print exec cases
1835 struct val *v = cast(val, e);
1836 if (v->val.type == Tstr)
1838 print_value(v->val);
1839 if (v->val.type == Tstr)
1844 ###### propagate exec cases
1847 struct val *val = cast(val, prog);
1848 if (!type_compat(type, val->val.type, rules)) {
1849 type_err(c, "error: expected %1%r found %2",
1850 prog, type, rules, val->val.type);
1853 return val->val.type;
1856 ###### interp exec cases
1858 rv = dup_value(cast(val, e)->val);
1861 ###### ast functions
1862 static void free_val(struct val *v)
1870 ###### free exec cases
1871 case Xval: free_val(cast(val, e)); break;
1873 ###### ast functions
1874 // Move all nodes from 'b' to 'rv', reversing the order.
1875 // In 'b' 'left' is a list, and 'right' is the last node.
1876 // In 'rv', left' is the first node and 'right' is a list.
1877 static struct binode *reorder_bilist(struct binode *b)
1879 struct binode *rv = NULL;
1882 struct exec *t = b->right;
1886 b = cast(binode, b->left);
1896 Just as we used a `val` to wrap a value into an `exec`, we similarly
1897 need a `var` to wrap a `variable` into an exec. While each `val`
1898 contained a copy of the value, each `var` hold a link to the variable
1899 because it really is the same variable no matter where it appears.
1900 When a variable is used, we need to remember to follow the `->merged`
1901 link to find the primary instance.
1909 struct variable *var;
1915 VariableDecl -> IDENTIFIER : ${ {
1916 struct variable *v = var_decl(config2context(config), $1.txt);
1917 $0 = new_pos(var, $1);
1922 v = var_ref(config2context(config), $1.txt);
1924 type_err(config2context(config), "error: variable '%v' redeclared",
1925 $0, Tnone, 0, Tnone);
1926 type_err(config2context(config), "info: this is where '%v' was first declared",
1927 v->where_decl, Tnone, 0, Tnone);
1930 | IDENTIFIER :: ${ {
1931 struct variable *v = var_decl(config2context(config), $1.txt);
1932 $0 = new_pos(var, $1);
1938 v = var_ref(config2context(config), $1.txt);
1940 type_err(config2context(config), "error: variable '%v' redeclared",
1941 $0, Tnone, 0, Tnone);
1942 type_err(config2context(config), "info: this is where '%v' was first declared",
1943 v->where_decl, Tnone, 0, Tnone);
1946 | IDENTIFIER : Type ${ {
1947 struct variable *v = var_decl(config2context(config), $1.txt);
1948 $0 = new_pos(var, $1);
1953 v->val = val_prepare($<3);
1955 v = var_ref(config2context(config), $1.txt);
1957 type_err(config2context(config), "error: variable '%v' redeclared",
1958 $0, Tnone, 0, Tnone);
1959 type_err(config2context(config), "info: this is where '%v' was first declared",
1960 v->where_decl, Tnone, 0, Tnone);
1963 | IDENTIFIER :: Type ${ {
1964 struct variable *v = var_decl(config2context(config), $1.txt);
1965 $0 = new_pos(var, $1);
1970 v->val = val_prepare($<3);
1973 v = var_ref(config2context(config), $1.txt);
1975 type_err(config2context(config), "error: variable '%v' redeclared",
1976 $0, Tnone, 0, Tnone);
1977 type_err(config2context(config), "info: this is where '%v' was first declared",
1978 v->where_decl, Tnone, 0, Tnone);
1983 Variable -> IDENTIFIER ${ {
1984 struct variable *v = var_ref(config2context(config), $1.txt);
1985 $0 = new_pos(var, $1);
1987 /* This might be a label - allocate a var just in case */
1988 v = var_decl(config2context(config), $1.txt);
1990 v->val = val_prepare(Tlabel);
1991 v->val.label = &v->val;
1995 cast(var, $0)->var = v;
2000 Type -> IDENTIFIER ${
2001 $0 = find_type(config2context(config), $1.txt);
2003 tok_err(config2context(config),
2004 "error: undefined type", &$1);
2011 ###### print exec cases
2014 struct var *v = cast(var, e);
2016 struct binding *b = v->var->name;
2017 printf("%.*s", b->name.len, b->name.txt);
2024 if (loc->type == Xvar) {
2025 struct var *v = cast(var, loc);
2027 struct binding *b = v->var->name;
2028 fprintf(stderr, "%.*s", b->name.len, b->name.txt);
2030 fputs("???", stderr); // NOTEST
2032 fputs("NOTVAR", stderr); // NOTEST
2035 ###### propagate exec cases
2039 struct var *var = cast(var, prog);
2040 struct variable *v = var->var;
2042 type_err(c, "%d:BUG: no variable!!", prog, Tnone, 0, Tnone); // NOTEST
2044 return Tnone; // NOTEST
2048 if (v->constant && (rules & Rnoconstant)) {
2049 type_err(c, "error: Cannot assign to a constant: %v",
2050 prog, NULL, 0, NULL);
2051 type_err(c, "info: name was defined as a constant here",
2052 v->where_decl, NULL, 0, NULL);
2056 if (v->val.type == NULL) {
2057 if (type && *ok != 0) {
2058 v->val = val_prepare(type);
2059 v->where_set = prog;
2064 if (!type_compat(type, v->val.type, rules)) {
2065 type_err(c, "error: expected %1%r but variable '%v' is %2", prog,
2066 type, rules, v->val.type);
2067 type_err(c, "info: this is where '%v' was set to %1", v->where_set,
2068 v->val.type, rules, Tnone);
2076 ###### interp exec cases
2079 struct var *var = cast(var, e);
2080 struct variable *v = var->var;
2088 ###### ast functions
2090 static void free_var(struct var *v)
2095 ###### free exec cases
2096 case Xvar: free_var(cast(var, e)); break;
2098 ### Expressions: Conditional
2100 Our first user of the `binode` will be conditional expressions, which
2101 is a bit odd as they actually have three components. That will be
2102 handled by having 2 binodes for each expression. The conditional
2103 expression is the lowest precedence operatior, so it gets to define
2104 what an "Expression" is. The next level up is "BoolExpr", which
2107 Conditional expressions are of the form "value `if` condition `else`
2108 other_value". There is no associativite with this operator: the
2109 values and conditions can only be other conditional expressions if
2110 they are enclosed in parentheses. Allowing nesting without
2111 parentheses would be too confusing.
2119 Expression -> BoolExpr if BoolExpr else BoolExpr ${ {
2120 struct binode *b1 = new(binode);
2121 struct binode *b2 = new(binode);
2130 | BoolExpr ${ $0 = $<1; }$
2132 ###### print binode cases
2135 b2 = cast(binode, b->right);
2136 print_exec(b2->left, -1, 0);
2138 print_exec(b->left, -1, 0);
2140 print_exec(b2->right, -1, 0);
2143 ###### propagate binode cases
2146 /* cond must be Tbool, others must match */
2147 struct binode *b2 = cast(binode, b->right);
2150 propagate_types(b->left, c, ok, Tbool, 0);
2151 t = propagate_types(b2->left, c, ok, type, Rnolabel);
2152 t2 = propagate_types(b2->right, c, ok, type ?: t, Rnolabel);
2156 ###### interp binode cases
2159 struct binode *b2 = cast(binode, b->right);
2160 left = interp_exec(b->left);
2162 rv = interp_exec(b2->left);
2164 rv = interp_exec(b2->right);
2168 ### Expressions: Boolean
2170 The next class of expressions to use the `binode` will be Boolean
2171 expressions. As I haven't implemented precedence in the parser
2172 generator yet, we need different names for each precedence level used
2173 by expressions. The outer most or lowest level precedence are
2174 conditional expressions are Boolean operators which form an `BoolExpr`
2175 out of `BTerm`s and `BFact`s. As well as `or` `and`, and `not` we
2176 have `and then` and `or else` which only evaluate the second operand
2177 if the result would make a difference.
2189 BoolExpr -> BoolExpr or BTerm ${ {
2190 struct binode *b = new(binode);
2196 | BoolExpr or else BTerm ${ {
2197 struct binode *b = new(binode);
2203 | BTerm ${ $0 = $<1; }$
2205 BTerm -> BTerm and BFact ${ {
2206 struct binode *b = new(binode);
2212 | BTerm and then BFact ${ {
2213 struct binode *b = new(binode);
2219 | BFact ${ $0 = $<1; }$
2221 BFact -> not BFact ${ {
2222 struct binode *b = new(binode);
2229 ###### print binode cases
2231 print_exec(b->left, -1, 0);
2233 print_exec(b->right, -1, 0);
2236 print_exec(b->left, -1, 0);
2237 printf(" and then ");
2238 print_exec(b->right, -1, 0);
2241 print_exec(b->left, -1, 0);
2243 print_exec(b->right, -1, 0);
2246 print_exec(b->left, -1, 0);
2247 printf(" or else ");
2248 print_exec(b->right, -1, 0);
2252 print_exec(b->right, -1, 0);
2255 ###### propagate binode cases
2261 /* both must be Tbool, result is Tbool */
2262 propagate_types(b->left, c, ok, Tbool, 0);
2263 propagate_types(b->right, c, ok, Tbool, 0);
2264 if (type && type != Tbool) {
2265 type_err(c, "error: %1 operation found where %2 expected", prog,
2271 ###### interp binode cases
2273 rv = interp_exec(b->left);
2274 right = interp_exec(b->right);
2275 rv.bool = rv.bool && right.bool;
2278 rv = interp_exec(b->left);
2280 rv = interp_exec(b->right);
2283 rv = interp_exec(b->left);
2284 right = interp_exec(b->right);
2285 rv.bool = rv.bool || right.bool;
2288 rv = interp_exec(b->left);
2290 rv = interp_exec(b->right);
2293 rv = interp_exec(b->right);
2297 ### Expressions: Comparison
2299 Of slightly higher precedence that Boolean expressions are
2301 A comparison takes arguments of any type, but the two types must be
2304 To simplify the parsing we introduce an `eop` which can record an
2305 expression operator.
2312 ###### ast functions
2313 static void free_eop(struct eop *e)
2328 | Expr CMPop Expr ${ {
2329 struct binode *b = new(binode);
2335 | Expr ${ $0 = $<1; }$
2340 CMPop -> < ${ $0.op = Less; }$
2341 | > ${ $0.op = Gtr; }$
2342 | <= ${ $0.op = LessEq; }$
2343 | >= ${ $0.op = GtrEq; }$
2344 | == ${ $0.op = Eql; }$
2345 | != ${ $0.op = NEql; }$
2347 ###### print binode cases
2355 print_exec(b->left, -1, 0);
2357 case Less: printf(" < "); break;
2358 case LessEq: printf(" <= "); break;
2359 case Gtr: printf(" > "); break;
2360 case GtrEq: printf(" >= "); break;
2361 case Eql: printf(" == "); break;
2362 case NEql: printf(" != "); break;
2363 default: abort(); // NOTEST
2365 print_exec(b->right, -1, 0);
2368 ###### propagate binode cases
2375 /* Both must match but not be labels, result is Tbool */
2376 t = propagate_types(b->left, c, ok, NULL, Rnolabel);
2378 propagate_types(b->right, c, ok, t, 0);
2380 t = propagate_types(b->right, c, ok, NULL, Rnolabel);
2382 t = propagate_types(b->left, c, ok, t, 0);
2384 if (!type_compat(type, Tbool, 0)) {
2385 type_err(c, "error: Comparison returns %1 but %2 expected", prog,
2386 Tbool, rules, type);
2391 ###### interp binode cases
2400 left = interp_exec(b->left);
2401 right = interp_exec(b->right);
2402 cmp = value_cmp(left, right);
2405 case Less: rv.bool = cmp < 0; break;
2406 case LessEq: rv.bool = cmp <= 0; break;
2407 case Gtr: rv.bool = cmp > 0; break;
2408 case GtrEq: rv.bool = cmp >= 0; break;
2409 case Eql: rv.bool = cmp == 0; break;
2410 case NEql: rv.bool = cmp != 0; break;
2411 default: rv.bool = 0; break; // NOTEST
2416 ### Expressions: The rest
2418 The remaining expressions with the highest precedence are arithmetic
2419 and string concatenation. They are `Expr`, `Term`, and `Factor`.
2420 The `Factor` is where the `Value` and `Variable` that we already have
2423 `+` and `-` are both infix and prefix operations (where they are
2424 absolute value and negation). These have different operator names.
2426 We also have a 'Bracket' operator which records where parentheses were
2427 found. This makes it easy to reproduce these when printing. Once
2428 precedence is handled better I might be able to discard this.
2440 Expr -> Expr Eop Term ${ {
2441 struct binode *b = new(binode);
2447 | Term ${ $0 = $<1; }$
2449 Term -> Term Top Factor ${ {
2450 struct binode *b = new(binode);
2456 | Factor ${ $0 = $<1; }$
2458 Factor -> ( Expression ) ${ {
2459 struct binode *b = new_pos(binode, $1);
2465 struct binode *b = new(binode);
2470 | Value ${ $0 = $<1; }$
2471 | Variable ${ $0 = $<1; }$
2474 Eop -> + ${ $0.op = Plus; }$
2475 | - ${ $0.op = Minus; }$
2477 Uop -> + ${ $0.op = Absolute; }$
2478 | - ${ $0.op = Negate; }$
2480 Top -> * ${ $0.op = Times; }$
2481 | / ${ $0.op = Divide; }$
2482 | % ${ $0.op = Rem; }$
2483 | ++ ${ $0.op = Concat; }$
2485 ###### print binode cases
2492 print_exec(b->left, indent, 0);
2494 case Plus: fputs(" + ", stdout); break;
2495 case Minus: fputs(" - ", stdout); break;
2496 case Times: fputs(" * ", stdout); break;
2497 case Divide: fputs(" / ", stdout); break;
2498 case Rem: fputs(" % ", stdout); break;
2499 case Concat: fputs(" ++ ", stdout); break;
2500 default: abort(); // NOTEST
2502 print_exec(b->right, indent, 0);
2506 print_exec(b->right, indent, 0);
2510 print_exec(b->right, indent, 0);
2514 print_exec(b->right, indent, 0);
2518 ###### propagate binode cases
2524 /* both must be numbers, result is Tnum */
2527 /* as propagate_types ignores a NULL,
2528 * unary ops fit here too */
2529 propagate_types(b->left, c, ok, Tnum, 0);
2530 propagate_types(b->right, c, ok, Tnum, 0);
2531 if (!type_compat(type, Tnum, 0)) {
2532 type_err(c, "error: Arithmetic returns %1 but %2 expected", prog,
2539 /* both must be Tstr, result is Tstr */
2540 propagate_types(b->left, c, ok, Tstr, 0);
2541 propagate_types(b->right, c, ok, Tstr, 0);
2542 if (!type_compat(type, Tstr, 0)) {
2543 type_err(c, "error: Concat returns %1 but %2 expected", prog,
2550 return propagate_types(b->right, c, ok, type, 0);
2552 ###### interp binode cases
2555 rv = interp_exec(b->left);
2556 right = interp_exec(b->right);
2557 mpq_add(rv.num, rv.num, right.num);
2560 rv = interp_exec(b->left);
2561 right = interp_exec(b->right);
2562 mpq_sub(rv.num, rv.num, right.num);
2565 rv = interp_exec(b->left);
2566 right = interp_exec(b->right);
2567 mpq_mul(rv.num, rv.num, right.num);
2570 rv = interp_exec(b->left);
2571 right = interp_exec(b->right);
2572 mpq_div(rv.num, rv.num, right.num);
2577 left = interp_exec(b->left);
2578 right = interp_exec(b->right);
2579 mpz_init(l); mpz_init(r); mpz_init(rem);
2580 mpz_tdiv_q(l, mpq_numref(left.num), mpq_denref(left.num));
2581 mpz_tdiv_q(r, mpq_numref(right.num), mpq_denref(right.num));
2582 mpz_tdiv_r(rem, l, r);
2583 rv = val_init(Tnum);
2584 mpq_set_z(rv.num, rem);
2585 mpz_clear(r); mpz_clear(l); mpz_clear(rem);
2589 rv = interp_exec(b->right);
2590 mpq_neg(rv.num, rv.num);
2593 rv = interp_exec(b->right);
2594 mpq_abs(rv.num, rv.num);
2597 rv = interp_exec(b->right);
2600 left = interp_exec(b->left);
2601 right = interp_exec(b->right);
2603 rv.str = text_join(left.str, right.str);
2607 ###### value functions
2609 static struct text text_join(struct text a, struct text b)
2612 rv.len = a.len + b.len;
2613 rv.txt = malloc(rv.len);
2614 memcpy(rv.txt, a.txt, a.len);
2615 memcpy(rv.txt+a.len, b.txt, b.len);
2620 ### Blocks, Statements, and Statement lists.
2622 Now that we have expressions out of the way we need to turn to
2623 statements. There are simple statements and more complex statements.
2624 Simple statements do not contain newlines, complex statements do.
2626 Statements often come in sequences and we have corresponding simple
2627 statement lists and complex statement lists.
2628 The former comprise only simple statements separated by semicolons.
2629 The later comprise complex statements and simple statement lists. They are
2630 separated by newlines. Thus the semicolon is only used to separate
2631 simple statements on the one line. This may be overly restrictive,
2632 but I'm not sure I ever want a complex statement to share a line with
2635 Note that a simple statement list can still use multiple lines if
2636 subsequent lines are indented, so
2638 ###### Example: wrapped simple statement list
2643 is a single simple statement list. This might allow room for
2644 confusion, so I'm not set on it yet.
2646 A simple statement list needs no extra syntax. A complex statement
2647 list has two syntactic forms. It can be enclosed in braces (much like
2648 C blocks), or it can be introduced by a colon and continue until an
2649 unindented newline (much like Python blocks). With this extra syntax
2650 it is referred to as a block.
2652 Note that a block does not have to include any newlines if it only
2653 contains simple statements. So both of:
2655 if condition: a=b; d=f
2657 if condition { a=b; print f }
2661 In either case the list is constructed from a `binode` list with
2662 `Block` as the operator. When parsing the list it is most convenient
2663 to append to the end, so a list is a list and a statement. When using
2664 the list it is more convenient to consider a list to be a statement
2665 and a list. So we need a function to re-order a list.
2666 `reorder_bilist` serves this purpose.
2668 The only stand-alone statement we introduce at this stage is `pass`
2669 which does nothing and is represented as a `NULL` pointer in a `Block`
2670 list. Other stand-alone statements will follow once the infrastructure
2690 Block -> Open Statementlist Close ${ $0 = $<2; }$
2691 | Open Newlines Statementlist Close ${ $0 = $<3; }$
2692 | Open SimpleStatements } ${ $0 = reorder_bilist($<2); }$
2693 | Open Newlines SimpleStatements } ${ $0 = reorder_bilist($<3); }$
2694 | : Statementlist ${ $0 = $<2; }$
2695 | : SimpleStatements ${ $0 = reorder_bilist($<2); }$
2697 Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<1); }$
2699 ComplexStatements -> ComplexStatements ComplexStatement ${
2705 | ComplexStatements NEWLINE ${ $0 = $<1; }$
2706 | ComplexStatement ${
2714 ComplexStatement -> SimpleStatements NEWLINE ${
2715 $0 = reorder_bilist($<1);
2717 ## ComplexStatement Grammar
2720 SimpleStatements -> SimpleStatements ; SimpleStatement ${
2726 | SimpleStatement ${
2732 | SimpleStatements ; ${ $0 = $<1; }$
2734 SimpleStatement -> pass ${ $0 = NULL; }$
2735 ## SimpleStatement Grammar
2737 ###### print binode cases
2741 if (b->left == NULL)
2744 print_exec(b->left, indent, 0);
2747 print_exec(b->right, indent, 0);
2750 // block, one per line
2751 if (b->left == NULL)
2752 do_indent(indent, "pass\n");
2754 print_exec(b->left, indent, bracket);
2756 print_exec(b->right, indent, bracket);
2760 ###### propagate binode cases
2763 /* If any statement returns something other than Tnone
2764 * or Tbool then all such must return same type.
2765 * As each statement may be Tnone or something else,
2766 * we must always pass NULL (unknown) down, otherwise an incorrect
2767 * error might occur. We never return Tnone unless it is
2772 for (e = b; e; e = cast(binode, e->right)) {
2773 t = propagate_types(e->left, c, ok, NULL, rules);
2774 if ((rules & Rboolok) && t == Tbool)
2776 if (t && t != Tnone && t != Tbool) {
2779 else if (t != type) {
2780 type_err(c, "error: expected %1%r, found %2",
2781 e->left, type, rules, t);
2789 ###### interp binode cases
2791 while (rv.type == Tnone &&
2794 rv = interp_exec(b->left);
2795 b = cast(binode, b->right);
2799 ### The Print statement
2801 `print` is a simple statement that takes a comma-separated list of
2802 expressions and prints the values separated by spaces and terminated
2803 by a newline. No control of formatting is possible.
2805 `print` faces the same list-ordering issue as blocks, and uses the
2811 ###### SimpleStatement Grammar
2813 | print ExpressionList ${
2814 $0 = reorder_bilist($<2);
2816 | print ExpressionList , ${
2821 $0 = reorder_bilist($0);
2832 ExpressionList -> ExpressionList , Expression ${
2845 ###### print binode cases
2848 do_indent(indent, "print");
2852 print_exec(b->left, -1, 0);
2856 b = cast(binode, b->right);
2862 ###### propagate binode cases
2865 /* don't care but all must be consistent */
2866 propagate_types(b->left, c, ok, NULL, Rnolabel);
2867 propagate_types(b->right, c, ok, NULL, Rnolabel);
2870 ###### interp binode cases
2876 for ( ; b; b = cast(binode, b->right))
2880 left = interp_exec(b->left);
2893 ###### Assignment statement
2895 An assignment will assign a value to a variable, providing it hasn't
2896 be declared as a constant. The analysis phase ensures that the type
2897 will be correct so the interpreter just needs to perform the
2898 calculation. There is a form of assignment which declares a new
2899 variable as well as assigning a value. If a name is assigned before
2900 it is declared, and error will be raised as the name is created as
2901 `Tlabel` and it is illegal to assign to such names.
2907 ###### SimpleStatement Grammar
2908 | Variable = Expression ${
2914 | VariableDecl = Expression ${
2922 if ($1->var->where_set == NULL) {
2923 type_err(config2context(config), "Variable declared with no type or value: %v",
2933 ###### print binode cases
2936 do_indent(indent, "");
2937 print_exec(b->left, indent, 0);
2939 print_exec(b->right, indent, 0);
2946 struct variable *v = cast(var, b->left)->var;
2947 do_indent(indent, "");
2948 print_exec(b->left, indent, 0);
2949 if (cast(var, b->left)->var->constant) {
2950 if (v->where_decl == v->where_set) {
2952 type_print(v->val.type, stdout);
2957 if (v->where_decl == v->where_set) {
2959 type_print(v->val.type, stdout);
2966 print_exec(b->right, indent, 0);
2973 ###### propagate binode cases
2977 /* Both must match and not be labels,
2978 * Type must support 'dup',
2979 * For Assign, left must not be constant.
2982 t = propagate_types(b->left, c, ok, NULL,
2983 Rnolabel | (b->op == Assign ? Rnoconstant : 0));
2988 if (propagate_types(b->right, c, ok, t, 0) != t)
2989 if (b->left->type == Xvar)
2990 type_err(c, "info: variable '%v' was set as %1 here.",
2991 cast(var, b->left)->var->where_set, t, rules, Tnone);
2993 t = propagate_types(b->right, c, ok, NULL, Rnolabel);
2995 propagate_types(b->left, c, ok, t,
2996 (b->op == Assign ? Rnoconstant : 0));
2998 if (t && t->dup == NULL) {
2999 type_err(c, "error: cannot assign value of type %1", b, t, 0, NULL);
3006 ###### interp binode cases
3009 lleft = linterp_exec(b->left);
3010 right = interp_exec(b->right);
3015 free_value(right); // NOTEST
3021 struct variable *v = cast(var, b->left)->var;
3025 right = interp_exec(b->right);
3027 right = val_init(v->val.type);
3034 ### The `use` statement
3036 The `use` statement is the last "simple" statement. It is needed when
3037 the condition in a conditional statement is a block. `use` works much
3038 like `return` in C, but only completes the `condition`, not the whole
3044 ###### SimpleStatement Grammar
3046 $0 = new_pos(binode, $1);
3051 ###### print binode cases
3054 do_indent(indent, "use ");
3055 print_exec(b->right, -1, 0);
3060 ###### propagate binode cases
3063 /* result matches value */
3064 return propagate_types(b->right, c, ok, type, 0);
3066 ###### interp binode cases
3069 rv = interp_exec(b->right);
3072 ### The Conditional Statement
3074 This is the biggy and currently the only complex statement. This
3075 subsumes `if`, `while`, `do/while`, `switch`, and some parts of `for`.
3076 It is comprised of a number of parts, all of which are optional though
3077 set combinations apply. Each part is (usually) a key word (`then` is
3078 sometimes optional) followed by either an expression or a code block,
3079 except the `casepart` which is a "key word and an expression" followed
3080 by a code block. The code-block option is valid for all parts and,
3081 where an expression is also allowed, the code block can use the `use`
3082 statement to report a value. If the code block does not report a value
3083 the effect is similar to reporting `True`.
3085 The `else` and `case` parts, as well as `then` when combined with
3086 `if`, can contain a `use` statement which will apply to some
3087 containing conditional statement. `for` parts, `do` parts and `then`
3088 parts used with `for` can never contain a `use`, except in some
3089 subordinate conditional statement.
3091 If there is a `forpart`, it is executed first, only once.
3092 If there is a `dopart`, then it is executed repeatedly providing
3093 always that the `condpart` or `cond`, if present, does not return a non-True
3094 value. `condpart` can fail to return any value if it simply executes
3095 to completion. This is treated the same as returning `True`.
3097 If there is a `thenpart` it will be executed whenever the `condpart`
3098 or `cond` returns True (or does not return any value), but this will happen
3099 *after* `dopart` (when present).
3101 If `elsepart` is present it will be executed at most once when the
3102 condition returns `False` or some value that isn't `True` and isn't
3103 matched by any `casepart`. If there are any `casepart`s, they will be
3104 executed when the condition returns a matching value.
3106 The particular sorts of values allowed in case parts has not yet been
3107 determined in the language design, so nothing is prohibited.
3109 The various blocks in this complex statement potentially provide scope
3110 for variables as described earlier. Each such block must include the
3111 "OpenScope" nonterminal before parsing the block, and must call
3112 `var_block_close()` when closing the block.
3114 The code following "`if`", "`switch`" and "`for`" does not get its own
3115 scope, but is in a scope covering the whole statement, so names
3116 declared there cannot be redeclared elsewhere. Similarly the
3117 condition following "`while`" is in a scope the covers the body
3118 ("`do`" part) of the loop, and which does not allow conditional scope
3119 extension. Code following "`then`" (both looping and non-looping),
3120 "`else`" and "`case`" each get their own local scope.
3122 The type requirements on the code block in a `whilepart` are quite
3123 unusal. It is allowed to return a value of some identifiable type, in
3124 which case the loop aborts and an appropriate `casepart` is run, or it
3125 can return a Boolean, in which case the loop either continues to the
3126 `dopart` (on `True`) or aborts and runs the `elsepart` (on `False`).
3127 This is different both from the `ifpart` code block which is expected to
3128 return a Boolean, or the `switchpart` code block which is expected to
3129 return the same type as the casepart values. The correct analysis of
3130 the type of the `whilepart` code block is the reason for the
3131 `Rboolok` flag which is passed to `propagate_types()`.
3133 The `cond_statement` cannot fit into a `binode` so a new `exec` is
3142 struct exec *action;
3143 struct casepart *next;
3145 struct cond_statement {
3147 struct exec *forpart, *condpart, *dopart, *thenpart, *elsepart;
3148 struct casepart *casepart;
3151 ###### ast functions
3153 static void free_casepart(struct casepart *cp)
3157 free_exec(cp->value);
3158 free_exec(cp->action);
3165 static void free_cond_statement(struct cond_statement *s)
3169 free_exec(s->forpart);
3170 free_exec(s->condpart);
3171 free_exec(s->dopart);
3172 free_exec(s->thenpart);
3173 free_exec(s->elsepart);
3174 free_casepart(s->casepart);
3178 ###### free exec cases
3179 case Xcond_statement: free_cond_statement(cast(cond_statement, e)); break;
3181 ###### ComplexStatement Grammar
3182 | CondStatement ${ $0 = $<1; }$
3187 // both ForThen and Whilepart open scopes, and CondSuffix only
3188 // closes one - so in the first branch here we have another to close.
3189 CondStatement -> ForThen WhilePart CondSuffix ${
3191 $0->forpart = $1.forpart; $1.forpart = NULL;
3192 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
3193 $0->condpart = $2.condpart; $2.condpart = NULL;
3194 $0->dopart = $2.dopart; $2.dopart = NULL;
3195 var_block_close(config2context(config), CloseSequential);
3197 | WhilePart CondSuffix ${
3199 $0->condpart = $1.condpart; $1.condpart = NULL;
3200 $0->dopart = $1.dopart; $1.dopart = NULL;
3202 | SwitchPart CondSuffix ${
3206 | IfPart IfSuffix ${
3208 $0->condpart = $1.condpart; $1.condpart = NULL;
3209 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
3210 // This is where we close an "if" statement
3211 var_block_close(config2context(config), CloseSequential);
3214 CondSuffix -> IfSuffix ${
3216 // This is where we close scope of the whole
3217 // "for" or "while" statement
3218 var_block_close(config2context(config), CloseSequential);
3220 | CasePart CondSuffix ${
3222 $1->next = $0->casepart;
3227 CasePart -> Newlines case Expression OpenScope Block ${
3228 $0 = calloc(1,sizeof(struct casepart));
3231 var_block_close(config2context(config), CloseParallel);
3233 | case Expression OpenScope Block ${
3234 $0 = calloc(1,sizeof(struct casepart));
3237 var_block_close(config2context(config), CloseParallel);
3241 IfSuffix -> Newlines ${ $0 = new(cond_statement); }$
3242 | Newlines else OpenScope Block ${
3243 $0 = new(cond_statement);
3245 var_block_close(config2context(config), CloseElse);
3247 | else OpenScope Block ${
3248 $0 = new(cond_statement);
3250 var_block_close(config2context(config), CloseElse);
3252 | Newlines else OpenScope CondStatement ${
3253 $0 = new(cond_statement);
3255 var_block_close(config2context(config), CloseElse);
3257 | else OpenScope CondStatement ${
3258 $0 = new(cond_statement);
3260 var_block_close(config2context(config), CloseElse);
3265 // These scopes are closed in CondSuffix
3266 ForPart -> for OpenScope SimpleStatements ${
3267 $0 = reorder_bilist($<3);
3269 | for OpenScope Block ${
3273 ThenPart -> then OpenScope SimpleStatements ${
3274 $0 = reorder_bilist($<3);
3275 var_block_close(config2context(config), CloseSequential);
3277 | then OpenScope Block ${
3279 var_block_close(config2context(config), CloseSequential);
3282 ThenPartNL -> ThenPart OptNL ${
3286 // This scope is closed in CondSuffix
3287 WhileHead -> while OpenScope Block ${
3292 ForThen -> ForPart OptNL ThenPartNL ${
3300 // This scope is closed in CondSuffix
3301 WhilePart -> while OpenScope Expression Block ${
3302 $0.type = Xcond_statement;
3306 | WhileHead OptNL do Block ${
3307 $0.type = Xcond_statement;
3312 IfPart -> if OpenScope Expression OpenScope Block ${
3313 $0.type = Xcond_statement;
3316 var_block_close(config2context(config), CloseParallel);
3318 | if OpenScope Block OptNL then OpenScope Block ${
3319 $0.type = Xcond_statement;
3322 var_block_close(config2context(config), CloseParallel);
3326 // This scope is closed in CondSuffix
3327 SwitchPart -> switch OpenScope Expression ${
3330 | switch OpenScope Block ${
3334 ###### print exec cases
3336 case Xcond_statement:
3338 struct cond_statement *cs = cast(cond_statement, e);
3339 struct casepart *cp;
3341 do_indent(indent, "for");
3342 if (bracket) printf(" {\n"); else printf(":\n");
3343 print_exec(cs->forpart, indent+1, bracket);
3346 do_indent(indent, "} then {\n");
3348 do_indent(indent, "then:\n");
3349 print_exec(cs->thenpart, indent+1, bracket);
3351 if (bracket) do_indent(indent, "}\n");
3355 if (cs->condpart && cs->condpart->type == Xbinode &&
3356 cast(binode, cs->condpart)->op == Block) {
3358 do_indent(indent, "while {\n");
3360 do_indent(indent, "while:\n");
3361 print_exec(cs->condpart, indent+1, bracket);
3363 do_indent(indent, "} do {\n");
3365 do_indent(indent, "do:\n");
3366 print_exec(cs->dopart, indent+1, bracket);
3368 do_indent(indent, "}\n");
3370 do_indent(indent, "while ");
3371 print_exec(cs->condpart, 0, bracket);
3376 print_exec(cs->dopart, indent+1, bracket);
3378 do_indent(indent, "}\n");
3383 do_indent(indent, "switch");
3385 do_indent(indent, "if");
3386 if (cs->condpart && cs->condpart->type == Xbinode &&
3387 cast(binode, cs->condpart)->op == Block) {
3392 print_exec(cs->condpart, indent+1, bracket);
3394 do_indent(indent, "}\n");
3396 do_indent(indent, "then:\n");
3397 print_exec(cs->thenpart, indent+1, bracket);
3401 print_exec(cs->condpart, 0, bracket);
3407 print_exec(cs->thenpart, indent+1, bracket);
3409 do_indent(indent, "}\n");
3414 for (cp = cs->casepart; cp; cp = cp->next) {
3415 do_indent(indent, "case ");
3416 print_exec(cp->value, -1, 0);
3421 print_exec(cp->action, indent+1, bracket);
3423 do_indent(indent, "}\n");
3426 do_indent(indent, "else");
3431 print_exec(cs->elsepart, indent+1, bracket);
3433 do_indent(indent, "}\n");
3438 ###### propagate exec cases
3439 case Xcond_statement:
3441 // forpart and dopart must return Tnone
3442 // thenpart must return Tnone if there is a dopart,
3443 // otherwise it is like elsepart.
3445 // be bool if there is no casepart
3446 // match casepart->values if there is a switchpart
3447 // either be bool or match casepart->value if there
3449 // elsepart and casepart->action must match the return type
3450 // expected of this statement.
3451 struct cond_statement *cs = cast(cond_statement, prog);
3452 struct casepart *cp;
3454 t = propagate_types(cs->forpart, c, ok, Tnone, 0);
3455 if (!type_compat(Tnone, t, 0))
3457 t = propagate_types(cs->dopart, c, ok, Tnone, 0);
3458 if (!type_compat(Tnone, t, 0))
3461 t = propagate_types(cs->thenpart, c, ok, Tnone, 0);
3462 if (!type_compat(Tnone, t, 0))
3465 if (cs->casepart == NULL)
3466 propagate_types(cs->condpart, c, ok, Tbool, 0);
3468 /* Condpart must match case values, with bool permitted */
3470 for (cp = cs->casepart;
3471 cp && !t; cp = cp->next)
3472 t = propagate_types(cp->value, c, ok, NULL, 0);
3473 if (!t && cs->condpart)
3474 t = propagate_types(cs->condpart, c, ok, NULL, Rboolok);
3475 // Now we have a type (I hope) push it down
3477 for (cp = cs->casepart; cp; cp = cp->next)
3478 propagate_types(cp->value, c, ok, t, 0);
3479 propagate_types(cs->condpart, c, ok, t, Rboolok);
3482 // (if)then, else, and case parts must return expected type.
3483 if (!cs->dopart && !type)
3484 type = propagate_types(cs->thenpart, c, ok, NULL, rules);
3486 type = propagate_types(cs->elsepart, c, ok, NULL, rules);
3487 for (cp = cs->casepart;
3490 type = propagate_types(cp->action, c, ok, NULL, rules);
3493 propagate_types(cs->thenpart, c, ok, type, rules);
3494 propagate_types(cs->elsepart, c, ok, type, rules);
3495 for (cp = cs->casepart; cp ; cp = cp->next)
3496 propagate_types(cp->action, c, ok, type, rules);
3502 ###### interp exec cases
3503 case Xcond_statement:
3505 struct value v, cnd;
3506 struct casepart *cp;
3507 struct cond_statement *c = cast(cond_statement, e);
3510 interp_exec(c->forpart);
3513 cnd = interp_exec(c->condpart);
3516 if (!(cnd.type == Tnone ||
3517 (cnd.type == Tbool && cnd.bool != 0)))
3519 // cnd is Tnone or Tbool, doesn't need to be freed
3521 interp_exec(c->dopart);
3524 rv = interp_exec(c->thenpart);
3525 if (rv.type != Tnone || !c->dopart)
3529 } while (c->dopart);
3531 for (cp = c->casepart; cp; cp = cp->next) {
3532 v = interp_exec(cp->value);
3533 if (value_cmp(v, cnd) == 0) {
3536 rv = interp_exec(cp->action);
3543 rv = interp_exec(c->elsepart);
3550 ### Top level structure
3552 All the language elements so far can be used in various places. Now
3553 it is time to clarify what those places are.
3555 At the top level of a file there will be a number of declarations.
3556 Many of the things that can be declared haven't been described yet,
3557 such as functions, procedures, imports, named types, and probably
3559 For now there are two sorts of things that can appear at the top
3560 level. They are predefined constants and the main program. While the
3561 syntax will allow the main program to appear multiple times, that will
3562 trigger an error if it is actually attempted.
3564 The various declarations do not return anything. They store the
3565 various declarations in the parse context.
3567 ###### Parser: grammar
3570 Ocean -> DeclarationList
3572 DeclarationList -> Declaration
3573 | DeclarationList Declaration
3575 Declaration -> DeclareConstant
3579 ## top level grammar
3581 ### The `const` section
3583 As well as being defined in with the code that uses them, constants
3584 can be declared at the top level. These have full-file scope, so they
3585 are always `InScope`. The value of a top level constant can be given
3586 as an expression, and this is evaluated immediately rather than in the
3587 later interpretation stage. Once we add functions to the language, we
3588 will need rules concern which, if any, can be used to define a top
3591 Constants are defined in a sectiont that starts with the reserved word
3592 `const` and then has a block with a list of assignment statements.
3593 For syntactic consistency, these must use the double-colon syntax to
3594 make it clear that they are constants. Type can also be given: if
3595 not, the type will be determined during analysis, as with other
3598 As the types constants are inserted at the head of a list, printing
3599 them in the same order that they were read is not straight forward.
3600 We take a quadratic approach here and count the number of constants
3601 (variables of depth 0), then count down from there, each time
3602 searching through for the Nth constant for decreasing N.
3604 ###### top level grammar
3606 DeclareConstant -> const Open ConstList Close
3607 | const Open Newlines ConstList Close
3608 | const Open SimpleConstList }
3609 | const Open Newlines SimpleConstList }
3611 | const SimpleConstList
3613 ConstList -> ComplexConsts
3614 ComplexConsts -> ComplexConst ComplexConsts
3616 ComplexConst -> SimpleConstList NEWLINE
3617 SimpleConstList -> Const ; SimpleConstList
3619 | Const ; SimpleConstList ;
3622 CType -> Type ${ $0 = $<1; }$
3625 Const -> IDENTIFIER :: CType = Expression ${ {
3629 v = var_decl(config2context(config), $1.txt);
3631 struct var *var = new_pos(var, $1);
3632 v->where_decl = var;
3637 v = var_ref(config2context(config), $1.txt);
3638 tok_err(config2context(config), "error: name already declared", &$1);
3639 type_err(config2context(config), "info: this is where '%v' was first declared",
3640 v->where_decl, NULL, 0, NULL);
3644 propagate_types($5, config2context(config), &ok, $3, 0);
3647 config2context(config)->parse_error = 1;
3649 v->val = interp_exec($5);
3658 while (target != 0) {
3660 for (v = context.in_scope; v; v=v->in_scope)
3661 if (v->depth == 0) {
3672 printf(" %.*s :: ", v->name->name.len, v->name->name.txt);
3673 type_print(v->val.type, stdout);
3675 if (v->val.type == Tstr)
3677 print_value(v->val);
3678 if (v->val.type == Tstr)
3686 ### Finally the whole program.
3688 Somewhat reminiscent of Pascal a (current) Ocean program starts with
3689 the keyword "program" and a list of variable names which are assigned
3690 values from command line arguments. Following this is a `block` which
3691 is the code to execute. Unlike Pascal, constants and other
3692 declarations come *before* the program.
3694 As this is the top level, several things are handled a bit
3696 The whole program is not interpreted by `interp_exec` as that isn't
3697 passed the argument list which the program requires. Similarly type
3698 analysis is a bit more interesting at this level.
3703 ###### top level grammar
3705 DeclareProgram -> Program ${ {
3706 struct parse_context *c = config2context(config);
3708 type_err(c, "Program defined a second time",
3716 Program -> program OpenScope Varlist Block OptNL ${
3719 $0->left = reorder_bilist($<3);
3721 var_block_close(config2context(config), CloseSequential);
3722 if (config2context(config)->scope_stack) abort();
3725 tok_err(config2context(config),
3726 "error: unhandled parse error", &$1);
3729 Varlist -> Varlist ArgDecl ${
3738 ArgDecl -> IDENTIFIER ${ {
3739 struct variable *v = var_decl(config2context(config), $1.txt);
3746 ###### print binode cases
3748 do_indent(indent, "program");
3749 for (b2 = cast(binode, b->left); b2; b2 = cast(binode, b2->right)) {
3751 print_exec(b2->left, 0, 0);
3757 print_exec(b->right, indent+1, bracket);
3759 do_indent(indent, "}\n");
3762 ###### propagate binode cases
3763 case Program: abort(); // NOTEST
3765 ###### core functions
3767 static int analyse_prog(struct exec *prog, struct parse_context *c)
3769 struct binode *b = cast(binode, prog);
3776 propagate_types(b->right, c, &ok, Tnone, 0);
3781 for (b = cast(binode, b->left); b; b = cast(binode, b->right)) {
3782 struct var *v = cast(var, b->left);
3783 if (!v->var->val.type) {
3784 v->var->where_set = b;
3785 v->var->val = val_prepare(Tstr);
3788 b = cast(binode, prog);
3791 propagate_types(b->right, c, &ok, Tnone, 0);
3796 /* Make sure everything is still consistent */
3797 propagate_types(b->right, c, &ok, Tnone, 0);
3801 static void interp_prog(struct exec *prog, char **argv)
3803 struct binode *p = cast(binode, prog);
3809 al = cast(binode, p->left);
3811 struct var *v = cast(var, al->left);
3812 struct value *vl = &v->var->val;
3814 if (argv[0] == NULL) {
3815 printf("Not enough args\n");
3818 al = cast(binode, al->right);
3820 *vl = parse_value(vl->type, argv[0]);
3821 if (vl->type == NULL)
3825 v = interp_exec(p->right);
3829 ###### interp binode cases
3830 case Program: abort(); // NOTEST
3832 ## And now to test it out.
3834 Having a language requires having a "hello world" program. I'll
3835 provide a little more than that: a program that prints "Hello world"
3836 finds the GCD of two numbers, prints the first few elements of
3837 Fibonacci, and performs a binary search for a number.
3839 ###### File: oceani.mk
3842 @echo "===== TEST ====="
3843 ./oceani --section "test: hello" oceani.mdc 55 33
3849 four ::= 2 + 2 ; five ::= 10/2
3850 const pie ::= "I like Pie";
3851 cake ::= "The cake is"
3855 print "Hello World, what lovely oceans you have!"
3856 print "are there", five, "?"
3857 print pi, pie, "but", cake
3859 /* When a variable is defined in both branches of an 'if',
3860 * and used afterwards, the variables are merged.
3866 print "Is", A, "bigger than", B,"? ", bigger
3867 /* If a variable is not used after the 'if', no
3868 * merge happens, so types can be different
3871 double:string = "yes"
3872 print A, "is more than twice", B, "?", double
3875 print "double", B, "is", double
3880 if a > 0 and then b > 0:
3886 print "GCD of", A, "and", B,"is", a
3888 print a, "is not positive, cannot calculate GCD"
3890 print b, "is not positive, cannot calculate GCD"
3895 print "Fibonacci:", f1,f2,
3896 then togo = togo - 1
3904 /* Binary search... */
3909 mid := (lo + hi) / 2
3921 print "Yay, I found", target
3923 print "Closest I found was", mid
3928 for i:=1; then i = i + 1; while i < size:
3929 n := list[i-1] * list[i-1]
3930 list[i] = (n / 100) % 10000
3932 print "Before sort:"
3933 for i:=0; then i = i + 1; while i < size:
3934 print "list[",i,"]=",list[i]
3936 for i := 1; then i=i+1; while i < size:
3937 for j:=i-1; then j=j-1; while j >= 0:
3938 if list[j] > list[j+1]:
3943 for i:=0; then i = i + 1; while i < size:
3944 print "list[",i,"]=",list[i]