1 # Ocean Interpreter - Stoney Creek version
3 Ocean is intended to be an compiled language, so this interpreter is
4 not targeted at being the final product. It is, rather, an intermediate
5 stage, and fills that role in two distinct ways.
7 Firstly, it exists as a platform to experiment with the early language
8 design. An interpreter is easy to write and easy to get working, so
9 the barrier for entry is lower if I aim to start with an interpreter.
11 Secondly, the plan for the Ocean compiler is to write it in the
12 [Ocean language](http://ocean-lang.org). To achieve this we naturally
13 need some sort of boot-strap process and this interpreter - written in
14 portable C - will fill that role. It will be used to bootstrap the
17 Two features that are not needed to fill either of these roles are
18 performance and completeness. The interpreter only needs to be fast
19 enough to run small test programs and occasionally to run the compiler
20 on itself. It only needs to be complete enough to test aspects of the
21 design which are developed before the compiler is working, and to run
22 the compiler on itself. Any features not used by the compiler when
23 compiling itself are superfluous. They may be included anyway, but
26 Nonetheless, the interpreter should end up being reasonably complete,
27 and any performance bottlenecks which appear and are easily fixed, will
32 This second version of the interpreter exists to test out the
33 structured statement providing conditions and iteration, and simple
34 variable scoping. Clearly we need some minimal other functionality so
35 that values can be tested and instructions iterated over. All that
36 functionality is clearly not normative at this stage (not that
37 anything is **really** normative yet) and will change, so early test
38 code will certainly break in later versions.
40 The under-test parts of the language are:
42 - conditional/looping structured statements
43 - the `use` statement which is needed for that
44 - Variable binding using ":=" and "::=", and assignment using "=".
46 Elements which are present to make a usable language are:
48 - "blocks" of multiple statements.
49 - `pass`: a statement which does nothing.
50 - expressions: `+`, `-`, `*`, `/` can apply to numbers and `++` can
51 catenate strings. `and`, `or`, `not` manipulate Booleans, and
52 normal comparison operators can work on all three types.
53 - `print`: will print the values in a list of expressions.
54 - `program`: is given a list of identifiers to initialize from
59 Versions of the interpreter which obviously do not support a complete
60 language will be named after creeks and streams. This one is Stoney
63 Once we have something reasonably resembling a complete language, the
64 names of rivers will be used.
65 Early versions of the compiler will be named after seas. Major
66 releases of the compiler will be named after oceans. Hopefully I will
67 be finished once I get to the Pacific Ocean release.
71 As well as parsing and executing a program, the interpreter can print
72 out the program from the parsed internal structure. This is useful
73 for validating the parsing.
74 So the main requirements of the interpreter are:
76 - Parse the program, possibly with tracing
77 - Analyse the parsed program to ensure consistency
81 This is all performed by a single C program extracted with
84 There will be two formats for printing the program: a default and one
85 that uses bracketing. So a `--bracket` command line option is needed
86 for that. Normally the first code section found is used, however an
87 alternate section can be requested so that a file (such as this one)
88 can contain multiple programs This is effected with the `--section`
91 ###### File: oceani.mk
93 myCFLAGS := -Wall -g -fplan9-extensions
94 CFLAGS := $(filter-out $(myCFLAGS),$(CFLAGS)) $(myCFLAGS)
95 myLDLIBS:= libparser.o libscanner.o libmdcode.o -licuuc
96 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
98 all :: $(LDLIBS) oceani
99 oceani.c oceani.h : oceani.mdc parsergen
100 ./parsergen -o oceani --LALR --tag Parser oceani.mdc
101 oceani.mk: oceani.mdc md2c
104 oceani: oceani.o $(LDLIBS)
105 $(CC) $(CFLAGS) -o oceani oceani.o $(LDLIBS)
107 ###### Parser: header
110 struct parse_context {
111 struct token_config config;
119 #define container_of(ptr, type, member) ({ \
120 const typeof( ((type *)0)->member ) *__mptr = (ptr); \
121 (type *)( (char *)__mptr - offsetof(type,member) );})
123 #define config2context(_conf) container_of(_conf, struct parse_context, \
132 #include <sys/mman.h>
151 static char Usage[] = "Usage: oceani --trace --print --noexec --brackets"
152 "--section=SectionName prog.ocn\n";
153 static const struct option long_options[] = {
154 {"trace", 0, NULL, 't'},
155 {"print", 0, NULL, 'p'},
156 {"noexec", 0, NULL, 'n'},
157 {"brackets", 0, NULL, 'b'},
158 {"section", 1, NULL, 's'},
161 const char *options = "tpnbs";
162 int main(int argc, char *argv[])
168 char *section = NULL;
169 struct parse_context context = {
171 .ignored = (1 << TK_line_comment)
172 | (1 << TK_block_comment),
173 .number_chars = ".,_+-",
178 int doprint=0, dotrace=0, doexec=1, brackets=0;
181 while ((opt = getopt_long(argc, argv, options, long_options, NULL))
184 case 't': dotrace=1; break;
185 case 'p': doprint=1; break;
186 case 'n': doexec=0; break;
187 case 'b': brackets=1; break;
188 case 's': section = optarg; break;
189 default: fprintf(stderr, Usage);
193 if (optind >= argc) {
194 fprintf(stderr, "oceani: no input file given\n");
197 fd = open(argv[optind], O_RDONLY);
199 fprintf(stderr, "oceani: cannot open %s\n", argv[optind]);
202 context.file_name = argv[optind];
203 len = lseek(fd, 0, 2);
204 file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
205 s = code_extract(file, file+len, NULL);
207 fprintf(stderr, "oceani: could not find any code in %s\n",
213 for (ss = s; ss; ss = ss->next) {
214 struct text sec = ss->section;
215 if (sec.len == strlen(section) &&
216 strncmp(sec.txt, section, sec.len) == 0)
220 prog = parse_oceani(ss->code, &context.config,
221 dotrace ? stderr : NULL);
223 fprintf(stderr, "oceani: cannot find section %s\n",
228 prog = parse_oceani(s->code, &context.config,
229 dotrace ? stderr : NULL);
231 fprintf(stderr, "oceani: fatal parser error.\n");
232 context.parse_error = 1;
235 print_exec(*prog, 0, brackets);
236 if (prog && doexec && !context.parse_error) {
237 if (!analyse_prog(*prog, &context)) {
238 fprintf(stderr, "oceani: type error in program - not running.\n");
241 interp_prog(*prog, argv+optind+1);
248 struct section *t = s->next;
254 exit(context.parse_error ? 1 : 0);
259 These four requirements of parse, analyse, print, interpret apply to
260 each language element individually so that is how most of the code
263 Three of the four are fairly self explanatory. The one that requires
264 a little explanation is the analysis step.
266 The current language design does not require (or even allow) the types
267 of variables to be declared, but they must still have a single type.
268 Different operations impose different requirements on the variables,
269 for example addition requires both arguments to be numeric, and
270 assignment requires the variable on the left to have the same type as
271 the expression on the right.
273 Analysis involves propagating these type requirements around and
274 consequently setting the type of each variable. If any requirements
275 are violated (e.g. a string is compared with a number) or if a
276 variable needs to have two different types, then an error is raised
277 and the program will not run.
279 If the same variable is declared in both branchs of an 'if/else', or
280 in all cases of a 'switch' then the multiple instances may be merged
281 into just one variable if the variable is references after the
282 conditional statement. When this happens, the types must naturally be
283 consistent across all the branches. When the variable is not used
284 outside the if, the variables in the different branches are distinct
285 and can be of different types.
287 Determining the types of all variables early is important for
288 processing command line arguments. These can be assigned to any type
289 of variable, but we must first know the correct type so any required
290 conversion can happen. If a variable is associated with a command
291 line argument but no type can be interpreted (e.g. the variable is
292 only ever used in a `print` statement), then the type is set to
295 Undeclared names may only appear in "use" statements and "case" expressions.
296 These names are given a type of "label" and a unique value.
297 This allows them to fill the role of a name in an enumerated type, which
298 is useful for testing the `switch` statement.
300 As we will see, the condition part of a `while` statement can return
301 either a Boolean or some other type. This requires that the expect
302 type that gets passed around comprises a type (`enum vtype`) and a
303 flag to indicate that `Vbool` is also permitted.
305 As there are, as yet, no distinct types that are compatible, there
306 isn't much subtlety in the analysis. When we have distinct number
307 types, this will become more interesting.
311 When analysis discovers an inconsistency it needs to report an error;
312 just refusing to run the code ensures that the error doesn't cascade,
313 but by itself it isn't very useful. A clear understand of the sort of
314 error message that are useful will help guide the process of analysis.
316 At a simplistic level, the only sort of error that type analysis can
317 report is that the type of some construct doesn't match a contextual
318 requirement. For example, in `4 + "hello"` the addition provides a
319 contextual requirement for numbers, but `"hello"` is not a number. In
320 this particular example no further information is needed as the types
321 are obvious from local information. When a variable is involved that
322 isn't the case. It may be helpful to explain why the variable has a
323 particular type, by indicating the location where the type was set,
324 whether by declaration or usage.
326 Using a recursive-descent analysis we can easily detect a problem at
327 multiple locations. In "`hello:= "there"; 4 + hello`" the addition
328 will detect that one argument is not a number and the usage of `hello`
329 will detect that a number was wanted, but not provided. In this
330 (early) version of the language, we will generate error reports at
331 multiple locations, so the use of `hello` will report an error and
332 explain were the value was set, and the addition will report an error
333 and say why numbers are needed. To be able to report locations for
334 errors, each language element will need to record a file location
335 (line and column) and each variable will need to record the language
336 element where its type was set. For now we will assume that each line
337 of an error message indicates one location in the file, and up to 2
338 types. So we provide a `printf`-like function which takes a format, a
339 language (a `struct exec` which has not yet been introduced), and 2
340 types. "`%1`" reports the first type, "`%2`" reports the second. We
341 will need a function to print the location, once we know how that is
342 stored. As will be explained later, there are sometimes extra rules for
343 type matching and they might affect error messages, we need to pass those
348 static void fput_loc(struct exec *loc, FILE *f);
350 ###### core functions
352 static void type_err(struct parse_context *c,
353 char *fmt, struct exec *loc,
354 enum vtype t1, int rules, enum vtype t2)
356 fprintf(stderr, "%s:", c->file_name);
357 fput_loc(loc, stderr);
358 for (; *fmt ; fmt++) {
365 case '%': fputc(*fmt, stderr); break;
366 default: fputc('?', stderr); break;
368 fputs(vtype_names[t1], stderr);
371 fputs(vtype_names[t2], stderr);
380 static void tok_err(struct parse_context *c, char *fmt, struct token *t)
382 fprintf(stderr, "%s:%d:%d: %s\n", c->file_name, t->line, t->col, fmt);
388 One last introductory step before detailing the language elements and
389 providing their four requirements is to establish the data structures
390 to store these elements.
392 There are two key objects that we need to work with: executable
393 elements which comprise the program, and values which the program
394 works with. Between these are the variables in their various scopes
395 which hold the values.
399 Values can be numbers, which we represent as multi-precision
400 fractions, strings, Booleans and labels. When analysing the program
401 we also need to allow for places where no value is meaningful
402 (`Vnone`) and where we don't know what type to expect yet (`Vunknown`).
404 Values are never shared, they are always copied when used, and freed
405 when no longer needed.
407 When propagating type information around the program, we need to
408 determine if two types are compatible, where `Vunknown` is compatible
409 with anything. There are two special cases with type compatibility,
410 both related to the Conditional Statement which will be described
411 later. In some cases a Boolean can be accepted as well as some other
412 primary type, and in others any type is acceptable except a label (`Vlabel`).
413 A separate function encode these cases will simplify some code later.
415 When assigning command line arguments to variables, we need to be able
416 to parse each type from a string.
424 myLDLIBS := libnumber.o libstring.o -lgmp
425 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
429 enum vtype {Vunknown, Vnone, Vstr, Vnum, Vbool, Vlabel} vtype;
438 enum val_rules {Rnolabel = 1<<0, Rboolok = 1<<1};
440 char *vtype_names[] = {"unknown", "none", "string",
441 "number", "Boolean", "label"};
445 if (rules & Rnolabel)
446 fputs(" (labels not permitted)", stderr);
450 static void free_value(struct value v)
454 case Vunknown: break;
455 case Vstr: free(v.str.txt); break;
456 case Vnum: mpq_clear(v.num); break;
462 static int vtype_compat(enum vtype require, enum vtype have, int rules)
464 if ((rules & Rboolok) && have == Vbool)
466 if ((rules & Rnolabel) && have == Vlabel)
468 if (require == Vunknown || have == Vunknown)
471 return require == have;
474 ###### value functions
476 static void val_init(struct value *val, enum vtype type)
481 case Vunknown: break;
483 mpq_init(val->num); break;
485 val->str.txt = malloc(1);
497 static struct value dup_value(struct value v)
503 case Vunknown: break;
512 mpq_set(rv.num, v.num);
515 rv.str.len = v.str.len;
516 rv.str.txt = malloc(rv.str.len);
517 memcpy(rv.str.txt, v.str.txt, v.str.len);
523 static int value_cmp(struct value left, struct value right)
526 if (left.vtype != right.vtype)
527 return left.vtype - right.vtype;
528 switch (left.vtype) {
529 case Vlabel: cmp = left.label == right.label ? 0 : 1; break;
530 case Vnum: cmp = mpq_cmp(left.num, right.num); break;
531 case Vstr: cmp = text_cmp(left.str, right.str); break;
532 case Vbool: cmp = left.bool - right.bool; break;
534 case Vunknown: cmp = 0;
539 static struct text text_join(struct text a, struct text b)
542 rv.len = a.len + b.len;
543 rv.txt = malloc(rv.len);
544 memcpy(rv.txt, a.txt, a.len);
545 memcpy(rv.txt+a.len, b.txt, b.len);
549 static void print_value(struct value v)
553 printf("*Unknown*"); break;
555 printf("*no-value*"); break;
557 printf("*label-%p*", v.label); break;
559 printf("%.*s", v.str.len, v.str.txt); break;
561 printf("%s", v.bool ? "True":"False"); break;
566 mpf_set_q(fl, v.num);
567 gmp_printf("%Fg", fl);
574 static int parse_value(struct value *vl, char *arg)
586 vl->str.len = strlen(arg);
587 vl->str.txt = malloc(vl->str.len);
588 memcpy(vl->str.txt, arg, vl->str.len);
595 tx.txt = arg; tx.len = strlen(tx.txt);
596 if (number_parse(vl->num, tail, tx) == 0)
599 mpq_neg(vl->num, vl->num);
601 printf("Unsupported suffix: %s\n", arg);
606 if (strcasecmp(arg, "true") == 0 ||
607 strcmp(arg, "1") == 0)
609 else if (strcasecmp(arg, "false") == 0 ||
610 strcmp(arg, "0") == 0)
613 printf("Bad bool: %s\n", arg);
623 Variables are scoped named values. We store the names in a linked
624 list of "bindings" sorted lexically, and use sequential search and
631 struct binding *next; // in lexical order
635 This linked list is stored in the parse context so that "reduce"
636 functions can find or add variables, and so the analysis phase can
637 ensure that every variable gets a type.
641 struct binding *varlist; // In lexical order
645 static struct binding *find_binding(struct parse_context *c, struct text s)
647 struct binding **l = &c->varlist;
652 (cmp = text_cmp((*l)->name, s)) < 0)
656 n = calloc(1, sizeof(*n));
663 Each name can be linked to multiple variables defined in different
664 scopes. Each scope starts where the name is declared and continues
665 until the end of the containing code block. Scopes of a given name
666 cannot nest, so a declaration while a name is in-scope is an error.
668 ###### binding fields
669 struct variable *var;
673 struct variable *previous;
675 struct binding *name;
676 struct exec *where_decl;// where name was declared
677 struct exec *where_set; // where type was set
681 While the naming seems strange, we include local constants in the
682 definition of variables. A name declared `var := value` can
683 subsequently be changed, but a name declared `var ::= value` cannot -
686 ###### variable fields
689 Scopes in parallel branches can be partially merged. More
690 specifically, if a given name is declared in both branches of an
691 if/else then it's scope is a candidate for merging. Similarly if
692 every branch of an exhaustive switch (e.g. has an "else" clause)
693 declares a given name, then the scopes from the branches are
694 candidates for merging.
696 Note that names declared inside a loop (which is only parallel to
697 itself) are never visible after the loop. Similarly names defined in
698 scopes which are not parallel, such as those started by `for` and
699 `switch`, are never visible after the scope. Only variables defined in
700 both `then` and `else` (including the implicit then after an `if`, and
701 excluding `then` used with `for`) and in all `case`s and `else` of a
702 `switch` or `while` can be visible beyond the `if`/`switch`/`while`.
704 Labels, which are a bit like variables, follow different rules.
705 Labels are not explicitly declared, but if an undeclared name appears
706 in a context where a label is legal, that effectively declares the
707 name as a label. The declaration remains in force (or in scope) at
708 least to the end of the immediately containing block and conditionally
709 in any larger containing block which does not declare the name in some
710 other way. Importantly, the conditional scope extension happens even
711 if the label is only used in one parallel branch of a conditional --
712 when used in one branch it is treated as having been declared in all
715 Merge candidates are tentatively visible beyond the end of the
716 branching statement which creates them. If the name is used, the
717 merge is affirmed and they become a single variable visible at the
718 outer layer. If not - if it is redeclared first - the merge lapses.
720 To track scopes we have an extra stack, implemented as a linked list,
721 which roughly parallels the parse stack and which is used exclusively
722 for scoping. When a new scope is opened, a new frame is pushed and
723 the child-count of the parent frame is incremented. This child-count
724 is used to distinguish between the first of a set of parallel scopes,
725 in which declared variables must not be in scope, and subsequent
726 branches, whether they must already be conditionally scoped.
728 To push a new frame *before* any code in the frame is parsed, we need a
729 grammar reduction. This is most easily achieved with a grammar
730 element which derives the empty string, and creates the new scope when
731 it is recognized. This can be placed, for example, between a keyword
732 like "if" and the code following it.
736 struct scope *parent;
742 struct scope *scope_stack;
745 static void scope_pop(struct parse_context *c)
747 struct scope *s = c->scope_stack;
749 c->scope_stack = s->parent;
754 static void scope_push(struct parse_context *c)
756 struct scope *s = calloc(1, sizeof(*s));
758 c->scope_stack->child_count += 1;
759 s->parent = c->scope_stack;
767 OpenScope -> ${ scope_push(config2context(config)); }$
770 Each variable records a scope depth and is in one of four states:
772 - "in scope". This is the case between the declaration of the
773 variable and the end of the containing block, and also between
774 the usage with affirms a merge and the end of that block.
776 The scope depth is not greater than the current parse context scope
777 nest depth. When the block of that depth closes, the state will
778 change. To achieve this, all "in scope" variables are linked
779 together as a stack in nesting order.
781 - "pending". The "in scope" block has closed, but other parallel
782 scopes are still being processed. So far, every parallel block at
783 the same level that has closed has declared the name.
785 The scope depth is the depth of the last parallel block that
786 enclosed the declaration, and that has closed.
788 - "conditionally in scope". The "in scope" block and all parallel
789 scopes have closed, and no further mention of the name has been
790 seen. This state includes a secondary nest depth which records the
791 outermost scope seen since the variable became conditionally in
792 scope. If a use of the name is found, the variable becomes "in
793 scope" and that secondary depth becomes the recorded scope depth.
794 If the name is declared as a new variable, the old variable becomes
795 "out of scope" and the recorded scope depth stays unchanged.
797 - "out of scope". The variable is neither in scope nor conditionally
798 in scope. It is permanently out of scope now and can be removed from
799 the "in scope" stack.
802 ###### variable fields
803 int depth, min_depth;
804 enum { OutScope, PendingScope, CondScope, InScope } scope;
805 struct variable *in_scope;
809 struct variable *in_scope;
811 All variables with the same name are linked together using the
812 'previous' link. Those variable that have
813 been affirmatively merged all have a 'merged' pointer that points to
814 one primary variable - the most recently declared instance. When
815 merging variables, we need to also adjust the 'merged' pointer on any
816 other variables that had previously been merged with the one that will
817 no longer be primary.
819 ###### variable fields
820 struct variable *merged;
824 static void variable_merge(struct variable *primary, struct variable *secondary)
830 primary = primary->merged;
832 for (v = primary->previous; v; v=v->previous)
833 if (v == secondary || v == secondary->merged ||
834 v->merged == secondary ||
835 (v->merged && v->merged == secondary->merged)) {
843 while (context.varlist) {
844 struct binding *b = context.varlist;
845 struct variable *v = b->var;
846 context.varlist = b->next;
849 struct variable *t = v;
857 #### Manipulating Bindings
859 When a name is conditionally visible, a new declaration discards the
860 old binding - the condition lapses. Conversely a usage of the name
861 affirms the visibility and extends it to the end of the containing
862 block - i.e. the block that contains both the original declaration and
863 the latest usage. This is determined from `min_depth`. When a
864 conditionally visible variable gets affirmed like this, it is also
865 merged with other conditionally visible variables with the same name.
867 When we parse a variable declaration we either signal an error if the
868 name is currently bound, or create a new variable at the current nest
869 depth if the name is unbound or bound to a conditionally scoped or
870 pending-scope variable. If the previous variable was conditionally
871 scoped, it and its homonyms becomes out-of-scope.
873 When we parse a variable reference (including non-declarative
874 assignment) we signal an error if the name is not bound or is bound to
875 a pending-scope variable; update the scope if the name is bound to a
876 conditionally scoped variable; or just proceed normally if the named
877 variable is in scope.
879 When we exit a scope, any variables bound at this level are either
880 marked out of scope or pending-scoped, depending on whether the
881 scope was sequential or parallel.
883 When exiting a parallel scope we check if there are any variables that
884 were previously pending and are still visible. If there are, then
885 there weren't redeclared in the most recent scope, so they cannot be
886 merged and must become out-of-scope. If it is not the first of
887 parallel scopes (based on `child_count`), we check that there was a
888 previous binding that is still pending-scope. If there isn't, the new
889 variable must now be out-of-scope.
891 When exiting a sequential scope that immediately enclosed parallel
892 scopes, we need to resolve any pending-scope variables. If there was
893 no `else` clause, and we cannot determine that the `switch` was exhaustive,
894 we need to mark all pending-scope variable as out-of-scope. Otherwise
895 all pending-scope variables become conditionally scoped.
898 enum closetype { CloseSequential, CloseParallel, CloseElse };
902 static struct variable *var_decl(struct parse_context *c, struct text s)
904 struct binding *b = find_binding(c, s);
905 struct variable *v = b->var;
907 switch (v ? v->scope : OutScope) {
909 /* Caller will report the error */
913 v && v->scope == CondScope;
919 v = calloc(1, sizeof(*v));
920 v->previous = b->var;
923 v->min_depth = v->depth = c->scope_depth;
925 v->in_scope = c->in_scope;
927 val_init(&v->val, Vunknown);
931 static struct variable *var_ref(struct parse_context *c, struct text s)
933 struct binding *b = find_binding(c, s);
934 struct variable *v = b->var;
937 switch (v ? v->scope : OutScope) {
940 /* Signal an error - once that is possible */
943 /* All CondScope variables of this name need to be merged
946 v->depth = v->min_depth;
948 for (v2 = v->previous;
949 v2 && v2->scope == CondScope;
951 variable_merge(v, v2);
959 static void var_block_close(struct parse_context *c, enum closetype ct)
961 /* close of all variables that are in_scope */
962 struct variable *v, **vp, *v2;
965 for (vp = &c->in_scope;
966 v = *vp, v && v->depth > c->scope_depth && v->min_depth > c->scope_depth;
970 case CloseParallel: /* handle PendingScope */
974 if (c->scope_stack->child_count == 1)
975 v->scope = PendingScope;
976 else if (v->previous &&
977 v->previous->scope == PendingScope)
978 v->scope = PendingScope;
979 else if (v->val.vtype == Vlabel)
980 v->scope = PendingScope;
981 else if (v->name->var == v)
983 if (ct == CloseElse) {
984 /* All Pending variables with this name
985 * are now Conditional */
987 v2 && v2->scope == PendingScope;
989 v2->scope = CondScope;
994 v2 && v2->scope == PendingScope;
996 if (v2->val.vtype != Vlabel)
997 v2->scope = OutScope;
999 case OutScope: break;
1002 case CloseSequential:
1003 if (v->val.vtype == Vlabel)
1004 v->scope = PendingScope;
1007 v->scope = OutScope;
1010 /* There was no 'else', so we can only become
1011 * conditional if we know the cases were exhaustive,
1012 * and that doesn't mean anything yet.
1013 * So only labels become conditional..
1016 v2 && v2->scope == PendingScope;
1018 if (v2->val.vtype == Vlabel) {
1019 v2->scope = CondScope;
1020 v2->min_depth = c->scope_depth;
1022 v2->scope = OutScope;
1025 case OutScope: break;
1029 if (v->scope == OutScope)
1038 Executables can be lots of different things. In many cases an
1039 executable is just an operation combined with one or two other
1040 executables. This allows for expressions and lists etc. Other times
1041 an executable is something quite specific like a constant or variable
1042 name. So we define a `struct exec` to be a general executable with a
1043 type, and a `struct binode` which is a subclass of `exec`, forms a
1044 node in a binary tree, and holds an operation. There will be other
1045 subclasses, and to access these we need to be able to `cast` the
1046 `exec` into the various other types.
1049 #define cast(structname, pointer) ({ \
1050 const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \
1051 if (__mptr && *__mptr != X##structname) abort(); \
1052 (struct structname *)( (char *)__mptr);})
1054 #define new(structname) ({ \
1055 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1056 __ptr->type = X##structname; \
1057 __ptr->line = -1; __ptr->column = -1; \
1060 #define new_pos(structname, token) ({ \
1061 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1062 __ptr->type = X##structname; \
1063 __ptr->line = token.line; __ptr->column = token.col; \
1072 enum exec_types type;
1080 struct exec *left, *right;
1083 ###### ast functions
1085 static int __fput_loc(struct exec *loc, FILE *f)
1087 if (loc->line >= 0) {
1088 fprintf(f, "%d:%d: ", loc->line, loc->column);
1091 if (loc->type == Xbinode)
1092 return __fput_loc(cast(binode,loc)->left, f) ||
1093 __fput_loc(cast(binode,loc)->right, f);
1096 static void fput_loc(struct exec *loc, FILE *f)
1098 if (!__fput_loc(loc, f))
1099 fprintf(f, "??:??: ");
1102 Each different type of `exec` node needs a number of functions
1103 defined, a bit like methods. We must be able to be able to free it,
1104 print it, analyse it and execute it. Once we have specific `exec`
1105 types we will need to parse them too. Let's take this a bit more
1110 The parser generator requires a `free_foo` function for each struct
1111 that stores attributes and they will be `exec`s and subtypes there-of.
1112 So we need `free_exec` which can handle all the subtypes, and we need
1115 ###### ast functions
1117 static void free_binode(struct binode *b)
1122 free_exec(b->right);
1126 ###### core functions
1127 static void free_exec(struct exec *e)
1136 ###### forward decls
1138 static void free_exec(struct exec *e);
1140 ###### free exec cases
1141 case Xbinode: free_binode(cast(binode, e)); break;
1145 Printing an `exec` requires that we know the current indent level for
1146 printing line-oriented components. As will become clear later, we
1147 also want to know what sort of bracketing to use.
1149 ###### ast functions
1151 static void do_indent(int i, char *str)
1158 ###### core functions
1159 static void print_binode(struct binode *b, int indent, int bracket)
1163 ## print binode cases
1167 static void print_exec(struct exec *e, int indent, int bracket)
1173 print_binode(cast(binode, e), indent, bracket); break;
1178 ###### forward decls
1180 static void print_exec(struct exec *e, int indent, int bracket);
1184 As discussed, analysis involves propagating type requirements around
1185 the program and looking for errors.
1187 So `propagate_types` is passed an expected type (being a `vtype`
1188 together with some `val_rules` flags) that the `exec` is expected to
1189 return, and returns the type that it does return, either of which can
1190 be `Vunknown`. An `ok` flag is passed by reference. It is set to `0`
1191 when an error is found, and `2` when any change is made. If it
1192 remains unchanged at `1`, then no more propagation is needed.
1194 ###### core functions
1196 static enum vtype propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1197 enum vtype type, int rules)
1204 switch (prog->type) {
1207 struct binode *b = cast(binode, prog);
1209 ## propagate binode cases
1213 ## propagate exec cases
1220 Interpreting an `exec` doesn't require anything but the `exec`. State
1221 is stored in variables and each variable will be directly linked from
1222 within the `exec` tree. The exception to this is the whole `program`
1223 which needs to look at command line arguments. The `program` will be
1224 interpreted separately.
1226 Each `exec` can return a value, which may be `Vnone` but shouldn't be `Vunknown`.
1228 ###### core functions
1230 static struct value interp_exec(struct exec *e)
1240 struct binode *b = cast(binode, e);
1241 struct value left, right;
1242 left.vtype = right.vtype = Vnone;
1244 ## interp binode cases
1246 free_value(left); free_value(right);
1249 ## interp exec cases
1254 ## Language elements
1256 Each language element needs to be parsed, printed, analysed,
1257 interpreted, and freed. There are several, so let's just start with
1258 the easy ones and work our way up.
1262 We have already met values as separate objects. When manifest
1263 constants appear in the program text, that must result in an executable
1264 which has a constant value. So the `val` structure embeds a value in
1280 $0 = new_pos(val, $1);
1281 $0->val.vtype = Vbool;
1285 $0 = new_pos(val, $1);
1286 $0->val.vtype = Vbool;
1290 $0 = new_pos(val, $1);
1291 $0->val.vtype = Vnum;
1294 if (number_parse($0->val.num, tail, $1.txt) == 0)
1295 mpq_init($0->val.num);
1297 tok_err(config2context(config), "error: unsupported number suffix.",
1302 $0 = new_pos(val, $1);
1303 $0->val.vtype = Vstr;
1306 string_parse(&$1, '\\', &$0->val.str, tail);
1308 tok_err(config2context(config), "error: unsupported string suffix.",
1313 $0 = new_pos(val, $1);
1314 $0->val.vtype = Vstr;
1317 string_parse(&$1, '\\', &$0->val.str, tail);
1319 tok_err(config2context(config), "error: unsupported string suffix.",
1324 ###### print exec cases
1327 struct val *v = cast(val, e);
1328 if (v->val.vtype == Vstr)
1330 print_value(v->val);
1331 if (v->val.vtype == Vstr)
1336 ###### propagate exec cases
1339 struct val *val = cast(val, prog);
1340 if (!vtype_compat(type, val->val.vtype, rules)) {
1341 type_err(c, "error: expected %1%r found %2",
1342 prog, type, rules, val->val.vtype);
1345 return val->val.vtype;
1348 ###### interp exec cases
1350 return dup_value(cast(val, e)->val);
1352 ###### ast functions
1353 static void free_val(struct val *v)
1361 ###### free exec cases
1362 case Xval: free_val(cast(val, e)); break;
1364 ###### ast functions
1365 // Move all nodes from 'b' to 'rv', reversing the order.
1366 // In 'b' 'left' is a list, and 'right' is the last node.
1367 // In 'rv', left' is the first node and 'right' is a list.
1368 static struct binode *reorder_bilist(struct binode *b)
1370 struct binode *rv = NULL;
1373 struct exec *t = b->right;
1377 b = cast(binode, b->left);
1387 Just as we used a `val` to wrap a value into an `exec`, we similarly
1388 need a `var` to wrap a `variable` into an exec. While each `val`
1389 contained a copy of the value, each `var` hold a link to the variable
1390 because it really is the same variable no matter where it appears.
1391 When a variable is used, we need to remember to follow the `->merged`
1392 link to find the primary instance.
1400 struct variable *var;
1406 VariableDecl -> IDENTIFIER := ${ {
1407 struct variable *v = var_decl(config2context(config), $1.txt);
1408 $0 = new_pos(var, $1);
1413 v = var_ref(config2context(config), $1.txt);
1415 type_err(config2context(config), "error: variable '%v' redeclared",
1416 $0, Vnone, 0, Vnone);
1417 type_err(config2context(config), "info: this is where '%v' was first declared",
1418 v->where_decl, Vnone, 0, Vnone);
1421 | IDENTIFIER ::= ${ {
1422 struct variable *v = var_decl(config2context(config), $1.txt);
1423 $0 = new_pos(var, $1);
1429 v = var_ref(config2context(config), $1.txt);
1431 type_err(config2context(config), "error: variable '%v' redeclared",
1432 $0, Vnone, 0, Vnone);
1433 type_err(config2context(config), "info: this is where '%v' was first declared",
1434 v->where_decl, Vnone, 0, Vnone);
1438 Variable -> IDENTIFIER ${ {
1439 struct variable *v = var_ref(config2context(config), $1.txt);
1440 $0 = new_pos(var, $1);
1442 /* This might be a label - allocate a var just in case */
1443 v = var_decl(config2context(config), $1.txt);
1445 val_init(&v->val, Vlabel);
1452 ###### print exec cases
1455 struct var *v = cast(var, e);
1457 struct binding *b = v->var->name;
1458 printf("%.*s", b->name.len, b->name.txt);
1465 if (loc->type == Xvar) {
1466 struct var *v = cast(var, loc);
1468 struct binding *b = v->var->name;
1469 fprintf(stderr, "%.*s", b->name.len, b->name.txt);
1471 fputs("???", stderr);
1473 fputs("NOTVAR", stderr);
1476 ###### propagate exec cases
1480 struct var *var = cast(var, prog);
1481 struct variable *v = var->var;
1483 type_err(c, "%d:BUG: no variable!!", prog, Vnone, 0, Vnone);
1489 if (v->val.vtype == Vunknown) {
1490 if (type != Vunknown && *ok != 0) {
1491 val_init(&v->val, type);
1492 v->where_set = prog;
1497 if (!vtype_compat(type, v->val.vtype, rules)) {
1498 type_err(c, "error: expected %1%r but variable '%v' is %2", prog,
1499 type, rules, v->val.vtype);
1500 type_err(c, "info: this is where '%v' was set to %1", v->where_set,
1501 v->val.vtype, rules, Vnone);
1504 if (type == Vunknown)
1505 return v->val.vtype;
1509 ###### interp exec cases
1512 struct var *var = cast(var, e);
1513 struct variable *v = var->var;
1517 return dup_value(v->val);
1520 ###### ast functions
1522 static void free_var(struct var *v)
1527 ###### free exec cases
1528 case Xvar: free_var(cast(var, e)); break;
1530 ### Expressions: Boolean
1532 Our first user of the `binode` will be expressions, and particularly
1533 Boolean expressions. As I haven't implemented precedence in the
1534 parser generator yet, we need different names from each precedence
1535 level used by expressions. The outer most or lowest level precedence
1536 are Boolean `or` `and`, and `not` which form an `Expression` out of `BTerm`s
1547 Expression -> Expression or BTerm ${ {
1548 struct binode *b = new(binode);
1554 | BTerm ${ $0 = $<1; }$
1556 BTerm -> BTerm and BFact ${ {
1557 struct binode *b = new(binode);
1563 | BFact ${ $0 = $<1; }$
1565 BFact -> not BFact ${ {
1566 struct binode *b = new(binode);
1573 ###### print binode cases
1575 print_exec(b->left, -1, 0);
1577 print_exec(b->right, -1, 0);
1580 print_exec(b->left, -1, 0);
1582 print_exec(b->right, -1, 0);
1586 print_exec(b->right, -1, 0);
1589 ###### propagate binode cases
1593 /* both must be Vbool, result is Vbool */
1594 propagate_types(b->left, c, ok, Vbool, 0);
1595 propagate_types(b->right, c, ok, Vbool, 0);
1596 if (type != Vbool && type != Vunknown) {
1597 type_err(c, "error: %1 operation found where %2 expected", prog,
1603 ###### interp binode cases
1605 rv = interp_exec(b->left);
1606 right = interp_exec(b->right);
1607 rv.bool = rv.bool && right.bool;
1610 rv = interp_exec(b->left);
1611 right = interp_exec(b->right);
1612 rv.bool = rv.bool || right.bool;
1615 rv = interp_exec(b->right);
1619 ### Expressions: Comparison
1621 Of slightly higher precedence that Boolean expressions are
1623 A comparison takes arguments of any type, but the two types must be
1626 To simplify the parsing we introduce an `eop` which can record an
1627 expression operator.
1634 ###### ast functions
1635 static void free_eop(struct eop *e)
1650 | Expr CMPop Expr ${ {
1651 struct binode *b = new(binode);
1657 | Expr ${ $0 = $<1; }$
1662 CMPop -> < ${ $0.op = Less; }$
1663 | > ${ $0.op = Gtr; }$
1664 | <= ${ $0.op = LessEq; }$
1665 | >= ${ $0.op = GtrEq; }$
1666 | == ${ $0.op = Eql; }$
1667 | != ${ $0.op = NEql; }$
1669 ###### print binode cases
1677 print_exec(b->left, -1, 0);
1679 case Less: printf(" < "); break;
1680 case LessEq: printf(" <= "); break;
1681 case Gtr: printf(" > "); break;
1682 case GtrEq: printf(" >= "); break;
1683 case Eql: printf(" == "); break;
1684 case NEql: printf(" != "); break;
1687 print_exec(b->right, -1, 0);
1690 ###### propagate binode cases
1697 /* Both must match but not labels, result is Vbool */
1698 t = propagate_types(b->left, c, ok, Vunknown, Rnolabel);
1700 propagate_types(b->right, c, ok, t, 0);
1702 t = propagate_types(b->right, c, ok, Vunknown, Rnolabel);
1704 t = propagate_types(b->left, c, ok, t, 0);
1706 if (!vtype_compat(type, Vbool, 0)) {
1707 type_err(c, "error: Comparison returns %1 but %2 expected", prog,
1708 Vbool, rules, type);
1713 ###### interp binode cases
1722 left = interp_exec(b->left);
1723 right = interp_exec(b->right);
1724 cmp = value_cmp(left, right);
1727 case Less: rv.bool = cmp < 0; break;
1728 case LessEq: rv.bool = cmp <= 0; break;
1729 case Gtr: rv.bool = cmp > 0; break;
1730 case GtrEq: rv.bool = cmp >= 0; break;
1731 case Eql: rv.bool = cmp == 0; break;
1732 case NEql: rv.bool = cmp != 0; break;
1733 default: rv.bool = 0; break;
1738 ### Expressions: The rest
1740 The remaining expressions with the highest precedence are arithmetic
1741 and string concatenation. They are `Expr`, `Term`, and `Factor`.
1742 The `Factor` is where the `Value` and `Variable` that we already have
1745 `+` and `-` are both infix and prefix operations (where they are
1746 absolute value and negation). These have different operator names.
1748 We also have a 'Bracket' operator which records where parentheses were
1749 found. This make it easy to reproduce these when printing. Once
1750 precedence is handled better I might be able to discard this.
1762 Expr -> Expr Eop Term ${ {
1763 struct binode *b = new(binode);
1769 | Term ${ $0 = $<1; }$
1771 Term -> Term Top Factor ${ {
1772 struct binode *b = new(binode);
1778 | Factor ${ $0 = $<1; }$
1780 Factor -> ( Expression ) ${ {
1781 struct binode *b = new_pos(binode, $1);
1787 struct binode *b = new(binode);
1792 | Value ${ $0 = $<1; }$
1793 | Variable ${ $0 = $<1; }$
1796 Eop -> + ${ $0.op = Plus; }$
1797 | - ${ $0.op = Minus; }$
1799 Uop -> + ${ $0.op = Absolute; }$
1800 | - ${ $0.op = Negate; }$
1802 Top -> * ${ $0.op = Times; }$
1803 | / ${ $0.op = Divide; }$
1804 | ++ ${ $0.op = Concat; }$
1806 ###### print binode cases
1812 print_exec(b->left, indent, 0);
1814 case Plus: printf(" + "); break;
1815 case Minus: printf(" - "); break;
1816 case Times: printf(" * "); break;
1817 case Divide: printf(" / "); break;
1818 case Concat: printf(" ++ "); break;
1821 print_exec(b->right, indent, 0);
1825 print_exec(b->right, indent, 0);
1829 print_exec(b->right, indent, 0);
1833 print_exec(b->right, indent, 0);
1837 ###### propagate binode cases
1842 /* both must be numbers, result is Vnum */
1845 /* as propagate_types ignores a NULL,
1846 * unary ops fit here too */
1847 propagate_types(b->left, c, ok, Vnum, 0);
1848 propagate_types(b->right, c, ok, Vnum, 0);
1849 if (!vtype_compat(type, Vnum, 0)) {
1850 type_err(c, "error: Arithmetic returns %1 but %2 expected", prog,
1857 /* both must be Vstr, result is Vstr */
1858 propagate_types(b->left, c, ok, Vstr, 0);
1859 propagate_types(b->right, c, ok, Vstr, 0);
1860 if (!vtype_compat(type, Vstr, 0)) {
1861 type_err(c, "error: Concat returns %1 but %2 expected", prog,
1868 return propagate_types(b->right, c, ok, type, 0);
1870 ###### interp binode cases
1873 rv = interp_exec(b->left);
1874 right = interp_exec(b->right);
1875 mpq_add(rv.num, rv.num, right.num);
1878 rv = interp_exec(b->left);
1879 right = interp_exec(b->right);
1880 mpq_sub(rv.num, rv.num, right.num);
1883 rv = interp_exec(b->left);
1884 right = interp_exec(b->right);
1885 mpq_mul(rv.num, rv.num, right.num);
1888 rv = interp_exec(b->left);
1889 right = interp_exec(b->right);
1890 mpq_div(rv.num, rv.num, right.num);
1893 rv = interp_exec(b->right);
1894 mpq_neg(rv.num, rv.num);
1897 rv = interp_exec(b->right);
1898 mpq_abs(rv.num, rv.num);
1901 rv = interp_exec(b->right);
1904 left = interp_exec(b->left);
1905 right = interp_exec(b->right);
1907 rv.str = text_join(left.str, right.str);
1910 ### Blocks, Statements, and Statement lists.
1912 Now that we have expressions out of the way we need to turn to
1913 statements. There are simple statements and more complex statements.
1914 Simple statements do not contain newlines, complex statements do.
1916 Statements often come in sequences and we have corresponding simple
1917 statement lists and complex statement lists.
1918 The former comprise only simple statements separated by semicolons.
1919 The later comprise complex statements and simple statement lists. They are
1920 separated by newlines. Thus the semicolon is only used to separate
1921 simple statements on the one line. This may be overly restrictive,
1922 but I'm not sure I ever want a complex statement to share a line with
1925 Note that a simple statement list can still use multiple lines if
1926 subsequent lines are indented, so
1928 ###### Example: wrapped simple statement list
1933 is a single simple statement list. This might allow room for
1934 confusion, so I'm not set on it yet.
1936 A simple statement list needs no extra syntax. A complex statement
1937 list has two syntactic forms. It can be enclosed in braces (much like
1938 C blocks), or it can be introduced by a colon and continue until an
1939 unindented newline (much like Python blocks). With this extra syntax
1940 it is referred to as a block.
1942 Note that a block does not have to include any newlines if it only
1943 contains simple statements. So both of:
1945 if condition: a=b; d=f
1947 if condition { a=b; print f }
1951 In either case the list is constructed from a `binode` list with
1952 `Block` as the operator. When parsing the list it is most convenient
1953 to append to the end, so a list is a list and a statement. When using
1954 the list it is more convenient to consider a list to be a statement
1955 and a list. So we need a function to re-order a list.
1956 `reorder_bilist` serves this purpose.
1958 The only stand-alone statement we introduce at this stage is `pass`
1959 which does nothing and is represented as a `NULL` pointer in a `Block`
1979 Block -> Open Statementlist Close ${ $0 = $<2; }$
1980 | Open Newlines Statementlist Close ${ $0 = $<3; }$
1981 | Open SimpleStatements } ${ $0 = reorder_bilist($<2); }$
1982 | Open Newlines SimpleStatements } ${ $0 = reorder_bilist($<3); }$
1983 | : Statementlist ${ $0 = $<2; }$
1984 | : SimpleStatements ${ $0 = reorder_bilist($<2); }$
1986 Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<1); }$
1988 ComplexStatements -> ComplexStatements ComplexStatement ${
1994 | ComplexStatements NEWLINE ${ $0 = $<1; }$
1995 | ComplexStatement ${
2003 ComplexStatement -> SimpleStatements NEWLINE ${
2004 $0 = reorder_bilist($<1);
2006 ## ComplexStatement Grammar
2009 SimpleStatements -> SimpleStatements ; SimpleStatement ${
2015 | SimpleStatement ${
2021 | SimpleStatements ; ${ $0 = $<1; }$
2023 SimpleStatement -> pass ${ $0 = NULL; }$
2024 ## SimpleStatement Grammar
2026 ###### print binode cases
2030 if (b->left == NULL)
2033 print_exec(b->left, indent, 0);
2036 print_exec(b->right, indent, 0);
2039 // block, one per line
2040 if (b->left == NULL)
2041 do_indent(indent, "pass\n");
2043 print_exec(b->left, indent, bracket);
2045 print_exec(b->right, indent, bracket);
2049 ###### propagate binode cases
2052 /* If any statement returns something other then Vnone
2053 * or Vbool then all such must return same type.
2054 * As each statement may be Vnone or something else,
2055 * we must always pass Vunknown down, otherwise an incorrect
2056 * error might occur. We never return Vnone unless it is
2061 for (e = b; e; e = cast(binode, e->right)) {
2062 t = propagate_types(e->left, c, ok, Vunknown, rules);
2063 if ((rules & Rboolok) && t == Vbool)
2065 if (t != Vunknown && t != Vnone && t != Vbool) {
2066 if (type == Vunknown)
2068 else if (t != type) {
2069 type_err(c, "error: expected %1%r, found %2",
2070 e->left, type, rules, t);
2078 ###### interp binode cases
2080 while (rv.vtype == Vnone &&
2083 rv = interp_exec(b->left);
2084 b = cast(binode, b->right);
2088 ### The Print statement
2090 `print` is a simple statement that takes a comma-separated list of
2091 expressions and prints the values separated by spaces and terminated
2092 by a newline. No control of formatting is possible.
2094 `print` faces the same list-ordering issue as blocks, and uses the
2100 ###### SimpleStatement Grammar
2102 | print ExpressionList ${
2103 $0 = reorder_bilist($<2);
2105 | print ExpressionList , ${
2110 $0 = reorder_bilist($0);
2121 ExpressionList -> ExpressionList , Expression ${
2134 ###### print binode cases
2137 do_indent(indent, "print");
2141 print_exec(b->left, -1, 0);
2145 b = cast(binode, b->right);
2151 ###### propagate binode cases
2154 /* don't care but all must be consistent */
2155 propagate_types(b->left, c, ok, Vunknown, Rnolabel);
2156 propagate_types(b->right, c, ok, Vunknown, Rnolabel);
2159 ###### interp binode cases
2165 for ( ; b; b = cast(binode, b->right))
2169 left = interp_exec(b->left);
2182 ###### Assignment statement
2184 An assignment will assign a value to a variable, providing it hasn't
2185 be declared as a constant. The analysis phase ensures that the type
2186 will be correct so the interpreter just needs to perform the
2187 calculation. There is a form of assignment which declares a new
2188 variable as well as assigning a value. If a name is assigned before
2189 it is declared, and error will be raised as the name is created as
2190 `Vlabel` and it is illegal to assign to such names.
2196 ###### SimpleStatement Grammar
2197 | Variable = Expression ${ {
2198 struct var *v = cast(var, $1);
2204 if (v->var && !v->var->constant) {
2208 | VariableDecl Expression ${
2215 ###### print binode cases
2218 do_indent(indent, "");
2219 print_exec(b->left, indent, 0);
2221 print_exec(b->right, indent, 0);
2227 do_indent(indent, "");
2228 print_exec(b->left, indent, 0);
2229 if (cast(var, b->left)->var->constant)
2233 print_exec(b->right, indent, 0);
2238 ###### propagate binode cases
2242 /* Both must match and not be labels, result is Vnone */
2243 t = propagate_types(b->left, c, ok, Vunknown, Rnolabel);
2244 if (t != Vunknown) {
2245 if (propagate_types(b->right, c, ok, t, 0) != t)
2246 if (b->left->type == Xvar)
2247 type_err(c, "info: variable '%v' was set as %1 here.",
2248 cast(var, b->left)->var->where_set, t, rules, Vnone);
2250 t = propagate_types(b->right, c, ok, Vunknown, Rnolabel);
2252 propagate_types(b->left, c, ok, t, 0);
2258 ###### interp binode cases
2263 struct variable *v = cast(var, b->left)->var;
2266 right = interp_exec(b->right);
2269 right.vtype = Vunknown;
2273 ### The `use` statement
2275 The `use` statement is the last "simple" statement. It is needed when
2276 the condition in a conditional statement is a block. `use` works much
2277 like `return` in C, but only completes the `condition`, not the whole
2283 ###### SimpleStatement Grammar
2285 $0 = new_pos(binode, $1);
2290 ###### print binode cases
2293 do_indent(indent, "use ");
2294 print_exec(b->right, -1, 0);
2299 ###### propagate binode cases
2302 /* result matches value */
2303 return propagate_types(b->right, c, ok, type, 0);
2305 ###### interp binode cases
2308 rv = interp_exec(b->right);
2311 ### The Conditional Statement
2313 This is the biggy and currently the only complex statement. This
2314 subsumes `if`, `while`, `do/while`, `switch`, and some parts of `for`.
2315 It is comprised of a number of parts, all of which are optional though
2316 set combinations apply. Each part is (usually) a key word (`then` is
2317 sometimes optional) followed by either an expression or a code block,
2318 except the `casepart` which is a "key word and an expression" followed
2319 by a code block. The code-block option is valid for all parts and,
2320 where an expression is also allowed, the code block can use the `use`
2321 statement to report a value. If the code block does not report a value
2322 the effect is similar to reporting `True`.
2324 The `else` and `case` parts, as well as `then` when combined with
2325 `if`, can contain a `use` statement which will apply to some
2326 containing conditional statement. `for` parts, `do` parts and `then`
2327 parts used with `for` can never contain a `use`, except in some
2328 subordinate conditional statement.
2330 If there is a `forpart`, it is executed first, only once.
2331 If there is a `dopart`, then it is executed repeatedly providing
2332 always that the `condpart` or `cond`, if present, does not return a non-True
2333 value. `condpart` can fail to return any value if it simply executes
2334 to completion. This is treated the same as returning `True`.
2336 If there is a `thenpart` it will be executed whenever the `condpart`
2337 or `cond` returns True (or does not return any value), but this will happen
2338 *after* `dopart` (when present).
2340 If `elsepart` is present it will be executed at most once when the
2341 condition returns `False` or some value that isn't `True` and isn't
2342 matched by any `casepart`. If there are any `casepart`s, they will be
2343 executed when the condition returns a matching value.
2345 The particular sorts of values allowed in case parts has not yet been
2346 determined in the language design, so nothing is prohibited.
2348 The various blocks in this complex statement potentially provide scope
2349 for variables as described earlier. Each such block must include the
2350 "OpenScope" nonterminal before parsing the block, and must call
2351 `var_block_close()` when closing the block.
2353 The code following "`if`", "`switch`" and "`for`" does not get its own
2354 scope, but is in a scope covering the whole statement, so names
2355 declared there cannot be redeclared elsewhere. Similarly the
2356 condition following "`while`" is in a scope the covers the body
2357 ("`do`" part) of the loop, and which does not allow conditional scope
2358 extension. Code following "`then`" (both looping and non-looping),
2359 "`else`" and "`case`" each get their own local scope.
2361 The type requirements on the code block in a `whilepart` are quite
2362 unusal. It is allowed to return a value of some identifiable type, in
2363 which case the loop aborts and an appropriate `casepart` is run, or it
2364 can return a Boolean, in which case the loop either continues to the
2365 `dopart` (on `True`) or aborts and runs the `elsepart` (on `False`).
2366 This is different both from the `ifpart` code block which is expected to
2367 return a Boolean, or the `switchpart` code block which is expected to
2368 return the same type as the casepart values. The correct analysis of
2369 the type of the `whilepart` code block is the reason for the
2370 `Rboolok` flag which is passed to `propagate_types()`.
2372 The `cond_statement` cannot fit into a `binode` so a new `exec` is
2381 struct exec *action;
2382 struct casepart *next;
2384 struct cond_statement {
2386 struct exec *forpart, *condpart, *dopart, *thenpart, *elsepart;
2387 struct casepart *casepart;
2390 ###### ast functions
2392 static void free_casepart(struct casepart *cp)
2396 free_exec(cp->value);
2397 free_exec(cp->action);
2404 static void free_cond_statement(struct cond_statement *s)
2408 free_exec(s->forpart);
2409 free_exec(s->condpart);
2410 free_exec(s->dopart);
2411 free_exec(s->thenpart);
2412 free_exec(s->elsepart);
2413 free_casepart(s->casepart);
2417 ###### free exec cases
2418 case Xcond_statement: free_cond_statement(cast(cond_statement, e)); break;
2420 ###### ComplexStatement Grammar
2421 | CondStatement ${ $0 = $<1; }$
2426 // both ForThen and Whilepart open scopes, and CondSuffix only
2427 // closes one - so in the first branch here we have another to close.
2428 CondStatement -> ForThen WhilePart CondSuffix ${
2430 $0->forpart = $1.forpart; $1.forpart = NULL;
2431 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
2432 $0->condpart = $2.condpart; $2.condpart = NULL;
2433 $0->dopart = $2.dopart; $2.dopart = NULL;
2434 var_block_close(config2context(config), CloseSequential);
2436 | WhilePart CondSuffix ${
2438 $0->condpart = $1.condpart; $1.condpart = NULL;
2439 $0->dopart = $1.dopart; $1.dopart = NULL;
2441 | SwitchPart CondSuffix ${
2445 | IfPart IfSuffix ${
2447 $0->condpart = $1.condpart; $1.condpart = NULL;
2448 $0->thenpart = $1.thenpart; $1.thenpart = NULL;
2449 // This is where we close an "if" statement
2450 var_block_close(config2context(config), CloseSequential);
2453 CondSuffix -> IfSuffix ${
2455 // This is where we close scope of the whole
2456 // "for" or "while" statement
2457 var_block_close(config2context(config), CloseSequential);
2459 | CasePart CondSuffix ${
2461 $1->next = $0->casepart;
2466 CasePart -> Newlines case Expression OpenScope Block ${
2467 $0 = calloc(1,sizeof(struct casepart));
2470 var_block_close(config2context(config), CloseParallel);
2472 | case Expression OpenScope Block ${
2473 $0 = calloc(1,sizeof(struct casepart));
2476 var_block_close(config2context(config), CloseParallel);
2480 IfSuffix -> Newlines ${ $0 = new(cond_statement); }$
2481 | Newlines else OpenScope Block ${
2482 $0 = new(cond_statement);
2484 var_block_close(config2context(config), CloseElse);
2486 | else OpenScope Block ${
2487 $0 = new(cond_statement);
2489 var_block_close(config2context(config), CloseElse);
2491 | Newlines else OpenScope CondStatement ${
2492 $0 = new(cond_statement);
2494 var_block_close(config2context(config), CloseElse);
2496 | else OpenScope CondStatement ${
2497 $0 = new(cond_statement);
2499 var_block_close(config2context(config), CloseElse);
2504 // These scopes are closed in CondSuffix
2505 ForPart -> for OpenScope SimpleStatements ${
2506 $0 = reorder_bilist($<3);
2508 | for OpenScope Block ${
2512 ThenPart -> then OpenScope SimpleStatements ${
2513 $0 = reorder_bilist($<3);
2514 var_block_close(config2context(config), CloseSequential);
2516 | then OpenScope Block ${
2518 var_block_close(config2context(config), CloseSequential);
2521 ThenPartNL -> ThenPart OptNL ${
2525 // This scope is closed in CondSuffix
2526 WhileHead -> while OpenScope Block ${
2531 ForThen -> ForPart OptNL ThenPartNL ${
2539 // This scope is closed in CondSuffix
2540 WhilePart -> while OpenScope Expression Block ${
2541 $0.type = Xcond_statement;
2545 | WhileHead OptNL do Block ${
2546 $0.type = Xcond_statement;
2551 IfPart -> if OpenScope Expression OpenScope Block ${
2552 $0.type = Xcond_statement;
2555 var_block_close(config2context(config), CloseParallel);
2557 | if OpenScope Block OptNL then OpenScope Block ${
2558 $0.type = Xcond_statement;
2561 var_block_close(config2context(config), CloseParallel);
2565 // This scope is closed in CondSuffix
2566 SwitchPart -> switch OpenScope Expression ${
2569 | switch OpenScope Block ${
2573 ###### print exec cases
2575 case Xcond_statement:
2577 struct cond_statement *cs = cast(cond_statement, e);
2578 struct casepart *cp;
2580 do_indent(indent, "for");
2581 if (bracket) printf(" {\n"); else printf(":\n");
2582 print_exec(cs->forpart, indent+1, bracket);
2585 do_indent(indent, "} then {\n");
2587 do_indent(indent, "then:\n");
2588 print_exec(cs->thenpart, indent+1, bracket);
2590 if (bracket) do_indent(indent, "}\n");
2594 if (cs->condpart && cs->condpart->type == Xbinode &&
2595 cast(binode, cs->condpart)->op == Block) {
2597 do_indent(indent, "while {\n");
2599 do_indent(indent, "while:\n");
2600 print_exec(cs->condpart, indent+1, bracket);
2602 do_indent(indent, "} do {\n");
2604 do_indent(indent, "do:\n");
2605 print_exec(cs->dopart, indent+1, bracket);
2607 do_indent(indent, "}\n");
2609 do_indent(indent, "while ");
2610 print_exec(cs->condpart, 0, bracket);
2615 print_exec(cs->dopart, indent+1, bracket);
2617 do_indent(indent, "}\n");
2622 do_indent(indent, "switch");
2624 do_indent(indent, "if");
2625 if (cs->condpart && cs->condpart->type == Xbinode &&
2626 cast(binode, cs->condpart)->op == Block) {
2631 print_exec(cs->condpart, indent+1, bracket);
2633 do_indent(indent, "}\n");
2635 do_indent(indent, "then:\n");
2636 print_exec(cs->thenpart, indent+1, bracket);
2640 print_exec(cs->condpart, 0, bracket);
2646 print_exec(cs->thenpart, indent+1, bracket);
2648 do_indent(indent, "}\n");
2653 for (cp = cs->casepart; cp; cp = cp->next) {
2654 do_indent(indent, "case ");
2655 print_exec(cp->value, -1, 0);
2660 print_exec(cp->action, indent+1, bracket);
2662 do_indent(indent, "}\n");
2665 do_indent(indent, "else");
2670 print_exec(cs->elsepart, indent+1, bracket);
2672 do_indent(indent, "}\n");
2677 ###### propagate exec cases
2678 case Xcond_statement:
2680 // forpart and dopart must return Vnone
2681 // thenpart must return Vnone if there is a dopart,
2682 // otherwise it is like elsepart.
2684 // be bool if there is not casepart
2685 // match casepart->values if there is a switchpart
2686 // either be bool or match casepart->value if there
2688 // elsepart, casepart->action must match there return type
2689 // expected of this statement.
2690 struct cond_statement *cs = cast(cond_statement, prog);
2691 struct casepart *cp;
2693 t = propagate_types(cs->forpart, c, ok, Vnone, 0);
2694 if (!vtype_compat(Vnone, t, 0))
2696 t = propagate_types(cs->dopart, c, ok, Vnone, 0);
2697 if (!vtype_compat(Vnone, t, 0))
2700 t = propagate_types(cs->thenpart, c, ok, Vnone, 0);
2701 if (!vtype_compat(Vnone, t, 0))
2704 if (cs->casepart == NULL)
2705 propagate_types(cs->condpart, c, ok, Vbool, 0);
2707 /* Condpart must match case values, with bool permitted */
2709 for (cp = cs->casepart;
2710 cp && (t == Vunknown); cp = cp->next)
2711 t = propagate_types(cp->value, c, ok, Vunknown, 0);
2712 if (t == Vunknown && cs->condpart)
2713 t = propagate_types(cs->condpart, c, ok, Vunknown, Rboolok);
2714 // Now we have a type (I hope) push it down
2715 if (t != Vunknown) {
2716 for (cp = cs->casepart; cp; cp = cp->next)
2717 propagate_types(cp->value, c, ok, t, 0);
2718 propagate_types(cs->condpart, c, ok, t, Rboolok);
2721 // (if)then, else, and case parts must return expected type.
2722 if (!cs->dopart && type == Vunknown)
2723 type = propagate_types(cs->thenpart, c, ok, Vunknown, rules);
2724 if (type == Vunknown)
2725 type = propagate_types(cs->elsepart, c, ok, Vunknown, rules);
2726 for (cp = cs->casepart;
2727 cp && type == Vunknown;
2729 type = propagate_types(cp->action, c, ok, Vunknown, rules);
2730 if (type != Vunknown) {
2732 propagate_types(cs->thenpart, c, ok, type, rules);
2733 propagate_types(cs->elsepart, c, ok, type, rules);
2734 for (cp = cs->casepart; cp ; cp = cp->next)
2735 propagate_types(cp->action, c, ok, type, rules);
2741 ###### interp exec cases
2742 case Xcond_statement:
2744 struct value v, cnd;
2745 struct casepart *cp;
2746 struct cond_statement *c = cast(cond_statement, e);
2749 interp_exec(c->forpart);
2752 cnd = interp_exec(c->condpart);
2755 if (!(cnd.vtype == Vnone ||
2756 (cnd.vtype == Vbool && cnd.bool != 0)))
2758 // cnd is Vnone or Vbool, doesn't need to be freed
2760 interp_exec(c->dopart);
2763 v = interp_exec(c->thenpart);
2764 if (v.vtype != Vnone || !c->dopart)
2768 } while (c->dopart);
2770 for (cp = c->casepart; cp; cp = cp->next) {
2771 v = interp_exec(cp->value);
2772 if (value_cmp(v, cnd) == 0) {
2775 return interp_exec(cp->action);
2781 return interp_exec(c->elsepart);
2786 ### Finally the whole program.
2788 Somewhat reminiscent of Pascal a (current) Ocean program starts with
2789 the keyword "program" and a list of variable names which are assigned
2790 values from command line arguments. Following this is a `block` which
2791 is the code to execute.
2793 As this is the top level, several things are handled a bit
2795 The whole program is not interpreted by `interp_exec` as that isn't
2796 passed the argument list which the program requires. Similarly type
2797 analysis is a bit more interesting at this level.
2802 ###### Parser: grammar
2805 Program -> program OpenScope Varlist Block OptNL ${
2808 $0->left = reorder_bilist($<3);
2810 var_block_close(config2context(config), CloseSequential);
2811 if (config2context(config)->scope_stack) abort();
2814 tok_err(config2context(config),
2815 "error: unhandled parse error.", &$1);
2818 Varlist -> Varlist ArgDecl ${
2827 ArgDecl -> IDENTIFIER ${ {
2828 struct variable *v = var_decl(config2context(config), $1.txt);
2835 ###### print binode cases
2837 do_indent(indent, "program");
2838 for (b2 = cast(binode, b->left); b2; b2 = cast(binode, b2->right)) {
2840 print_exec(b2->left, 0, 0);
2846 print_exec(b->right, indent+1, bracket);
2848 do_indent(indent, "}\n");
2851 ###### propagate binode cases
2852 case Program: abort();
2854 ###### core functions
2856 static int analyse_prog(struct exec *prog, struct parse_context *c)
2858 struct binode *b = cast(binode, prog);
2865 propagate_types(b->right, c, &ok, Vnone, 0);
2870 for (b = cast(binode, b->left); b; b = cast(binode, b->right)) {
2871 struct var *v = cast(var, b->left);
2872 if (v->var->val.vtype == Vunknown) {
2873 v->var->where_set = b;
2874 val_init(&v->var->val, Vstr);
2877 b = cast(binode, prog);
2880 propagate_types(b->right, c, &ok, Vnone, 0);
2885 /* Make sure everything is still consistent */
2886 propagate_types(b->right, c, &ok, Vnone, 0);
2890 static void interp_prog(struct exec *prog, char **argv)
2892 struct binode *p = cast(binode, prog);
2898 al = cast(binode, p->left);
2900 struct var *v = cast(var, al->left);
2901 struct value *vl = &v->var->val;
2903 if (argv[0] == NULL) {
2904 printf("Not enough args\n");
2907 al = cast(binode, al->right);
2909 if (!parse_value(vl, argv[0]))
2913 v = interp_exec(p->right);
2917 ###### interp binode cases
2918 case Program: abort();
2920 ## And now to test it out.
2922 Having a language requires having a "hello world" program. I'll
2923 provide a little more than that: a program that prints "Hello world"
2924 finds the GCD of two numbers, prints the first few elements of
2925 Fibonacci, and performs a binary search for a number.
2927 ###### File: oceani.mk
2930 @echo "===== TEST ====="
2931 ./oceani --section "test: hello" oceani.mdc 55 33
2936 print "Hello World, what lovely oceans you have!"
2937 /* When a variable is defined in both branches of an 'if',
2938 * and used afterwards, the variables are merged.
2944 print "Is", A, "bigger than", B,"? ", bigger
2945 /* If a variable is not used after the 'if', no
2946 * merge happens, so types can be different
2950 print A, "is more than twice", B, "?", double
2953 print "double", A, "is only", double
2962 print "GCD of", A, "and", B,"is", a
2964 print a, "is not positive, cannot calculate GCD"
2966 print b, "is not positive, cannot calculate GCD"
2971 print "Fibonacci:", f1,f2,
2972 then togo = togo - 1
2980 /* Binary search... */
2985 mid := (lo + hi) / 2
2997 print "Yay, I found", target
2999 print "Closest I found was", mid