--- /dev/null
+<link href="http://jasonm23.github.io/markdown-css-themes/markdown.css" rel="stylesheet"/>
+# Ocean Interpreter - Falls Creek version
+
+Ocean is intended to be an compiled language, so this interpreter is
+not targeted at being the final product. It is very much an intermediate
+stage, and fills that role in two distinct ways.
+
+Firstly, it exists as a platform to experiment with the early language
+design. An interpreter is easy to write and easy to get working, so
+the barrier for entry is lower if I aim to start with an interpreter.
+
+Secondly, the plan for the Ocean compiler is to write it in the
+[Ocean language](http://ocean-lang.org). To achieve this we naturally
+need some sort of boot-strap process and this interpreter - written in
+portable C - will fill that role. It will be used to bootstrap the
+Ocean compiler.
+
+Two features that are not needed to fill either of these roles are
+performance and completeness. The interpreter only needs to be fast
+enough to run small test programs and occasionally to run the compiler
+on itself. It only needs to be complete enough to test aspects of the
+design which are developed before the compiler is working, and to run
+the compiler on itself. Any features not used by the compiler when
+compiling itself are superfluous. They may be included anyway, but
+they may not.
+
+Nonetheless, the interpreter should end up being reasonably complete,
+and any performance bottlenecks which appear and are easily fixed, will
+be.
+
+## Current version
+
+This initial version of the interpreter exists to test out the
+structured statement providing conditions and iteration. Clearly we
+need some minimal other functionality so that values can be tested and
+instructions iterated over. All that functionality is clearly not
+normative at this stage (not that anything is **really** normative
+yet) and will change, so early test code will certainly break.
+
+Beyond the structured statement and the `use` statement which is
+intimately related to it we have:
+
+ - "blocks" of multiple statements.
+ - `pass`: a statement which does nothing.
+ - variables: any identifier is assumed to store a number, string,
+ or Boolean.
+ - expressions: `+`, `-`, `*`, `/` can apply to integers and `++` can
+ catenate strings. `and`, `or`, `not` manipulate Booleans, and
+ normal comparison operators can work on all three types.
+ - assignments: can assign the value of an expression to a variable.
+ - `print`: will print the values in a list of expressions.
+ - `program`: is given a list of identifiers to initialize from
+ arguments.
+
+## Naming
+
+Versions of the interpreter which obviously do not support a complete
+language will be named after creeks and streams. This one is Falls
+Creek.
+
+Once we have something reasonably resembling a complete language, the
+names of rivers will be used.
+Early versions of the compiler will be named after seas. Major
+releases of the compiler will be named after oceans. Hopefully I will
+be finished once I get to the Pacific Ocean release.
+
+## Outline
+
+As well as parsing and executing a program, the interpreter can print
+out the program from the parsed internal structure. This is useful
+for validating the parsing.
+So the main requirements of the interpreter are:
+
+- Parse the program
+- Analyse the parsed program to ensure consistency
+- print the program
+- execute the program
+
+This is all performed by a single C program extracted with
+`parsergen`.
+
+There will be two formats for printing the program a default and one
+that uses bracketing. So an extra command line option is needed for
+that.
+
+###### File: oceani.mk
+
+ myCFLAGS := -Wall -g -fplan9-extensions
+ CFLAGS := $(filter-out $(myCFLAGS),$(CFLAGS)) $(myCFLAGS)
+ myLDLIBS:= libparser.o libscanner.o libmdcode.o -licuuc
+ LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
+ ## libs
+ all :: oceani
+ oceani.c oceani.h : oceani.mdc parsergen
+ ./parsergen -o oceani --LALR --tag Parser oceani.mdc
+ oceani.mk: oceani.mdc md2c
+ ./md2c oceani.mdc
+
+ oceani: oceani.o
+
+###### Parser: header
+ ## macros
+ ## ast
+ struct parse_context {
+ struct token_config config;
+ ## parse context
+ };
+
+###### Parser: code
+
+ #include <unistd.h>
+ #include <stdlib.h>
+ #include <fcntl.h>
+ #include <errno.h>
+ #include <sys/mman.h>
+ #include <string.h>
+ #include <stdio.h>
+ #include <locale.h>
+ #include <malloc.h>
+ #include "mdcode.h"
+ #include "scanner.h"
+ #include "parser.h"
+
+ ## includes
+
+ #include "oceani.h"
+
+ ## forward decls
+ ## value functions
+ ## ast functions
+ ## core functions
+
+ #include <getopt.h>
+ static char Usage[] = "Usage: oceani --trace --print --noexec prog.ocn\n";
+ static const struct option long_options[] = {
+ {"trace", 0, NULL, 't'},
+ {"print", 0, NULL, 'p'},
+ {"noexec", 0, NULL, 'n'},
+ {"brackets", 0, NULL, 'b'},
+ {NULL, 0, NULL, 0},
+ };
+ const char *options = "tpnb";
+ int main(int argc, char *argv[])
+ {
+ int fd;
+ int len;
+ char *file;
+ struct section *s;
+ struct parse_context context = {
+ .config = {
+ .ignored = (1 << TK_line_comment)
+ | (1 << TK_block_comment),
+ .number_chars = ".,_+-",
+ .word_start = "_",
+ .word_cont = "_",
+ },
+ };
+ int doprint=0, dotrace=0, doexec=1, brackets=0;
+ struct exec **prog;
+ int opt;
+ while ((opt = getopt_long(argc, argv, options, long_options, NULL))
+ != -1) {
+ switch(opt) {
+ case 't': dotrace=1; break;
+ case 'p': doprint=1; break;
+ case 'n': doexec=0; break;
+ case 'b': brackets=1; break;
+ default: fprintf(stderr, Usage);
+ exit(1);
+ }
+ }
+ if (optind >= argc) {
+ fprintf(stderr, "oceani: no input file given\n");
+ exit(1);
+ }
+ fd = open(argv[optind], O_RDONLY);
+ if (fd < 0) {
+ fprintf(stderr, "oceani: cannot open %s\n", argv[optind]);
+ exit(1);
+ }
+ len = lseek(fd, 0, 2);
+ file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
+ s = code_extract(file, file+len, NULL);
+ if (!s) {
+ fprintf(stderr, "oceani: could not find any code in %s\n",
+ argv[optind]);
+ exit(1);
+ }
+ prog = parse_oceani(s->code, &context.config,
+ dotrace ? stderr : NULL);
+ if (prog && doprint)
+ print_exec(*prog, 0, brackets);
+ if (prog && doexec) {
+ if (!analyse_prog(*prog, &context)) {
+ fprintf(stderr, "oceani: type error in program\n");
+ exit(1);
+ }
+ interp_prog(*prog, argv+optind+1);
+ }
+ if (prog) {
+ free_exec(*prog);
+ free(prog);
+ }
+ while (s) {
+ struct section *t = s->next;
+ code_free(s->code);
+ free(s);
+ s = t;
+ }
+ ## free context
+ exit(0);
+ }
+
+### Analysis
+
+These four requirements of parse, analyse, print, interpret apply to
+each language element individually so that is how most of the code
+will be structured.
+
+Three of the four are fairly self explanatory. The one that requires
+a little explanation is the analysis step.
+
+The current language design does not require variables to be declared,
+but they must have a single type. Different operations impose
+different requirements on the variables, for example addition requires
+both arguments to be numeric, and assignment requires the variable on
+the left to have the same type as the expression on the right.
+
+Analysis involves propagating these type requirements around
+consequently setting the type of each variable. If any requirements
+are violated (e.g. a string is compared with a number) or if a
+variable needs to have two different types, then an error is raised
+and the program will not run.
+
+Determining the types of all variables early is important for
+processing command line arguments. These can be assigned to any type
+of variable, but we must first know the correct type so any required
+conversion can happen. If a variable is associated with a command
+line argument but no type can be interpreted (e.g. the variable is
+only ever used in a `print` statement), then the type is set to
+'string'.
+
+If the type of a variable cannot be determined at all, then it is set
+to be a number and given a unique value. This allows it to fill the
+role of a name in an enumerated type, which is useful for testing the
+`switch` statement.
+
+## Data Structures
+
+One last introductory step before detailing the language elements and
+providing their four requirements is to establish the data structures
+to store these elements.
+
+There are two key objects that we need to work with: executable
+elements which comprise the program, and values which the program
+works with. Between these is the set of variables which hold the
+values.
+
+### Values
+
+Values can be numbers, which we represent as multi-precision
+fractions, strings and Booleans. When analysing the program we also
+need to allow for places where no value is meaningful (`Vnone`) and
+where we don't know what type to expect yet (`Vunknown`).
+A 2 character 'tail' is included in each value as the scanner wants
+to parse that from the end of numbers and we need somewhere to put
+it. It is currently ignored but one day might allow for
+e.g. "imaginary" numbers.
+
+Values are never shared, they are always copied when used, and freed
+when no longer needed.
+
+###### includes
+ #include <gmp.h>
+ #include "string.h"
+ #include "number.h"
+
+###### libs
+ myLDLIBS := libnumber.o libstring.o -lgmp
+ LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
+
+###### ast
+ struct value {
+ enum vtype {Vunknown, Vnone, Vstr, Vnum, Vbool} vtype;
+ union {
+ struct text str;
+ mpq_t num;
+ int bool;
+ };
+ char tail[2];
+ };
+
+###### ast functions
+ void free_value(struct value v)
+ {
+ switch (v.vtype) {
+ case Vnone:
+ case Vunknown: break;
+ case Vstr: free(v.str.txt); break;
+ case Vnum: mpq_clear(v.num); break;
+ case Vbool: break;
+ }
+ }
+
+###### value functions
+
+ static void val_init(struct value *val, enum vtype type)
+ {
+ val->vtype = type;
+ switch(type) {
+ case Vnone:abort();
+ case Vunknown: break;
+ case Vnum:
+ mpq_init(val->num); break;
+ case Vstr:
+ val->str.txt = malloc(1);
+ val->str.len = 0;
+ break;
+ case Vbool:
+ val->bool = 0;
+ break;
+ }
+ }
+
+ static struct value dup_value(struct value v)
+ {
+ struct value rv;
+ rv.vtype = v.vtype;
+ switch (rv.vtype) {
+ case Vnone:
+ case Vunknown: break;
+ case Vbool:
+ rv.bool = v.bool;
+ break;
+ case Vnum:
+ mpq_init(rv.num);
+ mpq_set(rv.num, v.num);
+ break;
+ case Vstr:
+ rv.str.len = v.str.len;
+ rv.str.txt = malloc(rv.str.len);
+ memcpy(rv.str.txt, v.str.txt, v.str.len);
+ break;
+ }
+ return rv;
+ }
+
+ static int value_cmp(struct value left, struct value right)
+ {
+ int cmp;
+ if (left.vtype != right.vtype)
+ return left.vtype - right.vtype;
+ switch (left.vtype) {
+ case Vnum: cmp = mpq_cmp(left.num, right.num); break;
+ case Vstr: cmp = text_cmp(left.str, right.str); break;
+ case Vbool: cmp = left.bool - right.bool; break;
+ case Vnone:
+ case Vunknown: cmp = 0;
+ }
+ return cmp;
+ }
+
+ static struct text text_join(struct text a, struct text b)
+ {
+ struct text rv;
+ rv.len = a.len + b.len;
+ rv.txt = malloc(rv.len);
+ memcpy(rv.txt, a.txt, a.len);
+ memcpy(rv.txt+a.len, b.txt, b.len);
+ return rv;
+ }
+
+ static void print_value(struct value v)
+ {
+ switch (v.vtype) {
+ case Vunknown:
+ printf("*Unknown*"); break;
+ case Vnone:
+ printf("*no-value*"); break;
+ case Vstr:
+ printf("%.*s", v.str.len, v.str.txt); break;
+ case Vbool:
+ printf("%s", v.bool ? "True":"False"); break;
+ case Vnum:
+ {
+ mpf_t fl;
+ mpf_init2(fl, 20);
+ mpf_set_q(fl, v.num);
+ gmp_printf("%Fg", fl);
+ mpf_clear(fl);
+ break;
+ }
+ }
+ }
+
+ static int parse_value(struct value *vl, char *arg)
+ {
+ struct text tx;
+ switch(vl->vtype) {
+ case Vunknown:
+ case Vnone:
+ return 0;
+ case Vstr:
+ vl->str.txt = arg;
+ vl->str.len = strlen(arg);
+ break;
+ case Vnum:
+ tx.txt = arg; tx.len = strlen(tx.txt);
+ if (number_parse(vl->num, vl->tail, tx) == 0)
+ mpq_init(vl->num);
+ break;
+ case Vbool:
+ if (strcasecmp(arg, "true") == 0 ||
+ strcmp(arg, "1") == 0)
+ vl->bool = 1;
+ else if (strcasecmp(arg, "false") == 0 ||
+ strcmp(arg, "0") == 0)
+ vl->bool = 2;
+ else {
+ printf("Bad bool: %s\n", arg);
+ return 0;
+ }
+ break;
+ }
+ return 1;
+ }
+
+### Variables
+
+Variables are simply named values. We store them in a linked list
+sorted by name and use sequential search and insertion sort.
+
+This linked list is stored in the parse context so that reduce
+functions can find or add variables, and so the analysis phase can
+ensure that every variable gets a type.
+
+###### ast
+
+ struct variable {
+ struct text name;
+ struct variable *next;
+ struct value val;
+ };
+
+###### macros
+
+ #define container_of(ptr, type, member) ({ \
+ const typeof( ((type *)0)->member ) *__mptr = (ptr); \
+ (type *)( (char *)__mptr - offsetof(type,member) );})
+
+###### parse context
+
+ struct variable *varlist;
+
+###### free context
+ while (context.varlist) {
+ struct variable *v = context.varlist;
+ context.varlist = v->next;
+ free_value(v->val);
+ free(v);
+ }
+
+###### ast functions
+
+ static struct variable *find_variable(struct token_config *conf, struct text s)
+ {
+ struct variable **l = &container_of(conf, struct parse_context,
+ config)->varlist;
+ struct variable *n;
+ int cmp = 1;
+
+ while (*l &&
+ (cmp = text_cmp((*l)->name, s)) < 0)
+ l = & (*l)->next;
+ if (cmp == 0)
+ return *l;
+ n = calloc(1, sizeof(*n));
+ n->name = s;
+ n->val.vtype = Vunknown;
+ n->next = *l;
+ *l = n;
+ return n;
+ }
+
+### Executables
+
+Executables can be lots of different things. In many cases an
+executable is just an operation combined with one or two other
+executables. This allows for expressions and lists etc. Other times
+an executable is something quite specific like a constant or variable
+name. So we define a `struct exec` to be a general executable with a
+type, and a `struct binode` which is a subclass of `exec` and forms a
+node in a binary tree and holding an operation. There will be other
+subclasses, and to access these we need to be able to `cast` the
+`exec` into the various other types.
+
+###### macros
+ #define cast(structname, pointer) ({ \
+ const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \
+ if (__mptr && *__mptr != X##structname) abort(); \
+ (struct structname *)( (char *)__mptr);})
+
+ #define new(structname) ({ \
+ struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
+ __ptr->type = X##structname; \
+ __ptr;})
+
+###### ast
+ enum exec_types {
+ Xbinode,
+ ## exec type
+ };
+ struct exec {
+ enum exec_types type;
+ };
+ struct binode {
+ struct exec;
+ enum Btype {
+ ## Binode types
+ } op;
+ struct exec *left, *right;
+ };
+
+Each different type of `exec` node needs a number of functions
+defined, a bit like methods. We must be able to be able to free it,
+print it, analyse it and execute it. Once we have specific `exec`
+types we will need to parse them to. Let's take this a bit more
+slowly.
+
+#### Freeing
+
+The parser generator requires as `free_foo` function for each struct
+that stores attributes and they will be `exec`s of subtypes there-of.
+So we need `free_exec` which can handle all the subtypes, and we need
+`free_binode`.
+
+###### ast functions
+
+ static void free_binode(struct binode *b)
+ {
+ if (!b)
+ return;
+ free_exec(b->left);
+ free_exec(b->right);
+ free(b);
+ }
+
+###### core functions
+ static void free_exec(struct exec *e)
+ {
+ if (!e)
+ return;
+ switch(e->type) {
+ ## free exec cases
+ }
+ }
+
+###### forward decls
+
+ static void free_exec(struct exec *e);
+
+###### free exec cases
+ case Xbinode: free_binode(cast(binode, e)); break;
+
+#### Printing
+
+Printing an `exec` requires that we know the current indent level for
+printing line-oriented components. As will become clear later, we
+also want to know what sort of bracketing to use.
+
+###### ast functions
+
+ static void do_indent(int i, char *str)
+ {
+ while (i--)
+ printf(" ");
+ printf("%s", str);
+ }
+
+###### core functions
+ static void print_binode(struct binode *b, int indent, int bracket)
+ {
+ struct binode *b2;
+ switch(b->op) {
+ ## print binode cases
+ }
+ }
+
+ static void print_exec(struct exec *e, int indent, int bracket)
+ {
+ switch (e->type) {
+ case Xbinode:
+ print_binode(cast(binode, e), indent, bracket); break;
+ ## print exec cases
+ }
+ }
+
+###### forward decls
+
+ static void print_exec(struct exec *e, int indent, int bracket);
+
+#### Analysing
+
+As discusses, analysis involves propagating type requirements around
+the program and looking for errors.
+
+So propagate_types is passed a type that the `exec` is expected to return,
+and returns the type that it does return, either of which can be `Vunknown`.
+An `ok` flag is passed by reference. It is set to `0` when an error is
+found, and `2` when any change is made. If it remains unchanged at
+`1`, then no more propagation is needed.
+
+###### core functions
+
+ static enum vtype propagate_types(struct exec *prog, enum vtype type,
+ int *ok)
+ {
+ enum vtype t;
+
+ if (!prog) {
+ if (type != Vunknown && type != Vnone)
+ *ok = 0;
+ return Vnone;
+ }
+
+ switch (prog->type) {
+ case Xbinode:
+ {
+ struct binode *b = cast(binode, prog);
+ switch (b->op) {
+ ## propagate binode cases
+ }
+ break;
+ }
+ ## propagate exec cases
+ }
+ return Vnone;
+ }
+
+#### Interpreting
+
+Interpreting an `exec` doesn't require anything but the `exec`. State
+is stored in variables and each variable will be directly linked from
+within the `exec` tree. The exception to this is the whole `program`
+which needs to look at command line arguments. The `program` will be
+interpreted separately.
+
+Each `exec` can return a value, which may be `Vnone` but shouldn't be `Vunknown`.
+
+###### core functions
+
+ static struct value interp_exec(struct exec *e)
+ {
+ struct value rv;
+ rv.vtype = Vnone;
+ if (!e)
+ return rv;
+
+ switch(e->type) {
+ case Xbinode:
+ {
+ struct binode *b = cast(binode, e);
+ struct value left, right;
+ left.vtype = right.vtype = Vnone;
+ switch (b->op) {
+ ## interp binode cases
+ }
+ free_value(left); free_value(right);
+ break;
+ }
+ ## interp exec cases
+ }
+ return rv;
+ }
+
+## Language elements
+
+Each language element needs to be parsed, printed, analysed,
+interpreted, and freed. There are several, so let's just start with
+the easy ones and work our way up.
+
+### Values
+
+We have already met values and separate objects. When manifest
+constants appear in the program text that must result in an executable
+which has a constant value. So the `val` structure embeds a value in
+an executable.
+
+###### exec type
+ Xval,
+
+###### ast
+ struct val {
+ struct exec;
+ struct value val;
+ };
+
+###### Grammar
+
+ $*val
+ Value -> True ${
+ $0 = new(val);
+ $0->val.vtype = Vbool;
+ $0->val.bool = 1;
+ }$
+ | False ${
+ $0 = new(val);
+ $0->val.vtype = Vbool;
+ $0->val.bool = 0;
+ }$
+ | NUMBER ${
+ $0 = new(val);
+ $0->val.vtype = Vnum;
+ if (number_parse($0->val.num, $0->val.tail, $1.txt) == 0)
+ mpq_init($0->val.num);
+ }$
+ | STRING ${
+ $0 = new(val);
+ $0->val.vtype = Vstr;
+ string_parse(&$1, '\\', &$0->val.str, $0->val.tail);
+ }$
+
+###### print exec cases
+ case Xval:
+ {
+ struct val *v = cast(val, e);
+ if (v->val.vtype == Vstr)
+ printf("\"");
+ print_value(v->val);
+ if (v->val.vtype == Vstr)
+ printf("\"");
+ break;
+ }
+
+###### propagate exec cases
+ case Xval:
+ {
+ struct val *val = cast(val, prog);
+ if (type != Vunknown &&
+ type != val->val.vtype)
+ *ok = 0;
+ return val->val.vtype;
+ }
+
+###### interp exec cases
+ case Xval:
+ return dup_value(cast(val, e)->val);
+
+###### ast functions
+ void free_val(struct val *v)
+ {
+ if (!v)
+ return;
+ free_value(v->val);
+ free(v);
+ }
+
+###### free exec cases
+ case Xval: free_val(cast(val, e)); break;
+
+###### ast functions
+ // Move all nodes from 'b' to 'rv', reversing the order.
+ // In 'b' 'left' is a list, and 'right' is the last node.
+ // In 'rv', left' is the first node and 'right' is a list.
+ struct binode *reorder_bilist(struct binode *b)
+ {
+ struct binode *rv = NULL;
+
+ while (b) {
+ struct exec *t = b->right;
+ b->right = rv;
+ rv = b;
+ if (b->left)
+ b = cast(binode, b->left);
+ else
+ b = NULL;
+ rv->left = t;
+ }
+ return rv;
+ }
+
+### Variables
+
+Just as we used as `val` to wrap a value into an `exec`, we similarly
+need a `var` to wrap a `variable` into an exec. While each `val`
+contained a copy of the value, each `var` hold a link to the variable
+because it really is the same variable no matter where it appears.
+
+###### exec type
+ Xvar,
+
+###### ast
+ struct var {
+ struct exec;
+ struct variable *var;
+ };
+
+###### Grammar
+ $*var
+ Variable -> IDENTIFIER ${
+ $0 = new(var);
+ $0->var = find_variable(config, $1.txt);
+ }$
+
+###### print exec cases
+ case Xvar:
+ {
+ struct var *v = cast(var, e);
+ printf("%.*s", v->var->name.len, v->var->name.txt);
+ break;
+ }
+
+###### propagate exec cases
+
+ case Xvar:
+ {
+ struct var *var = cast(var, prog);
+ if (var->var->val.vtype == Vunknown) {
+ if (type != Vunknown && *ok != 0) {
+ val_init(&var->var->val, type);
+ *ok = 2;
+ }
+ return type;
+ }
+ if (type == Vunknown)
+ return var->var->val.vtype;
+ if (type != var->var->val.vtype)
+ *ok = 0;
+ return type;
+ }
+
+###### interp exec cases
+ case Xvar:
+ return dup_value(cast(var, e)->var->val);
+
+###### ast functions
+
+ void free_var(struct var *v)
+ {
+ free(v);
+ }
+
+###### free exec cases
+ case Xvar: free_var(cast(var, e)); break;
+
+### Expressions: Boolean
+
+Our first user of the `binode` will be expressions, and particularly
+Boolean expressions. As I haven't implemented precedence in the
+parser generator yet, we need different names from each precedence
+level used by expressions. The outer most or lowest level precedence
+are Boolean `or` `and`, and `not` which form and `Expression` our of `BTerm`s
+and `BFact`s.
+
+###### Binode types
+ And,
+ Or,
+ Not,
+
+####### Grammar
+
+ $*binode
+ Expression -> Expression or BTerm ${
+ $0 = new(binode);
+ $0->op = Or;
+ $0->left = $<1;
+ $0->right = $<3;
+ }$
+ | BTerm ${ $0 = $<1; }$
+
+ BTerm -> BTerm and BFact ${
+ $0 = new(binode);
+ $0->op = And;
+ $0->left = $<1;
+ $0->right = $<3;
+ }$
+ | BFact ${ $0 = $<1; }$
+
+ BFact -> not BFact ${
+ $0 = new(binode);
+ $0->op = Not;
+ $0->right = $<2;
+ }$
+ ## other BFact
+
+###### print binode cases
+ case And:
+ print_exec(b->left, -1, 0);
+ printf(" and ");
+ print_exec(b->right, -1, 0);
+ break;
+ case Or:
+ print_exec(b->left, -1, 0);
+ printf(" or ");
+ print_exec(b->right, -1, 0);
+ break;
+ case Not:
+ printf("not ");
+ print_exec(b->right, -1, 0);
+ break;
+
+###### propagate binode cases
+ case And:
+ case Or:
+ case Not:
+ /* both must be Vbool, result is Vbool */
+ propagate_types(b->left, Vbool, ok);
+ propagate_types(b->right, Vbool, ok);
+ if (type != Vbool && type != Vunknown)
+ *ok = 0;
+ return Vbool;
+
+###### interp binode cases
+ case And:
+ rv = interp_exec(b->left);
+ right = interp_exec(b->right);
+ rv.bool = rv.bool && right.bool;
+ break;
+ case Or:
+ rv = interp_exec(b->left);
+ right = interp_exec(b->right);
+ rv.bool = rv.bool || right.bool;
+ break;
+ case Not:
+ rv = interp_exec(b->right);
+ rv.bool = !rv.bool;
+ break;
+
+### Expressions: Comparison
+
+Of slightly higher precedence that Boolean expressions are
+Comparisons.
+A comparison takes arguments of any type, but the two types must be
+the same.
+
+To simplify the parsing we introduce an `eop` which can return an
+expression operator.
+
+###### ast
+ struct eop {
+ enum Btype op;
+ };
+
+###### ast functions
+ static void free_eop(struct eop *e)
+ {
+ if (e)
+ free(e);
+ }
+
+###### Binode types
+ Less,
+ Gtr,
+ LessEq,
+ GtrEq,
+ Eql,
+ NEql,
+
+###### other BFact
+ | Expr CMPop Expr ${
+ $0 = new(binode);
+ $0->op = $2.op;
+ $0->left = $<1;
+ $0->right = $<3;
+ }$
+ | Expr ${ $0 = $<1; }$
+
+###### Grammar
+
+ $eop
+ CMPop -> < ${ $0.op = Less; }$
+ | > ${ $0.op = Gtr; }$
+ | <= ${ $0.op = LessEq; }$
+ | >= ${ $0.op = GtrEq; }$
+ | == ${ $0.op = Eql; }$
+ | != ${ $0.op = NEql; }$
+
+###### print binode cases
+
+ case Less:
+ case LessEq:
+ case Gtr:
+ case GtrEq:
+ case Eql:
+ case NEql:
+ print_exec(b->left, -1, 0);
+ switch(b->op) {
+ case Less: printf(" < "); break;
+ case LessEq: printf(" <= "); break;
+ case Gtr: printf(" > "); break;
+ case GtrEq: printf(" >= "); break;
+ case Eql: printf(" == "); break;
+ case NEql: printf(" != "); break;
+ default: abort();
+ }
+ print_exec(b->right, -1, 0);
+ break;
+
+###### propagate binode cases
+ case Less:
+ case LessEq:
+ case Gtr:
+ case GtrEq:
+ case Eql:
+ case NEql:
+ /* Both must match, result is Vbool */
+ t = propagate_types(b->left, Vunknown, ok);
+ if (t != Vunknown)
+ propagate_types(b->right, t, ok);
+ else {
+ t = propagate_types(b->right, Vunknown, ok);
+ if (t != Vunknown)
+ t = propagate_types(b->left, t, ok);
+ }
+ if (type != Vbool && type != Vunknown)
+ *ok = 0;
+ return Vbool;
+
+###### interp binode cases
+ case Less:
+ case LessEq:
+ case Gtr:
+ case GtrEq:
+ case Eql:
+ case NEql:
+ {
+ int cmp;
+ left = interp_exec(b->left);
+ right = interp_exec(b->right);
+ cmp = value_cmp(left, right);
+ rv.vtype = Vbool;
+ switch (b->op) {
+ case Less: rv.bool = cmp < 0; break;
+ case LessEq: rv.bool = cmp <= 0; break;
+ case Gtr: rv.bool = cmp > 0; break;
+ case GtrEq: rv.bool = cmp >= 0; break;
+ case Eql: rv.bool = cmp == 0; break;
+ case NEql: rv.bool = cmp != 0; break;
+ default: rv.bool = 0; break;
+ }
+ break;
+ }
+
+### Expressions: The rest
+
+The remaining expressions with the highest precedence are arithmetic
+and string concatenation. There are `Expr`, `Term`, and `Factor`.
+The `Factor` is where the `Value` and `Variable` that we already have
+are included.
+
+`+` and `-` are both infix and prefix operations (where they are
+absolute value and negation). These have different operator names.
+
+We also have a 'Bracket' operator which records where parentheses were
+found. This make it easy to reproduce these when printing. Once
+precedence is handled better I might be able to discard this.
+
+###### Binode types
+ Plus, Minus,
+ Times, Divide,
+ Concat,
+ Absolute, Negate,
+ Bracket,
+
+###### Grammar
+
+ $*binode
+ Expr -> Expr Eop Term ${
+ $0 = new(binode);
+ $0->op = $2.op;
+ $0->left = $<1;
+ $0->right = $<3;
+ }$
+ | Term ${ $0 = $<1; }$
+
+ Term -> Term Top Factor ${
+ $0 = new(binode);
+ $0->op = $2.op;
+ $0->left = $<1;
+ $0->right = $<3;
+ }$
+ | Factor ${ $0 = $<1; }$
+
+ Factor -> ( Expression ) ${
+ $0 = new(binode);
+ $0->op = Bracket;
+ $0->right = $<2;
+ }$
+ | Uop Factor ${
+ $0 = new(binode);
+ $0->op = $1.op;
+ $0->right = $<2;
+ }$
+ | Value ${ $0 = (struct binode *)$<1; }$
+ | Variable ${ $0 = (struct binode *)$<1; }$
+
+ $eop
+ Eop -> + ${ $0.op = Plus; }$
+ | - ${ $0.op = Minus; }$
+
+ Uop -> + ${ $0.op = Absolute; }$
+ | - ${ $0.op = Negate; }$
+
+ Top -> * ${ $0.op = Times; }$
+ | / ${ $0.op = Divide; }$
+ | ++ ${ $0.op = Concat; }$
+
+###### print binode cases
+ case Plus:
+ case Minus:
+ case Times:
+ case Divide:
+ case Concat:
+ print_exec(b->left, indent, 0);
+ switch(b->op) {
+ case Plus: printf(" + "); break;
+ case Minus: printf(" - "); break;
+ case Times: printf(" * "); break;
+ case Divide: printf(" / "); break;
+ case Concat: printf(" ++ "); break;
+ default: abort();
+ }
+ print_exec(b->right, indent, 0);
+ break;
+ case Absolute:
+ printf("+");
+ print_exec(b->right, indent, 0);
+ break;
+ case Negate:
+ printf("-");
+ print_exec(b->right, indent, 0);
+ break;
+ case Bracket:
+ printf("(");
+ print_exec(b->right, indent, 0);
+ printf(")");
+ break;
+
+###### propagate binode cases
+ case Plus:
+ case Minus:
+ case Times:
+ case Divide:
+ /* both must be numbers, result is Vnum */
+ case Absolute:
+ case Negate:
+ /* as propagate_types ignores a NULL,
+ * unary ops fit here too */
+ propagate_types(b->left, Vnum, ok);
+ propagate_types(b->right, Vnum, ok);
+ if (type != Vnum && type != Vunknown)
+ *ok = 0;
+ return Vnum;
+
+ case Concat:
+ /* both must be Vstr, result is Vstr */
+ propagate_types(b->left, Vstr, ok);
+ propagate_types(b->right, Vstr, ok);
+ if (type != Vstr && type != Vunknown)
+ *ok = 0;
+ return Vstr;
+
+ case Bracket:
+ return propagate_types(b->right, type, ok);
+
+###### interp binode cases
+
+ case Plus:
+ rv = interp_exec(b->left);
+ right = interp_exec(b->right);
+ mpq_add(rv.num, rv.num, right.num);
+ break;
+ case Minus:
+ rv = interp_exec(b->left);
+ right = interp_exec(b->right);
+ mpq_sub(rv.num, rv.num, right.num);
+ break;
+ case Times:
+ rv = interp_exec(b->left);
+ right = interp_exec(b->right);
+ mpq_mul(rv.num, rv.num, right.num);
+ break;
+ case Divide:
+ rv = interp_exec(b->left);
+ right = interp_exec(b->right);
+ mpq_div(rv.num, rv.num, right.num);
+ break;
+ case Negate:
+ rv = interp_exec(b->right);
+ mpq_neg(rv.num, rv.num);
+ break;
+ case Absolute:
+ rv = interp_exec(b->right);
+ mpq_abs(rv.num, rv.num);
+ break;
+ case Bracket:
+ rv = interp_exec(b->right);
+ break;
+ case Concat:
+ left = interp_exec(b->left);
+ right = interp_exec(b->right);
+ rv.vtype = Vstr;
+ rv.str = text_join(left.str, right.str);
+ break;
+
+### Blocks, Statements, and Statement lists.
+
+Now that we have expressions out of the way we need to turn to
+statements. There are simple statements and more complex statements.
+Simple statements do not contain newlines, complex statements do.
+
+Statements often come in sequences and we have corresponding simple
+statement lists and complex statement lists.
+The former comprise only simple statements separated by semicolons.
+The later comprise complex statements and simple statement lists. They are
+separated by newlines. Thus the semicolon is only used to separate
+simple statements on the one line. This may be overly restrictive,
+but I'm not sure I every want a complex statement to share a line with
+anything else.
+
+Note that a simple statement list can still use multiple lines if
+subsequent lines are indented, so
+
+###### Example: wrapped simple statement list
+
+ a = b; c = d;
+ e = f; print g
+
+is a single simple statement list. This might allow room for
+confusion, so I'm not set on it yet.
+
+A simple statement list needs no extra syntax. A complex statement
+list has two syntactic forms. It can be enclosed in braces (much like
+C blocks), or it can be introduced by a colon and continue until an
+unindented newline (much like Python blocks). With this extra syntax
+it is referred to as a block.
+
+Note that a block does not have to include any newlines if it only
+contains simple statements. So both of:
+
+ if condition: a=b; d=f
+
+ if condition { a=b; print f }
+
+are valid.
+
+In either case the list is constructed from a `binode` list with
+`Block` as the operator. When parsing the list it is most convenient
+to append to the end, so a list is a list and a statement. When using
+the list it is more convenient to consider a list to be a statement
+and a list. So we need a function to re-order a list.
+`reorder_bilist` serves this purpose.
+
+The only stand-alone statement we introduce at this stage is `pass`
+which does nothing and is represented as a `NULL` pointer in a `Block`
+list.
+
+###### Binode types
+ Block,
+
+###### Grammar
+
+ $void
+ OptNL -> Newlines
+ |
+
+ Newlines -> NEWLINE
+ | Newlines NEWLINE
+
+ $*binode
+ Open -> {
+ | NEWLINE {
+ Close -> }
+ | NEWLINE }
+ Block -> Open Statementlist Close ${ $0 = $<2; }$
+ | Open SimpleStatements } ${ $0 = reorder_bilist($<2); }$
+ | : Statementlist ${ $0 = $<2; }$
+ | : SimpleStatements ${ $0 = reorder_bilist($<2); }$
+
+ Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<1); }$
+
+ ComplexStatements -> ComplexStatements ComplexStatement ${
+ $0 = new(binode);
+ $0->op = Block;
+ $0->left = $<1;
+ $0->right = $<2;
+ }$
+ | ComplexStatements NEWLINE ${ $0 = $<1; }$
+ | ComplexStatement ${
+ $0 = new(binode);
+ $0->op = Block;
+ $0->left = NULL;
+ $0->right = $<1;
+ }$
+
+ $*exec
+ ComplexStatement -> SimpleStatements NEWLINE ${
+ $0 = reorder_bilist($<1);
+ }$
+ ## ComplexStatement Grammar
+
+ $*binode
+ SimpleStatements -> SimpleStatements ; SimpleStatement ${
+ $0 = new(binode);
+ $0->op = Block;
+ $0->left = $<1;
+ $0->right = $<3;
+ }$
+ | SimpleStatement ${
+ $0 = new(binode);
+ $0->op = Block;
+ $0->left = NULL;
+ $0->right = $<1;
+ }$
+ | SimpleStatements ; ${ $0 = $<1; }$
+
+ SimpleStatement -> pass ${ $0 = NULL; }$
+ ## SimpleStatement Grammar
+
+###### print binode cases
+ case Block:
+ if (indent < 0) {
+ // simple statement
+ if (b->left == NULL)
+ printf("pass");
+ else
+ print_exec(b->left, indent, 0);
+ if (b->right) {
+ printf("; ");
+ print_exec(b->right, indent, 0);
+ }
+ } else {
+ // block, one per line
+ if (b->left == NULL)
+ do_indent(indent, "pass\n");
+ else
+ print_exec(b->left, indent, bracket);
+ if (b->right)
+ print_exec(b->right, indent, bracket);
+ }
+ break;
+
+###### propagate binode cases
+ case Block:
+ {
+ /* If any statement returns something other then Vnone
+ * then all such must return same type.
+ * As each statement may be Vnone or something else,
+ * we must always pass Vunknown down, otherwise an incorrect
+ * error might occur.
+ */
+ struct binode *e;
+
+ for (e = b; e; e = cast(binode, e->right)) {
+ t = propagate_types(e->left, Vunknown, ok);
+ if (t != Vunknown && t != Vnone) {
+ if (type == Vunknown)
+ type = t;
+ else if (t != type)
+ *ok = 0;
+ }
+ }
+ return type;
+ }
+
+###### interp binode cases
+ case Block:
+ while (rv.vtype == Vnone &&
+ b) {
+ if (b->left)
+ rv = interp_exec(b->left);
+ b = cast(binode, b->right);
+ }
+ break;
+
+### The Print statement
+
+`print` is a simple statement that takes a comma-separated list of
+expressions and prints the values separated by spaces and terminated
+by a newline. No control of formatting is possible.
+
+`print` faces the same list-ordering issue as blocks, and uses the
+same solution.
+
+###### Binode types
+ Print,
+
+###### SimpleStatement Grammar
+
+ | print ExpressionList ${
+ $0 = reorder_bilist($<2);
+ }$
+ | print ExpressionList , ${
+ $0 = new(binode);
+ $0->op = Print;
+ $0->right = NULL;
+ $0->left = $<2;
+ $0 = reorder_bilist($0);
+ }$
+ | print ${
+ $0 = new(binode);
+ $0->op = Print;
+ $0->right = NULL;
+ }$
+
+###### Grammar
+
+ $*binode
+ ExpressionList -> ExpressionList , Expression ${
+ $0 = new(binode);
+ $0->op = Print;
+ $0->left = $<1;
+ $0->right = $<3;
+ }$
+ | Expression ${
+ $0 = new(binode);
+ $0->op = Print;
+ $0->left = NULL;
+ $0->right = $<1;
+ }$
+
+###### print binode cases
+
+ case Print:
+ do_indent(indent, "print");
+ while (b) {
+ if (b->left) {
+ printf(" ");
+ print_exec(b->left, -1, 0);
+ if (b->right)
+ printf(",");
+ }
+ b = cast(binode, b->right);
+ }
+ if (indent >= 0)
+ printf("\n");
+ break;
+
+###### propagate binode cases
+
+ case Print:
+ /* don't care but all must be consistent */
+ propagate_types(b->left, Vunknown, ok);
+ propagate_types(b->right, Vunknown, ok);
+ break;
+
+###### interp binode cases
+
+ case Print:
+ {
+ char sep = 0;
+ int eol = 1;
+ for ( ; b; b = cast(binode, b->right))
+ if (b->left) {
+ if (sep)
+ putchar(sep);
+ left = interp_exec(b->left);
+ print_value(left);
+ free_value(left);
+ if (b->right)
+ sep = ' ';
+ } else if (sep)
+ eol = 0;
+ left.vtype = Vnone;
+ if (eol)
+ printf("\n");
+ break;
+ }
+
+###### Assignment statement
+
+An assignment will assign a value to a variable. The analysis phase
+ensures that the type will be correct so the interpreted just needs to
+perform the calculation.
+
+###### Binode types
+ Assign,
+
+###### SimpleStatement Grammar
+ | Variable = Expression ${
+ $0 = new(binode);
+ $0->op = Assign;
+ $0->left = $<1;
+ $0->right =$<3;
+ }$
+
+###### print binode cases
+
+ case Assign:
+ do_indent(indent, "");
+ print_exec(b->left, indent, 0);
+ printf(" = ");
+ print_exec(b->right, indent, 0);
+ if (indent >= 0)
+ printf("\n");
+ break;
+
+###### propagate binode cases
+
+ case Assign:
+ /* Both must match, result is Vnone */
+ t = propagate_types(b->left, Vunknown, ok);
+ if (t != Vunknown)
+ propagate_types(b->right, t, ok);
+ else {
+ t = propagate_types(b->right, Vunknown, ok);
+ if (t != Vunknown)
+ t = propagate_types(b->left, t, ok);
+ }
+ return Vnone;
+
+###### interp binode cases
+
+ case Assign:
+ {
+ struct variable *v = cast(var, b->left)->var;
+ right = interp_exec(b->right);
+ free_value(v->val);
+ v->val = right;
+ right.vtype = Vunknown;
+ break;
+ }
+
+### The `use` statement
+
+The `use` statement is the last "simple" statement. It is needed when
+the condition in a conditional statement is a block. `use` works much
+like `return` in C, but only completes the `condition`, not the whole
+function.
+
+###### Binode types
+ Use,
+
+###### SimpleStatement Grammar
+ | use Expression ${
+ $0 = new(binode);
+ $0->op = Use;
+ $0->right = $<2;
+ }$
+
+###### print binode cases
+
+ case Use:
+ do_indent(indent, "use ");
+ print_exec(b->right, -1, 0);
+ if (indent >= 0)
+ printf("\n");
+ break;
+
+###### propagate binode cases
+
+ case Use:
+ /* result matches value */
+ return propagate_types(b->right, type, ok);
+
+###### interp binode cases
+
+ case Use:
+ rv = interp_exec(b->right);
+ break;
+
+### The Conditional Statement
+
+This is the biggy and currently the only complex statement.
+This subsumes `if`, `while`, `do/while`, `switch`, and some part of
+`for`. It is comprised of a number of parts, all of which are
+optional though set combinations apply.
+
+If there is a `forpart`, it is executed first, only once.
+If there is a `dopart`, then it is executed repeatedly providing
+always that the `condpart` or `cond`, if present, does not return a non-True
+value. `condpart` can fail to return any value if it simply executes
+to completion. This is treated the same as returning True.
+
+If there is a `thenpart` it will be executed whenever the `condpart`
+or `cond` returns True (or does not return), but this will happen
+*after* `dopart` (when present).
+
+If `elsepart` is present it will be executed at most once when the
+condition returns False. If there are any `casepart`s, they will be
+executed when the condition returns a matching value.
+
+The particular sorts of values allowed in case parts has not yet been
+determined in the language design.
+
+The cond_statement cannot fit into a `binode` so a new `exec` is
+defined.
+
+###### exec type
+ Xcond_statement,
+
+###### ast
+ struct casepart {
+ struct exec *value;
+ struct exec *action;
+ struct casepart *next;
+ };
+ struct cond_statement {
+ struct exec;
+ struct exec *forpart, *condpart, *dopart, *thenpart, *elsepart;
+ struct casepart *casepart;
+ };
+
+###### ast functions
+
+ static void free_casepart(struct casepart *cp)
+ {
+ while (cp) {
+ struct casepart *t;
+ free_exec(cp->value);
+ free_exec(cp->action);
+ t = cp->next;
+ free(cp);
+ cp = t;
+ }
+ }
+
+ void free_cond_statement(struct cond_statement *s)
+ {
+ if (!s)
+ return;
+ free_exec(s->forpart);
+ free_exec(s->condpart);
+ free_exec(s->dopart);
+ free_exec(s->thenpart);
+ free_exec(s->elsepart);
+ free_casepart(s->casepart);
+ free(s);
+ }
+
+###### free exec cases
+ case Xcond_statement: free_cond_statement(cast(cond_statement, e)); break;
+
+###### ComplexStatement Grammar
+ | CondStatement ${ $0 = $<1; }$
+
+###### Grammar
+
+ $*cond_statement
+ CondStatement -> ForPart OptNL WhilePart CondSuffix ${
+ $0 = $<4;
+ $0->forpart = $<1;
+ $0->condpart = $3.condpart; $3.condpart = NULL;
+ $0->dopart = $3.dopart; $3.dopart = NULL;
+ }$
+ | WhilePart CondSuffix ${
+ $0 = $<2;
+ $0->condpart = $1.condpart; $1.condpart = NULL;
+ $0->dopart = $1.dopart; $1.dopart = NULL;
+ }$
+ | SwitchPart CondSuffix ${
+ $0 = $<2;
+ $0->condpart = $<1;
+ }$
+ | IfPart IfSuffix ${
+ $0 = $<2;
+ $0->condpart = $1.condpart; $1.condpart = NULL;
+ $0->thenpart = $1.thenpart; $1.thenpart = NULL;
+ }$
+ CondSuffix -> IfSuffix ${ $0 = $<1; }$
+ | Newlines case Expression Block CondSuffix ${ {
+ struct casepart *cp = calloc(1, sizeof(*cp));
+ $0 = $<5;
+ cp->value = $<3;
+ cp->action = $<4;
+ cp->next = $0->casepart;
+ $0->casepart = cp;
+ } }$
+ | case Expression Block CondSuffix ${ {
+ struct casepart *cp = calloc(1, sizeof(*cp));
+ $0 = $<4;
+ cp->value = $<2;
+ cp->action = $<3;
+ cp->next = $0->casepart;
+ $0->casepart = cp;
+ } }$
+
+ IfSuffix -> Newlines ${ $0 = new(cond_statement); }$
+ | Newlines else Block ${
+ $0 = new(cond_statement);
+ $0->elsepart = $<3;
+ }$
+ | else Block ${
+ $0 = new(cond_statement);
+ $0->elsepart = $<2;
+ }$
+ | Newlines else CondStatement ${
+ $0 = new(cond_statement);
+ $0->elsepart = $<3;
+ }$
+ | else CondStatement ${
+ $0 = new(cond_statement);
+ $0->elsepart = $<2;
+ }$
+
+
+ $*exec
+ ForPart -> for SimpleStatements ${
+ $0 = reorder_bilist($<2);
+ }$
+ | for Block ${
+ $0 = $<2;
+ }$
+
+ WhileHead -> while Block ${
+ $0 = $<2;
+ }$
+
+ $cond_statement
+ WhilePart -> while Expression Block ${
+ $0.type = Xcond_statement;
+ $0.condpart = $<2;
+ $0.dopart = $<3;
+ }$
+ | WhileHead OptNL do Block ${
+ $0.type = Xcond_statement;
+ $0.condpart = $<1;
+ $0.dopart = $<4;
+ }$
+
+ IfPart -> if Expression Block ${
+ $0.type = Xcond_statement;
+ $0.condpart = $<2;
+ $0.thenpart = $<3;
+ }$
+ | if Block OptNL then Block ${
+ $0.type = Xcond_statement;
+ $0.condpart = $<2;
+ $0.thenpart = $<5;
+ }$
+
+ $*exec
+ SwitchPart -> switch Expression ${
+ $0 = $<2;
+ }$
+ | switch Block ${
+ $0 = $<2;
+ }$
+
+###### print exec cases
+
+ case Xcond_statement:
+ {
+ struct cond_statement *cs = cast(cond_statement, e);
+ struct casepart *cp;
+ if (cs->forpart) {
+ do_indent(indent, "for");
+ if (bracket) printf(" {\n"); else printf(":\n");
+ print_exec(cs->forpart, indent+1, bracket);
+ if (bracket) do_indent(indent, "}\n");
+ }
+ if (cs->dopart) {
+ // a loop
+ if (cs->condpart && cs->condpart->type == Xbinode &&
+ cast(binode, cs->condpart)->op == Block) {
+ if (bracket)
+ do_indent(indent, "while {\n");
+ else
+ do_indent(indent, "while:\n");
+ print_exec(cs->condpart, indent+1, bracket);
+ if (cs->thenpart) {
+ if (bracket)
+ do_indent(indent, "} then {\n");
+ else
+ do_indent(indent, "then:\n");
+ print_exec(cs->thenpart, indent+1, bracket);
+ if (bracket)
+ do_indent(indent, "}\n");
+ }
+ if (bracket)
+ do_indent(indent, "} do {\n");
+ else
+ do_indent(indent, "do:\n");
+ print_exec(cs->dopart, indent+1, bracket);
+ if (bracket)
+ do_indent(indent, "}\n");
+ } else {
+ do_indent(indent, "while ");
+ print_exec(cs->condpart, 0, bracket);
+ if (bracket)
+ printf(" {\n");
+ else
+ printf(":\n");
+ print_exec(cs->dopart, indent+1, bracket);
+ if (bracket)
+ do_indent(indent, "}\n");
+ }
+ } else {
+ // a condition
+ if (cs->casepart)
+ do_indent(indent, "switch");
+ else
+ do_indent(indent, "if");
+ if (cs->condpart && cs->condpart->type == Xbinode &&
+ cast(binode, cs->condpart)->op == Block) {
+ printf(":\n");
+ print_exec(cs->condpart, indent+1, bracket);
+ do_indent(indent, "then:\n");
+ print_exec(cs->thenpart, indent+1, bracket);
+ } else {
+ printf(" ");
+ print_exec(cs->condpart, 0, bracket);
+ printf(":\n");
+ print_exec(cs->thenpart, indent+1, bracket);
+ }
+ }
+ for (cp = cs->casepart; cp; cp = cp->next) {
+ do_indent(indent, "case ");
+ print_exec(cp->value, -1, 0);
+ printf(":\n");
+ print_exec(cp->action, indent+1, bracket);
+ }
+ if (cs->elsepart) {
+ do_indent(indent, "else:\n");
+ print_exec(cs->elsepart, indent+1, bracket);
+ }
+ break;
+ }
+
+###### propagate exec cases
+ case Xcond_statement:
+ {
+ // forpart and dopart must return Vnone
+ // condpart must be bool or match casepart->values
+ // thenpart, elsepart, casepart->action must match
+ // or be Vnone
+ struct cond_statement *cs = cast(cond_statement, prog);
+ struct casepart *c;
+
+ t = propagate_types(cs->forpart, Vnone, ok);
+ if (t != Vunknown && t != Vnone)
+ *ok = 0;
+ t = propagate_types(cs->dopart, Vnone, ok);
+ if (t != Vunknown && t != Vnone)
+ *ok = 0;
+ if (cs->casepart == NULL)
+ propagate_types(cs->condpart, Vbool, ok);
+ else {
+ t = Vunknown;
+ for (c = cs->casepart;
+ c && (t == Vunknown); c = c->next)
+ t = propagate_types(c->value, Vunknown, ok);
+ if (t == Vunknown && cs->condpart)
+ t = propagate_types(cs->condpart, Vunknown, ok);
+ // Now we have a type (I hope) push it down
+ if (t != Vunknown) {
+ for (c = cs->casepart; c; c = c->next)
+ propagate_types(c->value, t, ok);
+ propagate_types(cs->condpart, t, ok);
+ }
+ }
+ if (type == Vunknown || type == Vnone)
+ type = propagate_types(cs->thenpart, Vunknown, ok);
+ if (type == Vunknown || type == Vnone)
+ type = propagate_types(cs->elsepart, Vunknown, ok);
+ for (c = cs->casepart;
+ c && (type == Vunknown || type == Vnone);
+ c = c->next)
+ type = propagate_types(c->action, Vunknown, ok);
+ if (type != Vunknown && type != Vnone) {
+ propagate_types(cs->thenpart, type, ok);
+ propagate_types(cs->elsepart, type, ok);
+ for (c = cs->casepart; c ; c = c->next)
+ propagate_types(c->action, type, ok);
+ return type;
+ } else
+ return Vunknown;
+ }
+
+###### interp exec cases
+ case Xcond_statement:
+ {
+ struct value v, cnd;
+ struct casepart *cp;
+ struct cond_statement *c = cast(cond_statement, e);
+ if (c->forpart)
+ interp_exec(c->forpart);
+ do {
+ if (c->condpart)
+ cnd = interp_exec(c->condpart);
+ else
+ cnd.vtype = Vnone;
+ if (!(cnd.vtype == Vnone ||
+ (cnd.vtype == Vbool && cnd.bool != 0)))
+ break;
+ if (c->dopart) {
+ free_value(cnd);
+ interp_exec(c->dopart);
+ }
+ if (c->thenpart) {
+ v = interp_exec(c->thenpart);
+ if (v.vtype != Vnone || !c->dopart)
+ return v;
+ free_value(v);
+ }
+ } while (c->dopart);
+
+ for (cp = c->casepart; cp; cp = cp->next) {
+ v = interp_exec(cp->value);
+ if (value_cmp(v, cnd) == 0) {
+ free_value(v);
+ free_value(cnd);
+ return interp_exec(cp->action);
+ }
+ free_value(v);
+ }
+ free_value(cnd);
+ if (c->elsepart)
+ return interp_exec(c->elsepart);
+ v.vtype = Vnone;
+ return v;
+ }
+
+### Finally the whole program.
+
+Somewhat reminiscent of Pascal a (current) Ocean program starts with
+the keyword "program" and list of variable names which are assigned
+values from command line arguments. Following this is a `block` which
+is the code to execute.
+
+As this is the top level, several things are handled a bit
+differently.
+The whole program is not interpreted by `interp_exec` as that isn't
+passed the argument list which the program requires. Similarly type
+analysis is a bit more interesting at this level.
+
+###### Binode types
+ Program,
+
+###### Parser: grammar
+
+ $*binode
+ Program -> program Varlist Block OptNL ${
+ $0 = new(binode);
+ $0->op = Program;
+ $0->left = reorder_bilist($<2);
+ $0->right = $<3;
+ }$
+
+ Varlist -> Varlist Variable ${
+ $0 = new(binode);
+ $0->op = Program;
+ $0->left = $<1;
+ $0->right = $<2;
+ }$
+ | ${ $0 = NULL; }$
+ ## Grammar
+
+###### print binode cases
+ case Program:
+ do_indent(indent, "program");
+ for (b2 = cast(binode, b->left); b2; b2 = cast(binode, b2->right)) {
+ printf(" ");
+ print_exec(b2->left, 0, 0);
+ }
+ if (bracket)
+ printf(" {\n");
+ else
+ printf(":\n");
+ print_exec(b->right, indent+1, bracket);
+ if (bracket)
+ do_indent(indent, "}\n");
+ break;
+
+###### propagate binode cases
+ case Program: abort();
+
+###### core functions
+
+ static int analyse_prog(struct exec *prog, struct parse_context *c)
+ {
+ struct binode *b = cast(binode, prog);
+ struct variable *v;
+ int ok = 1;
+ int uniq = 314159;
+ do {
+ ok = 1;
+ propagate_types(b->right, Vnone, &ok);
+ } while (ok == 2);
+ if (!ok)
+ return 0;
+
+ for (b = cast(binode, b->left); b; b = cast(binode, b->right)) {
+ struct var *v = cast(var, b->left);
+ if (v->var->val.vtype == Vunknown)
+ val_init(&v->var->val, Vstr);
+ }
+ b = cast(binode, prog);
+ do {
+ ok = 1;
+ propagate_types(b->right, Vnone, &ok);
+ } while (ok == 2);
+ if (!ok)
+ return 0;
+
+ for (v = c->varlist; v; v = v->next)
+ if (v->val.vtype == Vunknown) {
+ v->val.vtype = Vnum;
+ mpq_init(v->val.num);
+ mpq_set_ui(v->val.num, uniq, 1);
+ uniq++;
+ }
+ /* Make sure everything is still consistent */
+ propagate_types(b->right, Vnone, &ok);
+ return !!ok;
+ }
+
+ static void interp_prog(struct exec *prog, char **argv)
+ {
+ struct binode *p = cast(binode, prog);
+ struct binode *al = cast(binode, p->left);
+ struct value v;
+
+ while (al) {
+ struct var *v = cast(var, al->left);
+ struct value *vl = &v->var->val;
+
+ if (argv[0] == NULL) {
+ printf("Not enough args\n");
+ exit(1);
+ }
+ al = cast(binode, al->right);
+ free_value(*vl);
+ if (!parse_value(vl, argv[0]))
+ exit(1);
+ argv++;
+ }
+ v = interp_exec(p->right);
+ free_value(v);
+ }
+
+###### interp binode cases
+ case Program: abort();