+###### parse context
+
+ struct variable *in_scope;
+
+All variables with the same name are linked together using the
+'previous' link. Those variable that have been affirmatively merged all
+have a 'merged' pointer that points to one primary variable - the most
+recently declared instance. When merging variables, we need to also
+adjust the 'merged' pointer on any other variables that had previously
+been merged with the one that will no longer be primary.
+
+A variable that is no longer the most recent instance of a name may
+still have "pending" scope, if it might still be merged with most
+recent instance. These variables don't really belong in the
+"in_scope" list, but are not immediately removed when a new instance
+is found. Instead, they are detected and ignored when considering the
+list of in_scope names.
+
+The storage of the value of a variable will be described later. For now
+we just need to know that when a variable goes out of scope, it might
+need to be freed. For this we need to be able to find it, so assume that
+`var_value()` will provide that.
+
+###### variable fields
+ struct variable *merged;
+
+###### ast functions
+
+ static void variable_merge(struct variable *primary, struct variable *secondary)
+ {
+ struct variable *v;
+
+ if (primary->merged)
+ // shouldn't happen
+ primary = primary->merged; // NOTEST
+
+ for (v = primary->previous; v; v=v->previous)
+ if (v == secondary || v == secondary->merged ||
+ v->merged == secondary ||
+ (v->merged && v->merged == secondary->merged)) {
+ v->scope = OutScope;
+ v->merged = primary;
+ }
+ }
+
+###### forward decls
+ static struct value *var_value(struct parse_context *c, struct variable *v);
+
+###### free context vars
+
+ while (context.varlist) {
+ struct binding *b = context.varlist;
+ struct variable *v = b->var;
+ context.varlist = b->next;
+ free(b);
+ while (v) {
+ struct variable *t = v;
+
+ v = t->previous;
+ free_value(t->type, var_value(&context, t));
+ if (t->depth == 0)
+ // This is a global constant
+ free_exec(t->where_decl);
+ free(t);
+ }
+ }
+
+#### Manipulating Bindings
+
+When a name is conditionally visible, a new declaration discards the
+old binding - the condition lapses. Conversely a usage of the name
+affirms the visibility and extends it to the end of the containing
+block - i.e. the block that contains both the original declaration and
+the latest usage. This is determined from `min_depth`. When a
+conditionally visible variable gets affirmed like this, it is also
+merged with other conditionally visible variables with the same name.
+
+When we parse a variable declaration we either report an error if the
+name is currently bound, or create a new variable at the current nest
+depth if the name is unbound or bound to a conditionally scoped or
+pending-scope variable. If the previous variable was conditionally
+scoped, it and its homonyms becomes out-of-scope.
+
+When we parse a variable reference (including non-declarative assignment
+"foo = bar") we report an error if the name is not bound or is bound to
+a pending-scope variable; update the scope if the name is bound to a
+conditionally scoped variable; or just proceed normally if the named
+variable is in scope.
+
+When we exit a scope, any variables bound at this level are either
+marked out of scope or pending-scoped, depending on whether the scope
+was sequential or parallel. Here a "parallel" scope means the "then"
+or "else" part of a conditional, or any "case" or "else" branch of a
+switch. Other scopes are "sequential".
+
+When exiting a parallel scope we check if there are any variables that
+were previously pending and are still visible. If there are, then
+there weren't redeclared in the most recent scope, so they cannot be
+merged and must become out-of-scope. If it is not the first of
+parallel scopes (based on `child_count`), we check that there was a
+previous binding that is still pending-scope. If there isn't, the new
+variable must now be out-of-scope.
+
+When exiting a sequential scope that immediately enclosed parallel
+scopes, we need to resolve any pending-scope variables. If there was
+no `else` clause, and we cannot determine that the `switch` was exhaustive,
+we need to mark all pending-scope variable as out-of-scope. Otherwise
+all pending-scope variables become conditionally scoped.
+
+###### ast
+ enum closetype { CloseSequential, CloseParallel, CloseElse };
+
+###### ast functions
+
+ static struct variable *var_decl(struct parse_context *c, struct text s)
+ {
+ struct binding *b = find_binding(c, s);
+ struct variable *v = b->var;
+
+ switch (v ? v->scope : OutScope) {
+ case InScope:
+ /* Caller will report the error */
+ return NULL;
+ case CondScope:
+ for (;
+ v && v->scope == CondScope;
+ v = v->previous)
+ v->scope = OutScope;
+ break;
+ default: break;
+ }
+ v = calloc(1, sizeof(*v));
+ v->previous = b->var;
+ b->var = v;
+ v->name = b;
+ v->min_depth = v->depth = c->scope_depth;
+ v->scope = InScope;
+ v->in_scope = c->in_scope;
+ c->in_scope = v;
+ return v;
+ }
+
+ static struct variable *var_ref(struct parse_context *c, struct text s)
+ {
+ struct binding *b = find_binding(c, s);
+ struct variable *v = b->var;
+ struct variable *v2;
+
+ switch (v ? v->scope : OutScope) {
+ case OutScope:
+ case PendingScope:
+ /* Caller will report the error */
+ return NULL;
+ case CondScope:
+ /* All CondScope variables of this name need to be merged
+ * and become InScope
+ */
+ v->depth = v->min_depth;
+ v->scope = InScope;
+ for (v2 = v->previous;
+ v2 && v2->scope == CondScope;
+ v2 = v2->previous)
+ variable_merge(v, v2);
+ break;
+ case InScope:
+ break;
+ }
+ return v;
+ }
+
+ static void var_block_close(struct parse_context *c, enum closetype ct)
+ {
+ /* Close off all variables that are in_scope */
+ struct variable *v, **vp, *v2;
+
+ scope_pop(c);
+ for (vp = &c->in_scope;
+ v = *vp, v && v->depth > c->scope_depth && v->min_depth > c->scope_depth;
+ ) {
+ if (v->name->var == v) switch (ct) {
+ case CloseElse:
+ case CloseParallel: /* handle PendingScope */
+ switch(v->scope) {
+ case InScope:
+ case CondScope:
+ if (c->scope_stack->child_count == 1)
+ v->scope = PendingScope;
+ else if (v->previous &&
+ v->previous->scope == PendingScope)
+ v->scope = PendingScope;
+ else if (v->type == Tlabel)
+ v->scope = PendingScope;
+ else if (v->name->var == v)
+ v->scope = OutScope;
+ if (ct == CloseElse) {
+ /* All Pending variables with this name
+ * are now Conditional */
+ for (v2 = v;
+ v2 && v2->scope == PendingScope;
+ v2 = v2->previous)
+ v2->scope = CondScope;
+ }
+ break;
+ case PendingScope:
+ for (v2 = v;
+ v2 && v2->scope == PendingScope;
+ v2 = v2->previous)
+ if (v2->type != Tlabel)
+ v2->scope = OutScope;
+ break;
+ case OutScope: break;
+ }
+ break;
+ case CloseSequential:
+ if (v->type == Tlabel)
+ v->scope = PendingScope;
+ switch (v->scope) {
+ case InScope:
+ v->scope = OutScope;
+ break;
+ case PendingScope:
+ /* There was no 'else', so we can only become
+ * conditional if we know the cases were exhaustive,
+ * and that doesn't mean anything yet.
+ * So only labels become conditional..
+ */
+ for (v2 = v;
+ v2 && v2->scope == PendingScope;
+ v2 = v2->previous)
+ if (v2->type == Tlabel) {
+ v2->scope = CondScope;
+ v2->min_depth = c->scope_depth;
+ } else
+ v2->scope = OutScope;
+ break;
+ case CondScope:
+ case OutScope: break;
+ }
+ break;
+ }
+ if (v->scope == OutScope || v->name->var != v)
+ *vp = v->in_scope;
+ else
+ vp = &v->in_scope;
+ }
+ }
+
+#### Storing Values
+
+The value of a variable is store separately from the variable, on an
+analogue of a stack frame. There are (currently) two frames that can be
+active. A global frame which currently only stores constants, and a
+stacked frame which stores local variables. Each variable knows if it
+is global or not, and what its index into the frame is.
+
+Values in the global frame are known immediately they are relevant, so
+the frame needs to be reallocated as it grows so it can store those
+values. The local frame doesn't get values until the interpreted phase
+is started, so there is no need to allocate until the size is known.
+
+###### variable fields
+ short frame_pos;
+ short global;
+
+###### parse context
+
+ short global_size, global_alloc;
+ short local_size;
+ void *global, *local;
+
+###### ast functions
+
+ static struct value *var_value(struct parse_context *c, struct variable *v)
+ {
+ if (!v->global) {
+ if (!c->local || !v->type)
+ return NULL;
+ if (v->frame_pos + v->type->size > c->local_size) {
+ printf("INVALID frame_pos\n"); // NOTEST
+ exit(2); // NOTEST
+ }
+ return c->local + v->frame_pos;
+ }
+ if (c->global_size > c->global_alloc) {
+ int old = c->global_alloc;
+ c->global_alloc = (c->global_size | 1023) + 1024;
+ c->global = realloc(c->global, c->global_alloc);
+ memset(c->global + old, 0, c->global_alloc - old);
+ }
+ return c->global + v->frame_pos;
+ }
+
+ static struct value *global_alloc(struct parse_context *c, struct type *t,
+ struct variable *v, struct value *init)
+ {
+ struct value *ret;
+ struct variable scratch;
+
+ if (t->prepare_type)
+ t->prepare_type(c, t, 1);
+
+ if (c->global_size & (t->align - 1))
+ c->global_size = (c->global_size + t->align) & ~(t->align-1);
+ if (!v) {
+ v = &scratch;
+ v->type = t;
+ }
+ v->frame_pos = c->global_size;
+ v->global = 1;
+ c->global_size += v->type->size;
+ ret = var_value(c, v);
+ if (init)
+ memcpy(ret, init, t->size);
+ else
+ val_init(t, ret);
+ return ret;
+ }
+
+As global values are found -- struct field initializers, labels etc --
+`global_alloc()` is called to record the value in the global frame.
+
+When the program is fully parsed, we need to walk the list of variables
+to find any that weren't merged away and that aren't global, and to
+calculate the frame size and assign a frame position for each variable.
+For this we have `scope_finalize()`.
+
+###### ast functions
+
+ static void scope_finalize(struct parse_context *c)
+ {
+ struct binding *b;
+
+ for (b = c->varlist; b; b = b->next) {
+ struct variable *v;
+ for (v = b->var; v; v = v->previous) {
+ struct type *t = v->type;
+ if (v->merged && v->merged != v)
+ continue;
+ if (v->global)
+ continue;
+ if (c->local_size & (t->align - 1))
+ c->local_size = (c->local_size + t->align) & ~(t->align-1);
+ v->frame_pos = c->local_size;
+ c->local_size += v->type->size;
+ }
+ }
+ c->local = calloc(1, c->local_size);
+ }
+
+###### free context vars
+ free(context.global);
+ free(context.local);
+
+### Executables
+
+Executables can be lots of different things. In many cases an
+executable is just an operation combined with one or two other
+executables. This allows for expressions and lists etc. Other times an
+executable is something quite specific like a constant or variable name.
+So we define a `struct exec` to be a general executable with a type, and
+a `struct binode` which is a subclass of `exec`, forms a node in a
+binary tree, and holds an operation. There will be other subclasses,
+and to access these we need to be able to `cast` the `exec` into the
+various other types. The first field in any `struct exec` is the type
+from the `exec_types` enum.
+
+###### macros
+ #define cast(structname, pointer) ({ \
+ const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \
+ if (__mptr && *__mptr != X##structname) abort(); \
+ (struct structname *)( (char *)__mptr);})
+
+ #define new(structname) ({ \
+ struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
+ __ptr->type = X##structname; \
+ __ptr->line = -1; __ptr->column = -1; \
+ __ptr;})
+
+ #define new_pos(structname, token) ({ \
+ struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
+ __ptr->type = X##structname; \
+ __ptr->line = token.line; __ptr->column = token.col; \
+ __ptr;})
+
+###### ast
+ enum exec_types {
+ Xbinode,
+ ## exec type
+ };
+ struct exec {
+ enum exec_types type;
+ int line, column;
+ };
+ struct binode {
+ struct exec;
+ enum Btype {
+ ## Binode types
+ } op;
+ struct exec *left, *right;
+ };
+
+###### ast functions
+
+ static int __fput_loc(struct exec *loc, FILE *f)
+ {
+ if (!loc)
+ return 0; // NOTEST
+ if (loc->line >= 0) {
+ fprintf(f, "%d:%d: ", loc->line, loc->column);
+ return 1;
+ }
+ if (loc->type == Xbinode)
+ return __fput_loc(cast(binode,loc)->left, f) ||
+ __fput_loc(cast(binode,loc)->right, f); // NOTEST
+ return 0; // NOTEST
+ }
+ static void fput_loc(struct exec *loc, FILE *f)
+ {
+ if (!__fput_loc(loc, f))
+ fprintf(f, "??:??: "); // NOTEST
+ }
+
+Each different type of `exec` node needs a number of functions defined,
+a bit like methods. We must be able to free it, print it, analyse it
+and execute it. Once we have specific `exec` types we will need to
+parse them too. Let's take this a bit more slowly.
+
+#### Freeing
+
+The parser generator requires a `free_foo` function for each struct
+that stores attributes and they will often be `exec`s and subtypes
+there-of. So we need `free_exec` which can handle all the subtypes,
+and we need `free_binode`.
+
+###### ast functions
+
+ static void free_binode(struct binode *b)
+ {
+ if (!b)
+ return;
+ free_exec(b->left);
+ free_exec(b->right);
+ free(b);
+ }
+
+###### core functions
+ static void free_exec(struct exec *e)
+ {
+ if (!e)
+ return;
+ switch(e->type) {
+ ## free exec cases
+ }
+ }
+
+###### forward decls
+
+ static void free_exec(struct exec *e);
+
+###### free exec cases
+ case Xbinode: free_binode(cast(binode, e)); break;
+
+#### Printing
+
+Printing an `exec` requires that we know the current indent level for
+printing line-oriented components. As will become clear later, we
+also want to know what sort of bracketing to use.
+
+###### ast functions
+
+ static void do_indent(int i, char *str)
+ {
+ while (i--)
+ printf(" ");
+ printf("%s", str);
+ }
+
+###### core functions
+ static void print_binode(struct binode *b, int indent, int bracket)
+ {
+ struct binode *b2;
+ switch(b->op) {
+ ## print binode cases
+ }
+ }
+
+ static void print_exec(struct exec *e, int indent, int bracket)
+ {
+ if (!e)
+ return; // NOTEST
+ switch (e->type) {
+ case Xbinode:
+ print_binode(cast(binode, e), indent, bracket); break;
+ ## print exec cases
+ }
+ }
+
+###### forward decls
+
+ static void print_exec(struct exec *e, int indent, int bracket);
+
+#### Analysing
+
+As discussed, analysis involves propagating type requirements around the
+program and looking for errors.
+
+So `propagate_types` is passed an expected type (being a `struct type`
+pointer together with some `val_rules` flags) that the `exec` is
+expected to return, and returns the type that it does return, either
+of which can be `NULL` signifying "unknown". An `ok` flag is passed
+by reference. It is set to `0` when an error is found, and `2` when
+any change is made. If it remains unchanged at `1`, then no more
+propagation is needed.
+
+###### ast
+
+ enum val_rules {Rnolabel = 1<<0, Rboolok = 1<<1, Rnoconstant = 2<<1};
+
+###### format cases
+ case 'r':
+ if (rules & Rnolabel)
+ fputs(" (labels not permitted)", stderr);
+ break;
+
+###### core functions
+
+ static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
+ struct type *type, int rules);
+ static struct type *__propagate_types(struct exec *prog, struct parse_context *c, int *ok,
+ struct type *type, int rules)
+ {
+ struct type *t;
+
+ if (!prog)
+ return Tnone;
+
+ switch (prog->type) {
+ case Xbinode:
+ {
+ struct binode *b = cast(binode, prog);
+ switch (b->op) {
+ ## propagate binode cases
+ }
+ break;
+ }
+ ## propagate exec cases
+ }
+ return Tnone;
+ }
+
+ static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
+ struct type *type, int rules)
+ {
+ struct type *ret = __propagate_types(prog, c, ok, type, rules);
+
+ if (c->parse_error)
+ *ok = 0;
+ return ret;
+ }
+
+#### Interpreting
+
+Interpreting an `exec` doesn't require anything but the `exec`. State
+is stored in variables and each variable will be directly linked from
+within the `exec` tree. The exception to this is the `main` function
+which needs to look at command line arguments. This function will be
+interpreted separately.
+
+Each `exec` can return a value combined with a type in `struct lrval`.
+The type may be `Tnone` but must be non-NULL. Some `exec`s will return
+the location of a value, which can be updated, in `lval`. Others will
+set `lval` to NULL indicating that there is a value of appropriate type
+in `rval`.
+
+###### core functions
+
+ struct lrval {
+ struct type *type;
+ struct value rval, *lval;
+ };
+
+ static struct lrval _interp_exec(struct parse_context *c, struct exec *e);
+
+ static struct value interp_exec(struct parse_context *c, struct exec *e,
+ struct type **typeret)
+ {
+ struct lrval ret = _interp_exec(c, e);
+
+ if (!ret.type) abort();
+ if (typeret)
+ *typeret = ret.type;
+ if (ret.lval)
+ dup_value(ret.type, ret.lval, &ret.rval);
+ return ret.rval;
+ }
+
+ static struct value *linterp_exec(struct parse_context *c, struct exec *e,
+ struct type **typeret)
+ {
+ struct lrval ret = _interp_exec(c, e);
+
+ if (ret.lval)
+ *typeret = ret.type;
+ else
+ free_value(ret.type, &ret.rval);
+ return ret.lval;
+ }
+
+ static struct lrval _interp_exec(struct parse_context *c, struct exec *e)
+ {
+ struct lrval ret;
+ struct value rv = {}, *lrv = NULL;
+ struct type *rvtype;
+
+ rvtype = ret.type = Tnone;
+ if (!e) {
+ ret.lval = lrv;
+ ret.rval = rv;
+ return ret;
+ }
+
+ switch(e->type) {
+ case Xbinode:
+ {
+ struct binode *b = cast(binode, e);
+ struct value left, right, *lleft;
+ struct type *ltype, *rtype;
+ ltype = rtype = Tnone;
+ switch (b->op) {
+ ## interp binode cases
+ }
+ free_value(ltype, &left);
+ free_value(rtype, &right);
+ break;
+ }
+ ## interp exec cases
+ }
+ ret.lval = lrv;
+ ret.rval = rv;
+ ret.type = rvtype;
+ return ret;
+ }
+
+### Complex types
+
+Now that we have the shape of the interpreter in place we can add some
+complex types and connected them in to the data structures and the
+different phases of parse, analyse, print, interpret.
+
+Thus far we have arrays and structs.
+
+#### Arrays
+
+Arrays can be declared by giving a size and a type, as `[size]type' so
+`freq:[26]number` declares `freq` to be an array of 26 numbers. The
+size can be either a literal number, or a named constant. Some day an
+arbitrary expression will be supported.
+
+As a formal parameter to a function, the array can be declared with a
+new variable as the size: `name:[size::number]string`. The `size`
+variable is set to the size of the array and must be a constant. As
+`number` is the only supported type, it can be left out:
+`name:[size::]string`.
+
+Arrays cannot be assigned. When pointers are introduced we will also
+introduce array slices which can refer to part or all of an array -
+the assignment syntax will create a slice. For now, an array can only
+ever be referenced by the name it is declared with. It is likely that
+a "`copy`" primitive will eventually be define which can be used to
+make a copy of an array with controllable recursive depth.
+
+For now we have two sorts of array, those with fixed size either because
+it is given as a literal number or because it is a struct member (which
+cannot have a runtime-changing size), and those with a size that is
+determined at runtime - local variables with a const size. The former
+have their size calculated at parse time, the latter at run time.
+
+For the latter type, the `size` field of the type is the size of a
+pointer, and the array is reallocated every time it comes into scope.
+
+We differentiate struct fields with a const size from local variables
+with a const size by whether they are prepared at parse time or not.
+
+###### type union fields
+
+ struct {
+ int unspec; // size is unspecified - vsize must be set.
+ short size;
+ short static_size;
+ struct variable *vsize;
+ struct type *member;
+ } array;
+
+###### value union fields
+ void *array; // used if not static_size
+
+###### value functions
+
+ static void array_prepare_type(struct parse_context *c, struct type *type,
+ int parse_time)
+ {
+ struct value *vsize;
+ mpz_t q;
+ if (!type->array.vsize || type->array.static_size)
+ return;
+
+ vsize = var_value(c, type->array.vsize);
+ mpz_init(q);
+ mpz_tdiv_q(q, mpq_numref(vsize->num), mpq_denref(vsize->num));
+ type->array.size = mpz_get_si(q);
+ mpz_clear(q);
+
+ if (parse_time) {
+ type->array.static_size = 1;
+ type->size = type->array.size * type->array.member->size;
+ type->align = type->array.member->align;
+ }
+ }
+
+ static void array_init(struct type *type, struct value *val)
+ {
+ int i;
+ void *ptr = val->ptr;
+
+ if (!val)
+ return;
+ if (!type->array.static_size) {
+ val->array = calloc(type->array.size,
+ type->array.member->size);
+ ptr = val->array;
+ }
+ for (i = 0; i < type->array.size; i++) {
+ struct value *v;
+ v = (void*)ptr + i * type->array.member->size;
+ val_init(type->array.member, v);
+ }
+ }
+
+ static void array_free(struct type *type, struct value *val)
+ {
+ int i;
+ void *ptr = val->ptr;
+
+ if (!type->array.static_size)
+ ptr = val->array;
+ for (i = 0; i < type->array.size; i++) {
+ struct value *v;
+ v = (void*)ptr + i * type->array.member->size;
+ free_value(type->array.member, v);
+ }
+ if (!type->array.static_size)
+ free(ptr);
+ }
+
+ static int array_compat(struct type *require, struct type *have)
+ {
+ if (have->compat != require->compat)
+ return 0;
+ /* Both are arrays, so we can look at details */
+ if (!type_compat(require->array.member, have->array.member, 0))
+ return 0;
+ if (have->array.unspec && require->array.unspec) {
+ if (have->array.vsize && require->array.vsize &&
+ have->array.vsize != require->array.vsize)
+ /* sizes might not be the same */
+ return 0;
+ return 1;
+ }
+ if (have->array.unspec || require->array.unspec)
+ return 1;
+ if (require->array.vsize == NULL && have->array.vsize == NULL)
+ return require->array.size == have->array.size;
+
+ return require->array.vsize == have->array.vsize;
+ }
+
+ static void array_print_type(struct type *type, FILE *f)
+ {
+ fputs("[", f);
+ if (type->array.vsize) {
+ struct binding *b = type->array.vsize->name;
+ fprintf(f, "%.*s%s]", b->name.len, b->name.txt,
+ type->array.unspec ? "::" : "");
+ } else
+ fprintf(f, "%d]", type->array.size);
+ type_print(type->array.member, f);
+ }
+
+ static struct type array_prototype = {
+ .init = array_init,
+ .prepare_type = array_prepare_type,
+ .print_type = array_print_type,
+ .compat = array_compat,
+ .free = array_free,
+ .size = sizeof(void*),
+ .align = sizeof(void*),
+ };