different specific code elements which comprise or manipulate these
various entities.
-### Types
+### Executables
-Values come in a wide range of types, with more likely to be added.
-Each type needs to be able to print its own values (for convenience at
-least) as well as to compare two values, at least for equality and
-possibly for order. For now, values might need to be duplicated and
-freed, though eventually such manipulations will be better integrated
-into the language.
+Executables can be lots of different things. In many cases an
+executable is just an operation combined with one or two other
+executables. This allows for expressions and lists etc. Other times an
+executable is something quite specific like a constant or variable name.
+So we define a `struct exec` to be a general executable with a type, and
+a `struct binode` which is a subclass of `exec`, forms a node in a
+binary tree, and holds an operation. There will be other subclasses,
+and to access these we need to be able to `cast` the `exec` into the
+various other types. The first field in any `struct exec` is the type
+from the `exec_types` enum.
-Rather than requiring every numeric type to support all numeric
-operations (add, multiple, etc), we allow types to be able to present
-as one of a few standard types: integer, float, and fraction. The
-existence of these conversion functions eventually enable types to
-determine if they are compatible with other types, though such types
-have not yet been implemented.
+###### macros
+ #define cast(structname, pointer) ({ \
+ const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \
+ if (__mptr && *__mptr != X##structname) abort(); \
+ (struct structname *)( (char *)__mptr);})
-Named type are stored in a simple linked list. Objects of each type are
-"values" which are often passed around by value.
+ #define new(structname) ({ \
+ struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
+ __ptr->type = X##structname; \
+ __ptr->line = -1; __ptr->column = -1; \
+ __ptr;})
-###### ast
+ #define new_pos(structname, token) ({ \
+ struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
+ __ptr->type = X##structname; \
+ __ptr->line = token.line; __ptr->column = token.col; \
+ __ptr;})
- struct value {
- union {
- char ptr[1];
- ## value union fields
- };
+###### ast
+ enum exec_types {
+ Xbinode,
+ ## exec type
};
-
- struct type {
- struct text name;
- struct type *next;
- int size, align;
- void (*init)(struct type *type, struct value *val);
- void (*prepare_type)(struct parse_context *c, struct type *type, int parse_time);
- void (*print)(struct type *type, struct value *val, FILE *f);
- void (*print_type)(struct type *type, FILE *f);
- int (*cmp_order)(struct type *t1, struct type *t2,
- struct value *v1, struct value *v2);
- int (*cmp_eq)(struct type *t1, struct type *t2,
- struct value *v1, struct value *v2);
- void (*dup)(struct type *type, struct value *vold, struct value *vnew);
- void (*free)(struct type *type, struct value *val);
- void (*free_type)(struct type *t);
- long long (*to_int)(struct value *v);
- double (*to_float)(struct value *v);
- int (*to_mpq)(mpq_t *q, struct value *v);
- ## type functions
- union {
- ## type union fields
- };
+ struct exec {
+ enum exec_types type;
+ int line, column;
+ ## exec fields
+ };
+ struct binode {
+ struct exec;
+ enum Btype {
+ ## Binode types
+ } op;
+ struct exec *left, *right;
};
-
-###### parse context
-
- struct type *typelist;
###### ast functions
- static struct type *find_type(struct parse_context *c, struct text s)
+ static int __fput_loc(struct exec *loc, FILE *f)
{
- struct type *l = c->typelist;
-
- while (l &&
- text_cmp(l->name, s) != 0)
- l = l->next;
- return l;
+ if (!loc)
+ return 0;
+ if (loc->line >= 0) {
+ fprintf(f, "%d:%d: ", loc->line, loc->column);
+ return 1;
+ }
+ if (loc->type == Xbinode)
+ return __fput_loc(cast(binode,loc)->left, f) ||
+ __fput_loc(cast(binode,loc)->right, f); // NOTEST
+ return 0;
}
-
- static struct type *add_type(struct parse_context *c, struct text s,
- struct type *proto)
+ static void fput_loc(struct exec *loc, FILE *f)
{
- struct type *n;
-
- n = calloc(1, sizeof(*n));
- *n = *proto;
- n->name = s;
- n->next = c->typelist;
- c->typelist = n;
- return n;
+ if (!__fput_loc(loc, f))
+ fprintf(f, "??:??: ");
}
- static void free_type(struct type *t)
+Each different type of `exec` node needs a number of functions defined,
+a bit like methods. We must be able to free it, print it, analyse it
+and execute it. Once we have specific `exec` types we will need to
+parse them too. Let's take this a bit more slowly.
+
+#### Freeing
+
+The parser generator requires a `free_foo` function for each struct
+that stores attributes and they will often be `exec`s and subtypes
+there-of. So we need `free_exec` which can handle all the subtypes,
+and we need `free_binode`.
+
+###### ast functions
+
+ static void free_binode(struct binode *b)
{
- /* The type is always a reference to something in the
- * context, so we don't need to free anything.
- */
+ if (!b)
+ return;
+ free_exec(b->left);
+ free_exec(b->right);
+ free(b);
}
- static void free_value(struct type *type, struct value *v)
+###### core functions
+ static void free_exec(struct exec *e)
{
- if (type && v) {
- type->free(type, v);
- memset(v, 0x5a, type->size);
+ if (!e)
+ return;
+ switch(e->type) {
+ ## free exec cases
}
}
- static void type_print(struct type *type, FILE *f)
- {
- if (!type)
- fputs("*unknown*type*", f); // NOTEST
- else if (type->name.len)
- fprintf(f, "%.*s", type->name.len, type->name.txt);
- else if (type->print_type)
- type->print_type(type, f);
- else
- fputs("*invalid*type*", f); // NOTEST
- }
+###### forward decls
- static void val_init(struct type *type, struct value *val)
- {
- if (type && type->init)
- type->init(type, val);
- }
+ static void free_exec(struct exec *e);
- static void dup_value(struct type *type,
- struct value *vold, struct value *vnew)
+###### free exec cases
+ case Xbinode: free_binode(cast(binode, e)); break;
+
+#### Printing
+
+Printing an `exec` requires that we know the current indent level for
+printing line-oriented components. As will become clear later, we
+also want to know what sort of bracketing to use.
+
+###### ast functions
+
+ static void do_indent(int i, char *str)
{
- if (type && type->dup)
- type->dup(type, vold, vnew);
+ while (i-- > 0)
+ printf(" ");
+ printf("%s", str);
}
- static int value_cmp(struct type *tl, struct type *tr,
- struct value *left, struct value *right)
+###### core functions
+ static void print_binode(struct binode *b, int indent, int bracket)
{
- if (tl && tl->cmp_order)
- return tl->cmp_order(tl, tr, left, right);
- if (tl && tl->cmp_eq) // NOTEST
- return tl->cmp_eq(tl, tr, left, right); // NOTEST
- return -1; // NOTEST
+ struct binode *b2;
+ switch(b->op) {
+ ## print binode cases
+ }
}
- static void print_value(struct type *type, struct value *v, FILE *f)
+ static void print_exec(struct exec *e, int indent, int bracket)
{
- if (type && type->print)
- type->print(type, v, f);
- else
- fprintf(f, "*Unknown*"); // NOTEST
+ if (!e)
+ return;
+ switch (e->type) {
+ case Xbinode:
+ print_binode(cast(binode, e), indent, bracket); break;
+ ## print exec cases
+ }
+ if (e->to_free) {
+ struct variable *v;
+ do_indent(indent, "/* FREE");
+ for (v = e->to_free; v; v = v->next_free) {
+ printf(" %.*s", v->name->name.len, v->name->name.txt);
+ printf("[%d,%d]", v->scope_start, v->scope_end);
+ if (v->frame_pos >= 0)
+ printf("(%d+%d)", v->frame_pos,
+ v->type ? v->type->size:0);
+ }
+ printf(" */\n");
+ }
}
###### forward decls
- static void free_value(struct type *type, struct value *v);
- static int type_compat(struct type *require, struct type *have, int rules);
- static void type_print(struct type *type, FILE *f);
- static void val_init(struct type *type, struct value *v);
- static void dup_value(struct type *type,
- struct value *vold, struct value *vnew);
- static int value_cmp(struct type *tl, struct type *tr,
- struct value *left, struct value *right);
- static void print_value(struct type *type, struct value *v, FILE *f);
-
-###### free context types
+ static void print_exec(struct exec *e, int indent, int bracket);
- while (context.typelist) {
- struct type *t = context.typelist;
+#### Analysing
- context.typelist = t->next;
- if (t->free_type)
- t->free_type(t);
- free(t);
- }
+As discussed, analysis involves propagating type requirements around the
+program and looking for errors.
-Type can be specified for local variables, for fields in a structure,
-for formal parameters to functions, and possibly elsewhere. Different
-rules may apply in different contexts. As a minimum, a named type may
-always be used. Currently the type of a formal parameter can be
-different from types in other contexts, so we have a separate grammar
-symbol for those.
-
-###### Grammar
-
- $*type
- Type -> IDENTIFIER ${
- $0 = find_type(c, $1.txt);
- if (!$0) {
- tok_err(c,
- "error: undefined type", &$1);
-
- $0 = Tnone;
- }
- }$
- ## type grammar
-
- FormalType -> Type ${ $0 = $<1; }$
- ## formal type grammar
+So `propagate_types` is passed an expected type (being a `struct type`
+pointer together with some `val_rules` flags) that the `exec` is
+expected to return, and returns the type that it does return, either
+of which can be `NULL` signifying "unknown". An `ok` flag is passed
+by reference. It is set to `0` when an error is found, and `2` when
+any change is made. If it remains unchanged at `1`, then no more
+propagation is needed.
-#### Base Types
+###### ast
-Values of the base types can be numbers, which we represent as
-multi-precision fractions, strings, Booleans and labels. When
-analysing the program we also need to allow for places where no value
-is meaningful (type `Tnone`) and where we don't know what type to
-expect yet (type is `NULL`).
+ enum val_rules {Rnolabel = 1<<0, Rboolok = 1<<1, Rnoconstant = 1<<2};
-Values are never shared, they are always copied when used, and freed
-when no longer needed.
+###### format cases
+ case 'r':
+ if (rules & Rnolabel)
+ fputs(" (labels not permitted)", stderr);
+ break;
-When propagating type information around the program, we need to
-determine if two types are compatible, where type `NULL` is compatible
-with anything. There are two special cases with type compatibility,
-both related to the Conditional Statement which will be described
-later. In some cases a Boolean can be accepted as well as some other
-primary type, and in others any type is acceptable except a label (`Vlabel`).
-A separate function encoding these cases will simplify some code later.
+###### forward decls
+ static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
+ struct type *type, int rules);
+###### core functions
-###### type functions
+ static struct type *__propagate_types(struct exec *prog, struct parse_context *c, int *ok,
+ struct type *type, int rules)
+ {
+ struct type *t;
- int (*compat)(struct type *this, struct type *other);
+ if (!prog)
+ return Tnone;
-###### ast functions
+ switch (prog->type) {
+ case Xbinode:
+ {
+ struct binode *b = cast(binode, prog);
+ switch (b->op) {
+ ## propagate binode cases
+ }
+ break;
+ }
+ ## propagate exec cases
+ }
+ return Tnone;
+ }
- static int type_compat(struct type *require, struct type *have, int rules)
+ static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
+ struct type *type, int rules)
{
- if ((rules & Rboolok) && have == Tbool)
- return 1; // NOTEST
- if ((rules & Rnolabel) && have == Tlabel)
- return 0; // NOTEST
- if (!require || !have)
- return 1;
-
- if (require->compat)
- return require->compat(require, have);
+ struct type *ret = __propagate_types(prog, c, ok, type, rules);
- return require == have;
+ if (c->parse_error)
+ *ok = 0;
+ return ret;
}
-###### includes
- #include <gmp.h>
- #include "parse_string.h"
- #include "parse_number.h"
+#### Interpreting
-###### libs
- myLDLIBS := libnumber.o libstring.o -lgmp
- LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
+Interpreting an `exec` doesn't require anything but the `exec`. State
+is stored in variables and each variable will be directly linked from
+within the `exec` tree. The exception to this is the `main` function
+which needs to look at command line arguments. This function will be
+interpreted separately.
-###### type union fields
- enum vtype {Vnone, Vstr, Vnum, Vbool, Vlabel} vtype;
+Each `exec` can return a value combined with a type in `struct lrval`.
+The type may be `Tnone` but must be non-NULL. Some `exec`s will return
+the location of a value, which can be updated, in `lval`. Others will
+set `lval` to NULL indicating that there is a value of appropriate type
+in `rval`.
-###### value union fields
- struct text str;
- mpq_t num;
- unsigned char bool;
- void *label;
+###### core functions
-###### ast functions
- static void _free_value(struct type *type, struct value *v)
- {
- if (!v)
- return; // NOTEST
- switch (type->vtype) {
- case Vnone: break;
- case Vstr: free(v->str.txt); break;
- case Vnum: mpq_clear(v->num); break;
- case Vlabel:
- case Vbool: break;
- }
- }
+ struct lrval {
+ struct type *type;
+ struct value rval, *lval;
+ };
-###### value functions
+ /* If dest is passed, dtype must give the expected type, and
+ * result can go there, in which case type is returned as NULL.
+ */
+ static struct lrval _interp_exec(struct parse_context *c, struct exec *e,
+ struct value *dest, struct type *dtype);
- static void _val_init(struct type *type, struct value *val)
+ static struct value interp_exec(struct parse_context *c, struct exec *e,
+ struct type **typeret)
{
- switch(type->vtype) {
- case Vnone: // NOTEST
- break; // NOTEST
- case Vnum:
- mpq_init(val->num); break;
- case Vstr:
- val->str.txt = malloc(1);
- val->str.len = 0;
- break;
- case Vbool:
- val->bool = 0;
- break;
- case Vlabel:
- val->label = NULL;
- break;
- }
- }
+ struct lrval ret = _interp_exec(c, e, NULL, NULL);
- static void _dup_value(struct type *type,
- struct value *vold, struct value *vnew)
- {
- switch (type->vtype) {
- case Vnone: // NOTEST
- break; // NOTEST
- case Vlabel:
- vnew->label = vold->label;
- break;
- case Vbool:
- vnew->bool = vold->bool;
- break;
- case Vnum:
- mpq_init(vnew->num);
- mpq_set(vnew->num, vold->num);
- break;
- case Vstr:
- vnew->str.len = vold->str.len;
- vnew->str.txt = malloc(vnew->str.len);
- memcpy(vnew->str.txt, vold->str.txt, vnew->str.len);
- break;
- }
+ if (!ret.type) abort();
+ if (typeret)
+ *typeret = ret.type;
+ if (ret.lval)
+ dup_value(ret.type, ret.lval, &ret.rval);
+ return ret.rval;
}
- static int _value_cmp(struct type *tl, struct type *tr,
- struct value *left, struct value *right)
+ static struct value *linterp_exec(struct parse_context *c, struct exec *e,
+ struct type **typeret)
{
- int cmp;
- if (tl != tr)
- return tl - tr; // NOTEST
- switch (tl->vtype) {
- case Vlabel: cmp = left->label == right->label ? 0 : 1; break;
- case Vnum: cmp = mpq_cmp(left->num, right->num); break;
- case Vstr: cmp = text_cmp(left->str, right->str); break;
- case Vbool: cmp = left->bool - right->bool; break;
- case Vnone: cmp = 0; // NOTEST
- }
- return cmp;
+ struct lrval ret = _interp_exec(c, e, NULL, NULL);
+
+ if (!ret.type) abort();
+ if (ret.lval)
+ *typeret = ret.type;
+ else
+ free_value(ret.type, &ret.rval);
+ return ret.lval;
}
- static void _print_value(struct type *type, struct value *v, FILE *f)
+ /* dinterp_exec is used when the destination type is certain and
+ * the value has a place to go.
+ */
+ static void dinterp_exec(struct parse_context *c, struct exec *e,
+ struct value *dest, struct type *dtype,
+ int need_free)
{
- switch (type->vtype) {
- case Vnone: // NOTEST
- fprintf(f, "*no-value*"); break; // NOTEST
- case Vlabel: // NOTEST
- fprintf(f, "*label-%p*", v->label); break; // NOTEST
- case Vstr:
- fprintf(f, "%.*s", v->str.len, v->str.txt); break;
- case Vbool:
- fprintf(f, "%s", v->bool ? "True":"False"); break;
- case Vnum:
- {
- mpf_t fl;
- mpf_init2(fl, 20);
- mpf_set_q(fl, v->num);
- gmp_fprintf(f, "%Fg", fl);
- mpf_clear(fl);
- break;
- }
- }
+ struct lrval ret = _interp_exec(c, e, dest, dtype);
+ if (!ret.type)
+ return;
+ if (need_free)
+ free_value(dtype, dest);
+ if (ret.lval)
+ dup_value(dtype, ret.lval, dest);
+ else
+ memcpy(dest, &ret.rval, dtype->size);
}
- static void _free_value(struct type *type, struct value *v);
+ static struct lrval _interp_exec(struct parse_context *c, struct exec *e,
+ struct value *dest, struct type *dtype)
+ {
+ /* If the result is copied to dest, ret.type is set to NULL */
+ struct lrval ret;
+ struct value rv = {}, *lrv = NULL;
+ struct type *rvtype;
- static struct type base_prototype = {
- .init = _val_init,
- .print = _print_value,
- .cmp_order = _value_cmp,
- .cmp_eq = _value_cmp,
- .dup = _dup_value,
- .free = _free_value,
- };
-
- static struct type *Tbool, *Tstr, *Tnum, *Tnone, *Tlabel;
-
-###### ast functions
- static struct type *add_base_type(struct parse_context *c, char *n,
- enum vtype vt, int size)
- {
- struct text txt = { n, strlen(n) };
- struct type *t;
+ rvtype = ret.type = Tnone;
+ if (!e) {
+ ret.lval = lrv;
+ ret.rval = rv;
+ return ret;
+ }
- t = add_type(c, txt, &base_prototype);
- t->vtype = vt;
- t->size = size;
- t->align = size > sizeof(void*) ? sizeof(void*) : size;
- if (t->size & (t->align - 1))
- t->size = (t->size | (t->align - 1)) + 1; // NOTEST
- return t;
+ switch(e->type) {
+ case Xbinode:
+ {
+ struct binode *b = cast(binode, e);
+ struct value left, right, *lleft;
+ struct type *ltype, *rtype;
+ ltype = rtype = Tnone;
+ switch (b->op) {
+ ## interp binode cases
+ }
+ free_value(ltype, &left);
+ free_value(rtype, &right);
+ break;
+ }
+ ## interp exec cases
+ }
+ if (rvtype) {
+ ret.lval = lrv;
+ ret.rval = rv;
+ ret.type = rvtype;
+ }
+ ## interp exec cleanup
+ return ret;
}
-###### context initialization
+### Types
- Tbool = add_base_type(&context, "Boolean", Vbool, sizeof(char));
- Tstr = add_base_type(&context, "string", Vstr, sizeof(struct text));
- Tnum = add_base_type(&context, "number", Vnum, sizeof(mpq_t));
- Tnone = add_base_type(&context, "none", Vnone, 0);
- Tlabel = add_base_type(&context, "label", Vlabel, sizeof(void*));
+Values come in a wide range of types, with more likely to be added.
+Each type needs to be able to print its own values (for convenience at
+least) as well as to compare two values, at least for equality and
+possibly for order. For now, values might need to be duplicated and
+freed, though eventually such manipulations will be better integrated
+into the language.
-### Variables
+Rather than requiring every numeric type to support all numeric
+operations (add, multiply, etc), we allow types to be able to present
+as one of a few standard types: integer, float, and fraction. The
+existence of these conversion functions eventually enable types to
+determine if they are compatible with other types, though such types
+have not yet been implemented.
-Variables are scoped named values. We store the names in a linked list
-of "bindings" sorted in lexical order, and use sequential search and
-insertion sort.
+Named type are stored in a simple linked list. Objects of each type are
+"values" which are often passed around by value.
###### ast
- struct binding {
- struct text name;
- struct binding *next; // in lexical order
- ## binding fields
+ struct value {
+ union {
+ char ptr[1];
+ ## value union fields
+ };
};
-This linked list is stored in the parse context so that "reduce"
-functions can find or add variables, and so the analysis phase can
-ensure that every variable gets a type.
+ struct type {
+ struct text name;
+ struct type *next;
+ int size, align;
+ void (*init)(struct type *type, struct value *val);
+ void (*prepare_type)(struct parse_context *c, struct type *type, int parse_time);
+ void (*print)(struct type *type, struct value *val, FILE *f);
+ void (*print_type)(struct type *type, FILE *f);
+ int (*cmp_order)(struct type *t1, struct type *t2,
+ struct value *v1, struct value *v2);
+ int (*cmp_eq)(struct type *t1, struct type *t2,
+ struct value *v1, struct value *v2);
+ void (*dup)(struct type *type, struct value *vold, struct value *vnew);
+ void (*free)(struct type *type, struct value *val);
+ void (*free_type)(struct type *t);
+ long long (*to_int)(struct value *v);
+ double (*to_float)(struct value *v);
+ int (*to_mpq)(mpq_t *q, struct value *v);
+ ## type functions
+ union {
+ ## type union fields
+ };
+ };
###### parse context
- struct binding *varlist; // In lexical order
+ struct type *typelist;
###### ast functions
- static struct binding *find_binding(struct parse_context *c, struct text s)
+ static struct type *find_type(struct parse_context *c, struct text s)
{
- struct binding **l = &c->varlist;
- struct binding *n;
- int cmp = 1;
+ struct type *l = c->typelist;
+
+ while (l &&
+ text_cmp(l->name, s) != 0)
+ l = l->next;
+ return l;
+ }
+
+ static struct type *add_type(struct parse_context *c, struct text s,
+ struct type *proto)
+ {
+ struct type *n;
- while (*l &&
- (cmp = text_cmp((*l)->name, s)) < 0)
- l = & (*l)->next;
- if (cmp == 0)
- return *l;
n = calloc(1, sizeof(*n));
+ *n = *proto;
n->name = s;
- n->next = *l;
- *l = n;
+ n->next = c->typelist;
+ c->typelist = n;
return n;
}
-Each name can be linked to multiple variables defined in different
-scopes. Each scope starts where the name is declared and continues
-until the end of the containing code block. Scopes of a given name
-cannot nest, so a declaration while a name is in-scope is an error.
-
-###### binding fields
- struct variable *var;
+ static void free_type(struct type *t)
+ {
+ /* The type is always a reference to something in the
+ * context, so we don't need to free anything.
+ */
+ }
-###### ast
- struct variable {
- struct variable *previous;
- struct type *type;
- struct binding *name;
- struct exec *where_decl;// where name was declared
- struct exec *where_set; // where type was set
- ## variable fields
- };
+ static void free_value(struct type *type, struct value *v)
+ {
+ if (type && v) {
+ type->free(type, v);
+ memset(v, 0x5a, type->size);
+ }
+ }
-When a scope closes, the values of the variables might need to be freed.
-This happens in the context of some `struct exec` and each `exec` will
-need to know which variables need to be freed when it completes.
+ static void type_print(struct type *type, FILE *f)
+ {
+ if (!type)
+ fputs("*unknown*type*", f); // NOTEST
+ else if (type->name.len)
+ fprintf(f, "%.*s", type->name.len, type->name.txt);
+ else if (type->print_type)
+ type->print_type(type, f);
+ else
+ fputs("*invalid*type*", f); // NOTEST
+ }
-####### exec fields
- struct variable *to_free;
+ static void val_init(struct type *type, struct value *val)
+ {
+ if (type && type->init)
+ type->init(type, val);
+ }
-####### variable fields
- struct exec *cleanup_exec;
- struct variable *next_free;
+ static void dup_value(struct type *type,
+ struct value *vold, struct value *vnew)
+ {
+ if (type && type->dup)
+ type->dup(type, vold, vnew);
+ }
-####### interp exec cleanup
+ static int value_cmp(struct type *tl, struct type *tr,
+ struct value *left, struct value *right)
{
- struct variable *v;
- for (v = e->to_free; v; v = v->next_free) {
- struct value *val = var_value(c, v);
- free_value(v->type, val);
- }
+ if (tl && tl->cmp_order)
+ return tl->cmp_order(tl, tr, left, right);
+ if (tl && tl->cmp_eq) // NOTEST
+ return tl->cmp_eq(tl, tr, left, right); // NOTEST
+ return -1; // NOTEST
}
-###### ast functions
- static void variable_unlink_exec(struct variable *v)
+ static void print_value(struct type *type, struct value *v, FILE *f)
{
- struct variable **vp;
- if (!v->cleanup_exec)
- return;
- for (vp = &v->cleanup_exec->to_free;
- *vp; vp = &(*vp)->next_free) {
- if (*vp != v)
- continue;
- *vp = v->next_free;
- v->cleanup_exec = NULL;
- break;
- }
+ if (type && type->print)
+ type->print(type, v, f);
+ else
+ fprintf(f, "*Unknown*"); // NOTEST
}
-While the naming seems strange, we include local constants in the
-definition of variables. A name declared `var := value` can
-subsequently be changed, but a name declared `var ::= value` cannot -
-it is constant
+###### forward decls
-###### variable fields
- int constant;
+ static void free_value(struct type *type, struct value *v);
+ static int type_compat(struct type *require, struct type *have, int rules);
+ static void type_print(struct type *type, FILE *f);
+ static void val_init(struct type *type, struct value *v);
+ static void dup_value(struct type *type,
+ struct value *vold, struct value *vnew);
+ static int value_cmp(struct type *tl, struct type *tr,
+ struct value *left, struct value *right);
+ static void print_value(struct type *type, struct value *v, FILE *f);
-Scopes in parallel branches can be partially merged. More
-specifically, if a given name is declared in both branches of an
-if/else then its scope is a candidate for merging. Similarly if
-every branch of an exhaustive switch (e.g. has an "else" clause)
-declares a given name, then the scopes from the branches are
-candidates for merging.
+###### free context types
-Note that names declared inside a loop (which is only parallel to
-itself) are never visible after the loop. Similarly names defined in
-scopes which are not parallel, such as those started by `for` and
-`switch`, are never visible after the scope. Only variables defined in
-both `then` and `else` (including the implicit then after an `if`, and
-excluding `then` used with `for`) and in all `case`s and `else` of a
-`switch` or `while` can be visible beyond the `if`/`switch`/`while`.
-
-Labels, which are a bit like variables, follow different rules.
-Labels are not explicitly declared, but if an undeclared name appears
-in a context where a label is legal, that effectively declares the
-name as a label. The declaration remains in force (or in scope) at
-least to the end of the immediately containing block and conditionally
-in any larger containing block which does not declare the name in some
-other way. Importantly, the conditional scope extension happens even
-if the label is only used in one parallel branch of a conditional --
-when used in one branch it is treated as having been declared in all
-branches.
-
-Merge candidates are tentatively visible beyond the end of the
-branching statement which creates them. If the name is used, the
-merge is affirmed and they become a single variable visible at the
-outer layer. If not - if it is redeclared first - the merge lapses.
-
-To track scopes we have an extra stack, implemented as a linked list,
-which roughly parallels the parse stack and which is used exclusively
-for scoping. When a new scope is opened, a new frame is pushed and
-the child-count of the parent frame is incremented. This child-count
-is used to distinguish between the first of a set of parallel scopes,
-in which declared variables must not be in scope, and subsequent
-branches, whether they may already be conditionally scoped.
-
-We need a total ordering of scopes so we can easily compare to variables
-to see if they are concurrently in scope. To achieve this we record a
-`scope_count` which is actually a count of both beginnings and endings
-of scopes. Then each variable has a record of the scope count where it
-enters scope, and where it leaves.
-
-To push a new frame *before* any code in the frame is parsed, we need a
-grammar reduction. This is most easily achieved with a grammar
-element which derives the empty string, and creates the new scope when
-it is recognised. This can be placed, for example, between a keyword
-like "if" and the code following it.
-
-###### ast
- struct scope {
- struct scope *parent;
- int child_count;
- };
-
-###### parse context
- int scope_depth;
- int scope_count;
- struct scope *scope_stack;
-
-###### variable fields
- int scope_start, scope_end;
-
-###### ast functions
- static void scope_pop(struct parse_context *c)
- {
- struct scope *s = c->scope_stack;
+ while (context.typelist) {
+ struct type *t = context.typelist;
- c->scope_stack = s->parent;
- free(s);
- c->scope_depth -= 1;
- c->scope_count += 1;
+ context.typelist = t->next;
+ if (t->free_type)
+ t->free_type(t);
+ free(t);
}
- static void scope_push(struct parse_context *c)
- {
- struct scope *s = calloc(1, sizeof(*s));
- if (c->scope_stack)
- c->scope_stack->child_count += 1;
- s->parent = c->scope_stack;
- c->scope_stack = s;
- c->scope_depth += 1;
- c->scope_count += 1;
- }
+Type can be specified for local variables, for fields in a structure,
+for formal parameters to functions, and possibly elsewhere. Different
+rules may apply in different contexts. As a minimum, a named type may
+always be used. Currently the type of a formal parameter can be
+different from types in other contexts, so we have a separate grammar
+symbol for those.
###### Grammar
- $void
- OpenScope -> ${ scope_push(c); }$
-
-Each variable records a scope depth and is in one of four states:
-
-- "in scope". This is the case between the declaration of the
- variable and the end of the containing block, and also between
- the usage with affirms a merge and the end of that block.
-
- The scope depth is not greater than the current parse context scope
- nest depth. When the block of that depth closes, the state will
- change. To achieve this, all "in scope" variables are linked
- together as a stack in nesting order.
-
-- "pending". The "in scope" block has closed, but other parallel
- scopes are still being processed. So far, every parallel block at
- the same level that has closed has declared the name.
-
- The scope depth is the depth of the last parallel block that
- enclosed the declaration, and that has closed.
-
-- "conditionally in scope". The "in scope" block and all parallel
- scopes have closed, and no further mention of the name has been seen.
- This state includes a secondary nest depth (`min_depth`) which records
- the outermost scope seen since the variable became conditionally in
- scope. If a use of the name is found, the variable becomes "in scope"
- and that secondary depth becomes the recorded scope depth. If the
- name is declared as a new variable, the old variable becomes "out of
- scope" and the recorded scope depth stays unchanged.
+ $*type
+ Type -> IDENTIFIER ${
+ $0 = find_type(c, $1.txt);
+ if (!$0) {
+ tok_err(c,
+ "error: undefined type", &$1);
-- "out of scope". The variable is neither in scope nor conditionally
- in scope. It is permanently out of scope now and can be removed from
- the "in scope" stack. When a variable becomes out-of-scope it is
- moved to a separate list (`out_scope`) of variables which have fully
- known scope. This will be used at the end of each function to assign
- each variable a place in the stack frame.
+ $0 = Tnone;
+ }
+ }$
+ ## type grammar
-###### variable fields
- int depth, min_depth;
- enum { OutScope, PendingScope, CondScope, InScope } scope;
- struct variable *in_scope;
+ FormalType -> Type ${ $0 = $<1; }$
+ ## formal type grammar
-###### parse context
+#### Base Types
- struct variable *in_scope;
- struct variable *out_scope;
+Values of the base types can be numbers, which we represent as
+multi-precision fractions, strings, Booleans and labels. When
+analysing the program we also need to allow for places where no value
+is meaningful (type `Tnone`) and where we don't know what type to
+expect yet (type is `NULL`).
-All variables with the same name are linked together using the
-'previous' link. Those variable that have been affirmatively merged all
-have a 'merged' pointer that points to one primary variable - the most
-recently declared instance. When merging variables, we need to also
-adjust the 'merged' pointer on any other variables that had previously
-been merged with the one that will no longer be primary.
+Values are never shared, they are always copied when used, and freed
+when no longer needed.
-A variable that is no longer the most recent instance of a name may
-still have "pending" scope, if it might still be merged with most
-recent instance. These variables don't really belong in the
-"in_scope" list, but are not immediately removed when a new instance
-is found. Instead, they are detected and ignored when considering the
-list of in_scope names.
+When propagating type information around the program, we need to
+determine if two types are compatible, where type `NULL` is compatible
+with anything. There are two special cases with type compatibility,
+both related to the Conditional Statement which will be described
+later. In some cases a Boolean can be accepted as well as some other
+primary type, and in others any type is acceptable except a label (`Vlabel`).
+A separate function encoding these cases will simplify some code later.
-The storage of the value of a variable will be described later. For now
-we just need to know that when a variable goes out of scope, it might
-need to be freed. For this we need to be able to find it, so assume that
-`var_value()` will provide that.
+###### type functions
-###### variable fields
- struct variable *merged;
+ int (*compat)(struct type *this, struct type *other);
###### ast functions
- static void variable_merge(struct variable *primary, struct variable *secondary)
+ static int type_compat(struct type *require, struct type *have, int rules)
{
- struct variable *v;
+ if ((rules & Rboolok) && have == Tbool)
+ return 1; // NOTEST
+ if ((rules & Rnolabel) && have == Tlabel)
+ return 0; // NOTEST
+ if (!require || !have)
+ return 1;
- primary = primary->merged;
+ if (require->compat)
+ return require->compat(require, have);
- for (v = primary->previous; v; v=v->previous)
- if (v == secondary || v == secondary->merged ||
- v->merged == secondary ||
- v->merged == secondary->merged) {
- v->scope = OutScope;
- v->merged = primary;
- if (v->scope_start < primary->scope_start)
- primary->scope_start = v->scope_start;
- if (v->scope_end > primary->scope_end)
- primary->scope_end = v->scope_end; // NOTEST
- variable_unlink_exec(v);
- }
+ return require == have;
}
-###### forward decls
- static struct value *var_value(struct parse_context *c, struct variable *v);
+###### includes
+ #include <gmp.h>
+ #include "parse_string.h"
+ #include "parse_number.h"
-###### free global vars
+###### libs
+ myLDLIBS := libnumber.o libstring.o -lgmp
+ LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
- while (context.varlist) {
- struct binding *b = context.varlist;
- struct variable *v = b->var;
- context.varlist = b->next;
- free(b);
- while (v) {
- struct variable *next = v->previous;
+###### type union fields
+ enum vtype {Vnone, Vstr, Vnum, Vbool, Vlabel} vtype;
- if (v->global) {
- free_value(v->type, var_value(&context, v));
- if (v->depth == 0)
- // This is a global constant
- free_exec(v->where_decl);
- }
- free(v);
- v = next;
+###### value union fields
+ struct text str;
+ mpq_t num;
+ unsigned char bool;
+ void *label;
+
+###### ast functions
+ static void _free_value(struct type *type, struct value *v)
+ {
+ if (!v)
+ return; // NOTEST
+ switch (type->vtype) {
+ case Vnone: break;
+ case Vstr: free(v->str.txt); break;
+ case Vnum: mpq_clear(v->num); break;
+ case Vlabel:
+ case Vbool: break;
}
}
-#### Manipulating Bindings
-
-When a name is conditionally visible, a new declaration discards the old
-binding - the condition lapses. Similarly when we reach the end of a
-function (outermost non-global scope) any conditional scope must lapse.
-Conversely a usage of the name affirms the visibility and extends it to
-the end of the containing block - i.e. the block that contains both the
-original declaration and the latest usage. This is determined from
-`min_depth`. When a conditionally visible variable gets affirmed like
-this, it is also merged with other conditionally visible variables with
-the same name.
+###### value functions
-When we parse a variable declaration we either report an error if the
-name is currently bound, or create a new variable at the current nest
+ static void _val_init(struct type *type, struct value *val)
+ {
+ switch(type->vtype) {
+ case Vnone: // NOTEST
+ break; // NOTEST
+ case Vnum:
+ mpq_init(val->num); break;
+ case Vstr:
+ val->str.txt = malloc(1);
+ val->str.len = 0;
+ break;
+ case Vbool:
+ val->bool = 0;
+ break;
+ case Vlabel:
+ val->label = NULL;
+ break;
+ }
+ }
+
+ static void _dup_value(struct type *type,
+ struct value *vold, struct value *vnew)
+ {
+ switch (type->vtype) {
+ case Vnone: // NOTEST
+ break; // NOTEST
+ case Vlabel:
+ vnew->label = vold->label;
+ break;
+ case Vbool:
+ vnew->bool = vold->bool;
+ break;
+ case Vnum:
+ mpq_init(vnew->num);
+ mpq_set(vnew->num, vold->num);
+ break;
+ case Vstr:
+ vnew->str.len = vold->str.len;
+ vnew->str.txt = malloc(vnew->str.len);
+ memcpy(vnew->str.txt, vold->str.txt, vnew->str.len);
+ break;
+ }
+ }
+
+ static int _value_cmp(struct type *tl, struct type *tr,
+ struct value *left, struct value *right)
+ {
+ int cmp;
+ if (tl != tr)
+ return tl - tr; // NOTEST
+ switch (tl->vtype) {
+ case Vlabel: cmp = left->label == right->label ? 0 : 1; break;
+ case Vnum: cmp = mpq_cmp(left->num, right->num); break;
+ case Vstr: cmp = text_cmp(left->str, right->str); break;
+ case Vbool: cmp = left->bool - right->bool; break;
+ case Vnone: cmp = 0; // NOTEST
+ }
+ return cmp;
+ }
+
+ static void _print_value(struct type *type, struct value *v, FILE *f)
+ {
+ switch (type->vtype) {
+ case Vnone: // NOTEST
+ fprintf(f, "*no-value*"); break; // NOTEST
+ case Vlabel: // NOTEST
+ fprintf(f, "*label-%p*", v->label); break; // NOTEST
+ case Vstr:
+ fprintf(f, "%.*s", v->str.len, v->str.txt); break;
+ case Vbool:
+ fprintf(f, "%s", v->bool ? "True":"False"); break;
+ case Vnum:
+ {
+ mpf_t fl;
+ mpf_init2(fl, 20);
+ mpf_set_q(fl, v->num);
+ gmp_fprintf(f, "%Fg", fl);
+ mpf_clear(fl);
+ break;
+ }
+ }
+ }
+
+ static void _free_value(struct type *type, struct value *v);
+
+ static struct type base_prototype = {
+ .init = _val_init,
+ .print = _print_value,
+ .cmp_order = _value_cmp,
+ .cmp_eq = _value_cmp,
+ .dup = _dup_value,
+ .free = _free_value,
+ };
+
+ static struct type *Tbool, *Tstr, *Tnum, *Tnone, *Tlabel;
+
+###### ast functions
+ static struct type *add_base_type(struct parse_context *c, char *n,
+ enum vtype vt, int size)
+ {
+ struct text txt = { n, strlen(n) };
+ struct type *t;
+
+ t = add_type(c, txt, &base_prototype);
+ t->vtype = vt;
+ t->size = size;
+ t->align = size > sizeof(void*) ? sizeof(void*) : size;
+ if (t->size & (t->align - 1))
+ t->size = (t->size | (t->align - 1)) + 1; // NOTEST
+ return t;
+ }
+
+###### context initialization
+
+ Tbool = add_base_type(&context, "Boolean", Vbool, sizeof(char));
+ Tstr = add_base_type(&context, "string", Vstr, sizeof(struct text));
+ Tnum = add_base_type(&context, "number", Vnum, sizeof(mpq_t));
+ Tnone = add_base_type(&context, "none", Vnone, 0);
+ Tlabel = add_base_type(&context, "label", Vlabel, sizeof(void*));
+
+##### Base Values
+
+We have already met values as separate objects. When manifest constants
+appear in the program text, that must result in an executable which has
+a constant value. So the `val` structure embeds a value in an
+executable.
+
+###### exec type
+ Xval,
+
+###### ast
+ struct val {
+ struct exec;
+ struct type *vtype;
+ struct value val;
+ };
+
+###### ast functions
+ struct val *new_val(struct type *T, struct token tk)
+ {
+ struct val *v = new_pos(val, tk);
+ v->vtype = T;
+ return v;
+ }
+
+###### Grammar
+
+ $TERM True False
+
+ $*val
+ Value -> True ${
+ $0 = new_val(Tbool, $1);
+ $0->val.bool = 1;
+ }$
+ | False ${
+ $0 = new_val(Tbool, $1);
+ $0->val.bool = 0;
+ }$
+ | NUMBER ${
+ $0 = new_val(Tnum, $1);
+ {
+ char tail[3];
+ if (number_parse($0->val.num, tail, $1.txt) == 0)
+ mpq_init($0->val.num); // UNTESTED
+ if (tail[0])
+ tok_err(c, "error: unsupported number suffix",
+ &$1);
+ }
+ }$
+ | STRING ${
+ $0 = new_val(Tstr, $1);
+ {
+ char tail[3];
+ string_parse(&$1, '\\', &$0->val.str, tail);
+ if (tail[0])
+ tok_err(c, "error: unsupported string suffix",
+ &$1);
+ }
+ }$
+ | MULTI_STRING ${
+ $0 = new_val(Tstr, $1);
+ {
+ char tail[3];
+ string_parse(&$1, '\\', &$0->val.str, tail);
+ if (tail[0])
+ tok_err(c, "error: unsupported string suffix",
+ &$1);
+ }
+ }$
+
+###### print exec cases
+ case Xval:
+ {
+ struct val *v = cast(val, e);
+ if (v->vtype == Tstr)
+ printf("\"");
+ print_value(v->vtype, &v->val, stdout);
+ if (v->vtype == Tstr)
+ printf("\"");
+ break;
+ }
+
+###### propagate exec cases
+ case Xval:
+ {
+ struct val *val = cast(val, prog);
+ if (!type_compat(type, val->vtype, rules))
+ type_err(c, "error: expected %1%r found %2",
+ prog, type, rules, val->vtype);
+ return val->vtype;
+ }
+
+###### interp exec cases
+ case Xval:
+ rvtype = cast(val, e)->vtype;
+ dup_value(rvtype, &cast(val, e)->val, &rv);
+ break;
+
+###### ast functions
+ static void free_val(struct val *v)
+ {
+ if (v)
+ free_value(v->vtype, &v->val);
+ free(v);
+ }
+
+###### free exec cases
+ case Xval: free_val(cast(val, e)); break;
+
+###### ast functions
+ // Move all nodes from 'b' to 'rv', reversing their order.
+ // In 'b' 'left' is a list, and 'right' is the last node.
+ // In 'rv', left' is the first node and 'right' is a list.
+ static struct binode *reorder_bilist(struct binode *b)
+ {
+ struct binode *rv = NULL;
+
+ while (b) {
+ struct exec *t = b->right;
+ b->right = rv;
+ rv = b;
+ if (b->left)
+ b = cast(binode, b->left);
+ else
+ b = NULL;
+ rv->left = t;
+ }
+ return rv;
+ }
+
+### Variables
+
+Variables are scoped named values. We store the names in a linked list
+of "bindings" sorted in lexical order, and use sequential search and
+insertion sort.
+
+###### ast
+
+ struct binding {
+ struct text name;
+ struct binding *next; // in lexical order
+ ## binding fields
+ };
+
+This linked list is stored in the parse context so that "reduce"
+functions can find or add variables, and so the analysis phase can
+ensure that every variable gets a type.
+
+###### parse context
+
+ struct binding *varlist; // In lexical order
+
+###### ast functions
+
+ static struct binding *find_binding(struct parse_context *c, struct text s)
+ {
+ struct binding **l = &c->varlist;
+ struct binding *n;
+ int cmp = 1;
+
+ while (*l &&
+ (cmp = text_cmp((*l)->name, s)) < 0)
+ l = & (*l)->next;
+ if (cmp == 0)
+ return *l;
+ n = calloc(1, sizeof(*n));
+ n->name = s;
+ n->next = *l;
+ *l = n;
+ return n;
+ }
+
+Each name can be linked to multiple variables defined in different
+scopes. Each scope starts where the name is declared and continues
+until the end of the containing code block. Scopes of a given name
+cannot nest, so a declaration while a name is in-scope is an error.
+
+###### binding fields
+ struct variable *var;
+
+###### ast
+ struct variable {
+ struct variable *previous;
+ struct type *type;
+ struct binding *name;
+ struct exec *where_decl;// where name was declared
+ struct exec *where_set; // where type was set
+ ## variable fields
+ };
+
+When a scope closes, the values of the variables might need to be freed.
+This happens in the context of some `struct exec` and each `exec` will
+need to know which variables need to be freed when it completes.
+
+####### exec fields
+ struct variable *to_free;
+
+####### variable fields
+ struct exec *cleanup_exec;
+ struct variable *next_free;
+
+####### interp exec cleanup
+ {
+ struct variable *v;
+ for (v = e->to_free; v; v = v->next_free) {
+ struct value *val = var_value(c, v);
+ free_value(v->type, val);
+ }
+ }
+
+###### ast functions
+ static void variable_unlink_exec(struct variable *v)
+ {
+ struct variable **vp;
+ if (!v->cleanup_exec)
+ return;
+ for (vp = &v->cleanup_exec->to_free;
+ *vp; vp = &(*vp)->next_free) {
+ if (*vp != v)
+ continue;
+ *vp = v->next_free;
+ v->cleanup_exec = NULL;
+ break;
+ }
+ }
+
+While the naming seems strange, we include local constants in the
+definition of variables. A name declared `var := value` can
+subsequently be changed, but a name declared `var ::= value` cannot -
+it is constant
+
+###### variable fields
+ int constant;
+
+Scopes in parallel branches can be partially merged. More
+specifically, if a given name is declared in both branches of an
+if/else then its scope is a candidate for merging. Similarly if
+every branch of an exhaustive switch (e.g. has an "else" clause)
+declares a given name, then the scopes from the branches are
+candidates for merging.
+
+Note that names declared inside a loop (which is only parallel to
+itself) are never visible after the loop. Similarly names defined in
+scopes which are not parallel, such as those started by `for` and
+`switch`, are never visible after the scope. Only variables defined in
+both `then` and `else` (including the implicit then after an `if`, and
+excluding `then` used with `for`) and in all `case`s and `else` of a
+`switch` or `while` can be visible beyond the `if`/`switch`/`while`.
+
+Labels, which are a bit like variables, follow different rules.
+Labels are not explicitly declared, but if an undeclared name appears
+in a context where a label is legal, that effectively declares the
+name as a label. The declaration remains in force (or in scope) at
+least to the end of the immediately containing block and conditionally
+in any larger containing block which does not declare the name in some
+other way. Importantly, the conditional scope extension happens even
+if the label is only used in one parallel branch of a conditional --
+when used in one branch it is treated as having been declared in all
+branches.
+
+Merge candidates are tentatively visible beyond the end of the
+branching statement which creates them. If the name is used, the
+merge is affirmed and they become a single variable visible at the
+outer layer. If not - if it is redeclared first - the merge lapses.
+
+To track scopes we have an extra stack, implemented as a linked list,
+which roughly parallels the parse stack and which is used exclusively
+for scoping. When a new scope is opened, a new frame is pushed and
+the child-count of the parent frame is incremented. This child-count
+is used to distinguish between the first of a set of parallel scopes,
+in which declared variables must not be in scope, and subsequent
+branches, whether they may already be conditionally scoped.
+
+We need a total ordering of scopes so we can easily compare to variables
+to see if they are concurrently in scope. To achieve this we record a
+`scope_count` which is actually a count of both beginnings and endings
+of scopes. Then each variable has a record of the scope count where it
+enters scope, and where it leaves.
+
+To push a new frame *before* any code in the frame is parsed, we need a
+grammar reduction. This is most easily achieved with a grammar
+element which derives the empty string, and creates the new scope when
+it is recognised. This can be placed, for example, between a keyword
+like "if" and the code following it.
+
+###### ast
+ struct scope {
+ struct scope *parent;
+ int child_count;
+ };
+
+###### parse context
+ int scope_depth;
+ int scope_count;
+ struct scope *scope_stack;
+
+###### variable fields
+ int scope_start, scope_end;
+
+###### ast functions
+ static void scope_pop(struct parse_context *c)
+ {
+ struct scope *s = c->scope_stack;
+
+ c->scope_stack = s->parent;
+ free(s);
+ c->scope_depth -= 1;
+ c->scope_count += 1;
+ }
+
+ static void scope_push(struct parse_context *c)
+ {
+ struct scope *s = calloc(1, sizeof(*s));
+ if (c->scope_stack)
+ c->scope_stack->child_count += 1;
+ s->parent = c->scope_stack;
+ c->scope_stack = s;
+ c->scope_depth += 1;
+ c->scope_count += 1;
+ }
+
+###### Grammar
+
+ $void
+ OpenScope -> ${ scope_push(c); }$
+
+Each variable records a scope depth and is in one of four states:
+
+- "in scope". This is the case between the declaration of the
+ variable and the end of the containing block, and also between
+ the usage with affirms a merge and the end of that block.
+
+ The scope depth is not greater than the current parse context scope
+ nest depth. When the block of that depth closes, the state will
+ change. To achieve this, all "in scope" variables are linked
+ together as a stack in nesting order.
+
+- "pending". The "in scope" block has closed, but other parallel
+ scopes are still being processed. So far, every parallel block at
+ the same level that has closed has declared the name.
+
+ The scope depth is the depth of the last parallel block that
+ enclosed the declaration, and that has closed.
+
+- "conditionally in scope". The "in scope" block and all parallel
+ scopes have closed, and no further mention of the name has been seen.
+ This state includes a secondary nest depth (`min_depth`) which records
+ the outermost scope seen since the variable became conditionally in
+ scope. If a use of the name is found, the variable becomes "in scope"
+ and that secondary depth becomes the recorded scope depth. If the
+ name is declared as a new variable, the old variable becomes "out of
+ scope" and the recorded scope depth stays unchanged.
+
+- "out of scope". The variable is neither in scope nor conditionally
+ in scope. It is permanently out of scope now and can be removed from
+ the "in scope" stack. When a variable becomes out-of-scope it is
+ moved to a separate list (`out_scope`) of variables which have fully
+ known scope. This will be used at the end of each function to assign
+ each variable a place in the stack frame.
+
+###### variable fields
+ int depth, min_depth;
+ enum { OutScope, PendingScope, CondScope, InScope } scope;
+ struct variable *in_scope;
+
+###### parse context
+
+ struct variable *in_scope;
+ struct variable *out_scope;
+
+All variables with the same name are linked together using the
+'previous' link. Those variable that have been affirmatively merged all
+have a 'merged' pointer that points to one primary variable - the most
+recently declared instance. When merging variables, we need to also
+adjust the 'merged' pointer on any other variables that had previously
+been merged with the one that will no longer be primary.
+
+A variable that is no longer the most recent instance of a name may
+still have "pending" scope, if it might still be merged with most
+recent instance. These variables don't really belong in the
+"in_scope" list, but are not immediately removed when a new instance
+is found. Instead, they are detected and ignored when considering the
+list of in_scope names.
+
+The storage of the value of a variable will be described later. For now
+we just need to know that when a variable goes out of scope, it might
+need to be freed. For this we need to be able to find it, so assume that
+`var_value()` will provide that.
+
+###### variable fields
+ struct variable *merged;
+
+###### ast functions
+
+ static void variable_merge(struct variable *primary, struct variable *secondary)
+ {
+ struct variable *v;
+
+ primary = primary->merged;
+
+ for (v = primary->previous; v; v=v->previous)
+ if (v == secondary || v == secondary->merged ||
+ v->merged == secondary ||
+ v->merged == secondary->merged) {
+ v->scope = OutScope;
+ v->merged = primary;
+ if (v->scope_start < primary->scope_start)
+ primary->scope_start = v->scope_start;
+ if (v->scope_end > primary->scope_end)
+ primary->scope_end = v->scope_end; // NOTEST
+ variable_unlink_exec(v);
+ }
+ }
+
+###### forward decls
+ static struct value *var_value(struct parse_context *c, struct variable *v);
+
+###### free global vars
+
+ while (context.varlist) {
+ struct binding *b = context.varlist;
+ struct variable *v = b->var;
+ context.varlist = b->next;
+ free(b);
+ while (v) {
+ struct variable *next = v->previous;
+
+ if (v->global) {
+ free_value(v->type, var_value(&context, v));
+ if (v->depth == 0)
+ // This is a global constant
+ free_exec(v->where_decl);
+ }
+ free(v);
+ v = next;
+ }
+ }
+
+#### Manipulating Bindings
+
+When a name is conditionally visible, a new declaration discards the old
+binding - the condition lapses. Similarly when we reach the end of a
+function (outermost non-global scope) any conditional scope must lapse.
+Conversely a usage of the name affirms the visibility and extends it to
+the end of the containing block - i.e. the block that contains both the
+original declaration and the latest usage. This is determined from
+`min_depth`. When a conditionally visible variable gets affirmed like
+this, it is also merged with other conditionally visible variables with
+the same name.
+
+When we parse a variable declaration we either report an error if the
+name is currently bound, or create a new variable at the current nest
depth if the name is unbound or bound to a conditionally scoped or
pending-scope variable. If the previous variable was conditionally
scoped, it and its homonyms becomes out-of-scope.
pos = done->frame_pos + done->type->size;
else
pos = ft->function.local_size;
- if (pos & (t->align - 1))
- pos = (pos + t->align) & ~(t->align-1);
- v->frame_pos = pos;
- if (size < pos + v->type->size)
- size = pos + v->type->size;
- v->in_scope = done;
- done = v;
- }
- c->out_scope = NULL;
- ft->function.local_size = size;
- }
-
-###### free context storage
- free(context.global);
-
-### Executables
-
-Executables can be lots of different things. In many cases an
-executable is just an operation combined with one or two other
-executables. This allows for expressions and lists etc. Other times an
-executable is something quite specific like a constant or variable name.
-So we define a `struct exec` to be a general executable with a type, and
-a `struct binode` which is a subclass of `exec`, forms a node in a
-binary tree, and holds an operation. There will be other subclasses,
-and to access these we need to be able to `cast` the `exec` into the
-various other types. The first field in any `struct exec` is the type
-from the `exec_types` enum.
-
-###### macros
- #define cast(structname, pointer) ({ \
- const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \
- if (__mptr && *__mptr != X##structname) abort(); \
- (struct structname *)( (char *)__mptr);})
-
- #define new(structname) ({ \
- struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
- __ptr->type = X##structname; \
- __ptr->line = -1; __ptr->column = -1; \
- __ptr;})
-
- #define new_pos(structname, token) ({ \
- struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
- __ptr->type = X##structname; \
- __ptr->line = token.line; __ptr->column = token.col; \
- __ptr;})
-
-###### ast
- enum exec_types {
- Xbinode,
- ## exec type
- };
- struct exec {
- enum exec_types type;
- int line, column;
- ## exec fields
- };
- struct binode {
- struct exec;
- enum Btype {
- ## Binode types
- } op;
- struct exec *left, *right;
- };
-
-###### ast functions
-
- static int __fput_loc(struct exec *loc, FILE *f)
- {
- if (!loc)
- return 0;
- if (loc->line >= 0) {
- fprintf(f, "%d:%d: ", loc->line, loc->column);
- return 1;
- }
- if (loc->type == Xbinode)
- return __fput_loc(cast(binode,loc)->left, f) ||
- __fput_loc(cast(binode,loc)->right, f); // NOTEST
- return 0;
- }
- static void fput_loc(struct exec *loc, FILE *f)
- {
- if (!__fput_loc(loc, f))
- fprintf(f, "??:??: ");
- }
-
-Each different type of `exec` node needs a number of functions defined,
-a bit like methods. We must be able to free it, print it, analyse it
-and execute it. Once we have specific `exec` types we will need to
-parse them too. Let's take this a bit more slowly.
-
-#### Freeing
-
-The parser generator requires a `free_foo` function for each struct
-that stores attributes and they will often be `exec`s and subtypes
-there-of. So we need `free_exec` which can handle all the subtypes,
-and we need `free_binode`.
-
-###### ast functions
-
- static void free_binode(struct binode *b)
- {
- if (!b)
- return;
- free_exec(b->left);
- free_exec(b->right);
- free(b);
- }
-
-###### core functions
- static void free_exec(struct exec *e)
- {
- if (!e)
- return;
- switch(e->type) {
- ## free exec cases
- }
- }
-
-###### forward decls
-
- static void free_exec(struct exec *e);
-
-###### free exec cases
- case Xbinode: free_binode(cast(binode, e)); break;
-
-#### Printing
-
-Printing an `exec` requires that we know the current indent level for
-printing line-oriented components. As will become clear later, we
-also want to know what sort of bracketing to use.
-
-###### ast functions
-
- static void do_indent(int i, char *str)
- {
- while (i-- > 0)
- printf(" ");
- printf("%s", str);
- }
-
-###### core functions
- static void print_binode(struct binode *b, int indent, int bracket)
- {
- struct binode *b2;
- switch(b->op) {
- ## print binode cases
- }
- }
-
- static void print_exec(struct exec *e, int indent, int bracket)
- {
- if (!e)
- return;
- switch (e->type) {
- case Xbinode:
- print_binode(cast(binode, e), indent, bracket); break;
- ## print exec cases
- }
- if (e->to_free) {
- struct variable *v;
- do_indent(indent, "/* FREE");
- for (v = e->to_free; v; v = v->next_free) {
- printf(" %.*s", v->name->name.len, v->name->name.txt);
- printf("[%d,%d]", v->scope_start, v->scope_end);
- if (v->frame_pos >= 0)
- printf("(%d+%d)", v->frame_pos,
- v->type ? v->type->size:0);
- }
- printf(" */\n");
+ if (pos & (t->align - 1))
+ pos = (pos + t->align) & ~(t->align-1);
+ v->frame_pos = pos;
+ if (size < pos + v->type->size)
+ size = pos + v->type->size;
+ v->in_scope = done;
+ done = v;
}
+ c->out_scope = NULL;
+ ft->function.local_size = size;
}
-###### forward decls
+###### free context storage
+ free(context.global);
- static void print_exec(struct exec *e, int indent, int bracket);
+#### Variables as executables
-#### Analysing
+Just as we used a `val` to wrap a value into an `exec`, we similarly
+need a `var` to wrap a `variable` into an exec. While each `val`
+contained a copy of the value, each `var` holds a link to the variable
+because it really is the same variable no matter where it appears.
+When a variable is used, we need to remember to follow the `->merged`
+link to find the primary instance.
-As discussed, analysis involves propagating type requirements around the
-program and looking for errors.
+When a variable is declared, it may or may not be given an explicit
+type. We need to record which so that we can report the parsed code
+correctly.
-So `propagate_types` is passed an expected type (being a `struct type`
-pointer together with some `val_rules` flags) that the `exec` is
-expected to return, and returns the type that it does return, either
-of which can be `NULL` signifying "unknown". An `ok` flag is passed
-by reference. It is set to `0` when an error is found, and `2` when
-any change is made. If it remains unchanged at `1`, then no more
-propagation is needed.
+###### exec type
+ Xvar,
###### ast
+ struct var {
+ struct exec;
+ struct variable *var;
+ };
- enum val_rules {Rnolabel = 1<<0, Rboolok = 1<<1, Rnoconstant = 1<<2};
-
-###### format cases
- case 'r':
- if (rules & Rnolabel)
- fputs(" (labels not permitted)", stderr);
- break;
+###### variable fields
+ int explicit_type;
-###### forward decls
- static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
- struct type *type, int rules);
-###### core functions
+###### Grammar
- static struct type *__propagate_types(struct exec *prog, struct parse_context *c, int *ok,
- struct type *type, int rules)
- {
- struct type *t;
+ $TERM : ::
- if (!prog)
- return Tnone;
+ $*var
+ VariableDecl -> IDENTIFIER : ${ {
+ struct variable *v = var_decl(c, $1.txt);
+ $0 = new_pos(var, $1);
+ $0->var = v;
+ if (v)
+ v->where_decl = $0;
+ else {
+ v = var_ref(c, $1.txt);
+ $0->var = v;
+ type_err(c, "error: variable '%v' redeclared",
+ $0, NULL, 0, NULL);
+ type_err(c, "info: this is where '%v' was first declared",
+ v->where_decl, NULL, 0, NULL);
+ }
+ } }$
+ | IDENTIFIER :: ${ {
+ struct variable *v = var_decl(c, $1.txt);
+ $0 = new_pos(var, $1);
+ $0->var = v;
+ if (v) {
+ v->where_decl = $0;
+ v->constant = 1;
+ } else {
+ v = var_ref(c, $1.txt);
+ $0->var = v;
+ type_err(c, "error: variable '%v' redeclared",
+ $0, NULL, 0, NULL);
+ type_err(c, "info: this is where '%v' was first declared",
+ v->where_decl, NULL, 0, NULL);
+ }
+ } }$
+ | IDENTIFIER : Type ${ {
+ struct variable *v = var_decl(c, $1.txt);
+ $0 = new_pos(var, $1);
+ $0->var = v;
+ if (v) {
+ v->where_decl = $0;
+ v->where_set = $0;
+ v->type = $<Type;
+ v->explicit_type = 1;
+ } else {
+ v = var_ref(c, $1.txt);
+ $0->var = v;
+ type_err(c, "error: variable '%v' redeclared",
+ $0, NULL, 0, NULL);
+ type_err(c, "info: this is where '%v' was first declared",
+ v->where_decl, NULL, 0, NULL);
+ }
+ } }$
+ | IDENTIFIER :: Type ${ {
+ struct variable *v = var_decl(c, $1.txt);
+ $0 = new_pos(var, $1);
+ $0->var = v;
+ if (v) {
+ v->where_decl = $0;
+ v->where_set = $0;
+ v->type = $<Type;
+ v->constant = 1;
+ v->explicit_type = 1;
+ } else {
+ v = var_ref(c, $1.txt);
+ $0->var = v;
+ type_err(c, "error: variable '%v' redeclared",
+ $0, NULL, 0, NULL);
+ type_err(c, "info: this is where '%v' was first declared",
+ v->where_decl, NULL, 0, NULL);
+ }
+ } }$
- switch (prog->type) {
- case Xbinode:
- {
- struct binode *b = cast(binode, prog);
- switch (b->op) {
- ## propagate binode cases
+ $*exec
+ Variable -> IDENTIFIER ${ {
+ struct variable *v = var_ref(c, $1.txt);
+ $0 = new_pos(var, $1);
+ if (v == NULL) {
+ /* This might be a label - allocate a var just in case */
+ v = var_decl(c, $1.txt);
+ if (v) {
+ v->type = Tnone;
+ v->where_decl = $0;
+ v->where_set = $0;
}
- break;
- }
- ## propagate exec cases
}
- return Tnone;
- }
+ cast(var, $0)->var = v;
+ } }$
- static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
- struct type *type, int rules)
+###### print exec cases
+ case Xvar:
{
- struct type *ret = __propagate_types(prog, c, ok, type, rules);
-
- if (c->parse_error)
- *ok = 0;
- return ret;
+ struct var *v = cast(var, e);
+ if (v->var) {
+ struct binding *b = v->var->name;
+ printf("%.*s", b->name.len, b->name.txt);
+ }
+ break;
}
-#### Interpreting
-
-Interpreting an `exec` doesn't require anything but the `exec`. State
-is stored in variables and each variable will be directly linked from
-within the `exec` tree. The exception to this is the `main` function
-which needs to look at command line arguments. This function will be
-interpreted separately.
-
-Each `exec` can return a value combined with a type in `struct lrval`.
-The type may be `Tnone` but must be non-NULL. Some `exec`s will return
-the location of a value, which can be updated, in `lval`. Others will
-set `lval` to NULL indicating that there is a value of appropriate type
-in `rval`.
-
-###### core functions
-
- struct lrval {
- struct type *type;
- struct value rval, *lval;
- };
-
- /* If dest is passed, dtype must give the expected type, and
- * result can go there, in which case type is returned as NULL.
- */
- static struct lrval _interp_exec(struct parse_context *c, struct exec *e,
- struct value *dest, struct type *dtype);
-
- static struct value interp_exec(struct parse_context *c, struct exec *e,
- struct type **typeret)
- {
- struct lrval ret = _interp_exec(c, e, NULL, NULL);
+###### format cases
+ case 'v':
+ if (loc && loc->type == Xvar) {
+ struct var *v = cast(var, loc);
+ if (v->var) {
+ struct binding *b = v->var->name;
+ fprintf(stderr, "%.*s", b->name.len, b->name.txt);
+ } else
+ fputs("???", stderr); // NOTEST
+ } else
+ fputs("NOTVAR", stderr);
+ break;
- if (!ret.type) abort();
- if (typeret)
- *typeret = ret.type;
- if (ret.lval)
- dup_value(ret.type, ret.lval, &ret.rval);
- return ret.rval;
- }
+###### propagate exec cases
- static struct value *linterp_exec(struct parse_context *c, struct exec *e,
- struct type **typeret)
+ case Xvar:
{
- struct lrval ret = _interp_exec(c, e, NULL, NULL);
-
- if (!ret.type) abort();
- if (ret.lval)
- *typeret = ret.type;
- else
- free_value(ret.type, &ret.rval);
- return ret.lval;
+ struct var *var = cast(var, prog);
+ struct variable *v = var->var;
+ if (!v) {
+ type_err(c, "%d:BUG: no variable!!", prog, NULL, 0, NULL); // NOTEST
+ return Tnone; // NOTEST
+ }
+ v = v->merged;
+ if (v->constant && (rules & Rnoconstant)) {
+ type_err(c, "error: Cannot assign to a constant: %v",
+ prog, NULL, 0, NULL);
+ type_err(c, "info: name was defined as a constant here",
+ v->where_decl, NULL, 0, NULL);
+ return v->type;
+ }
+ if (v->type == Tnone && v->where_decl == prog)
+ type_err(c, "error: variable used but not declared: %v",
+ prog, NULL, 0, NULL);
+ if (v->type == NULL) {
+ if (type && *ok != 0) {
+ v->type = type;
+ v->where_set = prog;
+ *ok = 2;
+ }
+ return type;
+ }
+ if (!type_compat(type, v->type, rules)) {
+ type_err(c, "error: expected %1%r but variable '%v' is %2", prog,
+ type, rules, v->type);
+ type_err(c, "info: this is where '%v' was set to %1", v->where_set,
+ v->type, rules, NULL);
+ }
+ if (!type)
+ return v->type;
+ return type;
}
- /* dinterp_exec is used when the destination type is certain and
- * the value has a place to go.
- */
- static void dinterp_exec(struct parse_context *c, struct exec *e,
- struct value *dest, struct type *dtype,
- int need_free)
+###### interp exec cases
+ case Xvar:
{
- struct lrval ret = _interp_exec(c, e, dest, dtype);
- if (!ret.type)
- return;
- if (need_free)
- free_value(dtype, dest);
- if (ret.lval)
- dup_value(dtype, ret.lval, dest);
- else
- memcpy(dest, &ret.rval, dtype->size);
+ struct var *var = cast(var, e);
+ struct variable *v = var->var;
+
+ v = v->merged;
+ lrv = var_value(c, v);
+ rvtype = v->type;
+ break;
}
- static struct lrval _interp_exec(struct parse_context *c, struct exec *e,
- struct value *dest, struct type *dtype)
+###### ast functions
+
+ static void free_var(struct var *v)
{
- /* If the result is copied to dest, ret.type is set to NULL */
- struct lrval ret;
- struct value rv = {}, *lrv = NULL;
- struct type *rvtype;
+ free(v);
+ }
- rvtype = ret.type = Tnone;
- if (!e) {
- ret.lval = lrv;
- ret.rval = rv;
- return ret;
- }
+###### free exec cases
+ case Xvar: free_var(cast(var, e)); break;
- switch(e->type) {
- case Xbinode:
- {
- struct binode *b = cast(binode, e);
- struct value left, right, *lleft;
- struct type *ltype, *rtype;
- ltype = rtype = Tnone;
- switch (b->op) {
- ## interp binode cases
- }
- free_value(ltype, &left);
- free_value(rtype, &right);
- break;
- }
- ## interp exec cases
- }
- if (rvtype) {
- ret.lval = lrv;
- ret.rval = rv;
- ret.type = rvtype;
- }
- ## interp exec cleanup
- return ret;
- }
### Complex types
complex types and connected them in to the data structures and the
different phases of parse, analyse, print, interpret.
-Thus far we have arrays and structs.
+Being "complex" the language will naturally have syntax to access
+specifics of objects of these types. These will fit into the grammar as
+"Terms" which are the things that are combined with various operators to
+form "Expression". Where a Term is formed by some operation on another
+Term, the subordinate Term will always come first, so for example a
+member of an array will be expressed as the Term for the array followed
+by an index in square brackets. The strict rule of using postfix
+operations makes precedence irrelevant within terms. To provide a place
+to put the grammar for each terms of each type, we will start out by
+introducing the "Term" grammar production, with contains at least a
+simple "Value" (to be explained later).
+
+###### Grammar
+ $*exec
+ Term -> Value ${ $0 = $<1; }$
+ | Variable ${ $0 = $<1; }$
+ ## term grammar
+
+Thus far the complex types we have are arrays and structs.
#### Arrays
###### Binode types
Index,
-###### variable grammar
+###### term grammar
- | Variable [ Expression ] ${ {
+ | Term [ Expression ] ${ {
struct binode *b = new(binode);
b->op = Index;
b->left = $<1;
###### declare terminals
$TERM struct .
-###### variable grammar
+###### term grammar
- | Variable . IDENTIFIER ${ {
+ | Term . IDENTIFIER ${ {
struct fieldref *fr = new_pos(fieldref, $2);
fr->left = $<1;
fr->name = $3.txt;
struct field *fl = t->structure.fields + i;
if (i)
fprintf(f, "; ");
- fprintf(f, "%.*s:", fl->name.len, fl->name.txt);
- type_print(fl->type, f);
- }
- fprintf(f, ")");
- } else
- type_print(type->function.return_type, f);
- }
- fprintf(f, "\n");
- }
-
- static void function_free_type(struct type *t)
- {
- free_exec(t->function.params);
- }
-
- static struct type function_prototype = {
- .size = sizeof(void*),
- .align = sizeof(void*),
- .free = function_free,
- .compat = function_compat,
- .check_args = function_check_args,
- .print = function_print,
- .print_type_decl = function_print_type_decl,
- .free_type = function_free_type,
- };
-
-###### declare terminals
-
- $TERM func
-
-###### Binode types
- List,
-
-###### Grammar
-
- $*variable
- FuncName -> IDENTIFIER ${ {
- struct variable *v = var_decl(c, $1.txt);
- struct var *e = new_pos(var, $1);
- e->var = v;
- if (v) {
- v->where_decl = e;
- $0 = v;
- } else {
- v = var_ref(c, $1.txt);
- e->var = v;
- type_err(c, "error: function '%v' redeclared",
- e, NULL, 0, NULL);
- type_err(c, "info: this is where '%v' was first declared",
- v->where_decl, NULL, 0, NULL);
- free_exec(e);
- }
- } }$
-
- $*binode
- Args -> ArgsLine NEWLINE ${ $0 = $<AL; }$
- | Args ArgsLine NEWLINE ${ {
- struct binode *b = $<AL;
- struct binode **bp = &b;
- while (*bp)
- bp = (struct binode **)&(*bp)->left;
- *bp = $<A;
- $0 = b;
- } }$
-
- ArgsLine -> ${ $0 = NULL; }$
- | Varlist ${ $0 = $<1; }$
- | Varlist ; ${ $0 = $<1; }$
-
- Varlist -> Varlist ; ArgDecl ${
- $0 = new(binode);
- $0->op = List;
- $0->left = $<Vl;
- $0->right = $<AD;
- }$
- | ArgDecl ${
- $0 = new(binode);
- $0->op = List;
- $0->left = NULL;
- $0->right = $<AD;
- }$
-
- $*var
- ArgDecl -> IDENTIFIER : FormalType ${ {
- struct variable *v = var_decl(c, $1.txt);
- $0 = new(var);
- $0->var = v;
- v->type = $<FT;
- } }$
-
-## Executables: the elements of code
-
-Each code element needs to be parsed, printed, analysed,
-interpreted, and freed. There are several, so let's just start with
-the easy ones and work our way up.
-
-### Values
-
-We have already met values as separate objects. When manifest
-constants appear in the program text, that must result in an executable
-which has a constant value. So the `val` structure embeds a value in
-an executable.
-
-###### exec type
- Xval,
-
-###### ast
- struct val {
- struct exec;
- struct type *vtype;
- struct value val;
- };
-
-###### ast functions
- struct val *new_val(struct type *T, struct token tk)
- {
- struct val *v = new_pos(val, tk);
- v->vtype = T;
- return v;
- }
-
-###### Grammar
-
- $TERM True False
-
- $*val
- Value -> True ${
- $0 = new_val(Tbool, $1);
- $0->val.bool = 1;
- }$
- | False ${
- $0 = new_val(Tbool, $1);
- $0->val.bool = 0;
- }$
- | NUMBER ${
- $0 = new_val(Tnum, $1);
- {
- char tail[3];
- if (number_parse($0->val.num, tail, $1.txt) == 0)
- mpq_init($0->val.num); // UNTESTED
- if (tail[0])
- tok_err(c, "error: unsupported number suffix",
- &$1);
- }
- }$
- | STRING ${
- $0 = new_val(Tstr, $1);
- {
- char tail[3];
- string_parse(&$1, '\\', &$0->val.str, tail);
- if (tail[0])
- tok_err(c, "error: unsupported string suffix",
- &$1);
- }
- }$
- | MULTI_STRING ${
- $0 = new_val(Tstr, $1);
- {
- char tail[3];
- string_parse(&$1, '\\', &$0->val.str, tail);
- if (tail[0])
- tok_err(c, "error: unsupported string suffix",
- &$1);
- }
- }$
-
-###### print exec cases
- case Xval:
- {
- struct val *v = cast(val, e);
- if (v->vtype == Tstr)
- printf("\"");
- print_value(v->vtype, &v->val, stdout);
- if (v->vtype == Tstr)
- printf("\"");
- break;
- }
-
-###### propagate exec cases
- case Xval:
- {
- struct val *val = cast(val, prog);
- if (!type_compat(type, val->vtype, rules))
- type_err(c, "error: expected %1%r found %2",
- prog, type, rules, val->vtype);
- return val->vtype;
+ fprintf(f, "%.*s:", fl->name.len, fl->name.txt);
+ type_print(fl->type, f);
+ }
+ fprintf(f, ")");
+ } else
+ type_print(type->function.return_type, f);
+ }
+ fprintf(f, "\n");
}
-###### interp exec cases
- case Xval:
- rvtype = cast(val, e)->vtype;
- dup_value(rvtype, &cast(val, e)->val, &rv);
- break;
-
-###### ast functions
- static void free_val(struct val *v)
+ static void function_free_type(struct type *t)
{
- if (v)
- free_value(v->vtype, &v->val);
- free(v);
+ free_exec(t->function.params);
}
-###### free exec cases
- case Xval: free_val(cast(val, e)); break;
-
-###### ast functions
- // Move all nodes from 'b' to 'rv', reversing their order.
- // In 'b' 'left' is a list, and 'right' is the last node.
- // In 'rv', left' is the first node and 'right' is a list.
- static struct binode *reorder_bilist(struct binode *b)
- {
- struct binode *rv = NULL;
-
- while (b) {
- struct exec *t = b->right;
- b->right = rv;
- rv = b;
- if (b->left)
- b = cast(binode, b->left);
- else
- b = NULL;
- rv->left = t;
- }
- return rv;
- }
+ static struct type function_prototype = {
+ .size = sizeof(void*),
+ .align = sizeof(void*),
+ .free = function_free,
+ .compat = function_compat,
+ .check_args = function_check_args,
+ .print = function_print,
+ .print_type_decl = function_print_type_decl,
+ .free_type = function_free_type,
+ };
-### Variables
+###### declare terminals
-Just as we used a `val` to wrap a value into an `exec`, we similarly
-need a `var` to wrap a `variable` into an exec. While each `val`
-contained a copy of the value, each `var` holds a link to the variable
-because it really is the same variable no matter where it appears.
-When a variable is used, we need to remember to follow the `->merged`
-link to find the primary instance.
+ $TERM func
-When a variable is declared, it may or may not be given an explicit
-type. We need to record which so that we can report the parsed code
-correctly.
+###### Binode types
+ List,
-###### exec type
- Xvar,
+###### Grammar
-###### ast
- struct var {
- struct exec;
- struct variable *var;
- };
+ $*variable
+ FuncName -> IDENTIFIER ${ {
+ struct variable *v = var_decl(c, $1.txt);
+ struct var *e = new_pos(var, $1);
+ e->var = v;
+ if (v) {
+ v->where_decl = e;
+ $0 = v;
+ } else {
+ v = var_ref(c, $1.txt);
+ e->var = v;
+ type_err(c, "error: function '%v' redeclared",
+ e, NULL, 0, NULL);
+ type_err(c, "info: this is where '%v' was first declared",
+ v->where_decl, NULL, 0, NULL);
+ free_exec(e);
+ }
+ } }$
-###### variable fields
- int explicit_type;
+ $*binode
+ Args -> ArgsLine NEWLINE ${ $0 = $<AL; }$
+ | Args ArgsLine NEWLINE ${ {
+ struct binode *b = $<AL;
+ struct binode **bp = &b;
+ while (*bp)
+ bp = (struct binode **)&(*bp)->left;
+ *bp = $<A;
+ $0 = b;
+ } }$
-###### Grammar
+ ArgsLine -> ${ $0 = NULL; }$
+ | Varlist ${ $0 = $<1; }$
+ | Varlist ; ${ $0 = $<1; }$
- $TERM : ::
+ Varlist -> Varlist ; ArgDecl ${
+ $0 = new(binode);
+ $0->op = List;
+ $0->left = $<Vl;
+ $0->right = $<AD;
+ }$
+ | ArgDecl ${
+ $0 = new(binode);
+ $0->op = List;
+ $0->left = NULL;
+ $0->right = $<AD;
+ }$
$*var
- VariableDecl -> IDENTIFIER : ${ {
- struct variable *v = var_decl(c, $1.txt);
- $0 = new_pos(var, $1);
- $0->var = v;
- if (v)
- v->where_decl = $0;
- else {
- v = var_ref(c, $1.txt);
- $0->var = v;
- type_err(c, "error: variable '%v' redeclared",
- $0, NULL, 0, NULL);
- type_err(c, "info: this is where '%v' was first declared",
- v->where_decl, NULL, 0, NULL);
- }
- } }$
- | IDENTIFIER :: ${ {
+ ArgDecl -> IDENTIFIER : FormalType ${ {
struct variable *v = var_decl(c, $1.txt);
- $0 = new_pos(var, $1);
+ $0 = new(var);
$0->var = v;
- if (v) {
- v->where_decl = $0;
- v->constant = 1;
- } else {
- v = var_ref(c, $1.txt);
- $0->var = v;
- type_err(c, "error: variable '%v' redeclared",
- $0, NULL, 0, NULL);
- type_err(c, "info: this is where '%v' was first declared",
- v->where_decl, NULL, 0, NULL);
- }
+ v->type = $<FT;
} }$
- | IDENTIFIER : Type ${ {
- struct variable *v = var_decl(c, $1.txt);
- $0 = new_pos(var, $1);
- $0->var = v;
- if (v) {
- v->where_decl = $0;
- v->where_set = $0;
- v->type = $<Type;
- v->explicit_type = 1;
- } else {
- v = var_ref(c, $1.txt);
- $0->var = v;
- type_err(c, "error: variable '%v' redeclared",
- $0, NULL, 0, NULL);
- type_err(c, "info: this is where '%v' was first declared",
- v->where_decl, NULL, 0, NULL);
- }
+
+##### Function calls
+
+A function call can appear either as an expression or as a statement.
+We use a new 'Funcall' binode type to link the function with a list of
+arguments, form with the 'List' nodes.
+
+We have already seen the "Term" which is how a function call can appear
+in an expression. To parse a function call into a statement we include
+it in the "SimpleStatement Grammar" which will be described later.
+
+###### Binode types
+ Funcall,
+
+###### term grammar
+ | Term ( ExpressionList ) ${ {
+ struct binode *b = new(binode);
+ b->op = Funcall;
+ b->left = $<T;
+ b->right = reorder_bilist($<EL);
+ $0 = b;
} }$
- | IDENTIFIER :: Type ${ {
- struct variable *v = var_decl(c, $1.txt);
- $0 = new_pos(var, $1);
- $0->var = v;
- if (v) {
- v->where_decl = $0;
- v->where_set = $0;
- v->type = $<Type;
- v->constant = 1;
- v->explicit_type = 1;
- } else {
- v = var_ref(c, $1.txt);
- $0->var = v;
- type_err(c, "error: variable '%v' redeclared",
- $0, NULL, 0, NULL);
- type_err(c, "info: this is where '%v' was first declared",
- v->where_decl, NULL, 0, NULL);
- }
+ | Term ( ) ${ {
+ struct binode *b = new(binode);
+ b->op = Funcall;
+ b->left = $<T;
+ b->right = NULL;
+ $0 = b;
} }$
- $*exec
- Variable -> IDENTIFIER ${ {
- struct variable *v = var_ref(c, $1.txt);
- $0 = new_pos(var, $1);
- if (v == NULL) {
- /* This might be a label - allocate a var just in case */
- v = var_decl(c, $1.txt);
- if (v) {
- v->type = Tnone;
- v->where_decl = $0;
- v->where_set = $0;
- }
- }
- cast(var, $0)->var = v;
+###### SimpleStatement Grammar
+
+ | Term ( ExpressionList ) ${ {
+ struct binode *b = new(binode);
+ b->op = Funcall;
+ b->left = $<T;
+ b->right = reorder_bilist($<EL);
+ $0 = b;
} }$
- ## variable grammar
-###### print exec cases
- case Xvar:
- {
- struct var *v = cast(var, e);
- if (v->var) {
- struct binding *b = v->var->name;
- printf("%.*s", b->name.len, b->name.txt);
- }
- break;
- }
+###### print binode cases
-###### format cases
- case 'v':
- if (loc && loc->type == Xvar) {
- struct var *v = cast(var, loc);
- if (v->var) {
- struct binding *b = v->var->name;
- fprintf(stderr, "%.*s", b->name.len, b->name.txt);
- } else
- fputs("???", stderr); // NOTEST
- } else
- fputs("NOTVAR", stderr);
+ case Funcall:
+ do_indent(indent, "");
+ print_exec(b->left, -1, bracket);
+ printf("(");
+ for (b = cast(binode, b->right); b; b = cast(binode, b->right)) {
+ if (b->left) {
+ printf(" ");
+ print_exec(b->left, -1, bracket);
+ if (b->right)
+ printf(",");
+ }
+ }
+ printf(")");
+ if (indent >= 0)
+ printf("\n");
break;
-###### propagate exec cases
+###### propagate binode cases
- case Xvar:
- {
- struct var *var = cast(var, prog);
- struct variable *v = var->var;
- if (!v) {
- type_err(c, "%d:BUG: no variable!!", prog, NULL, 0, NULL); // NOTEST
- return Tnone; // NOTEST
- }
- v = v->merged;
- if (v->constant && (rules & Rnoconstant)) {
- type_err(c, "error: Cannot assign to a constant: %v",
- prog, NULL, 0, NULL);
- type_err(c, "info: name was defined as a constant here",
- v->where_decl, NULL, 0, NULL);
- return v->type;
- }
- if (v->type == Tnone && v->where_decl == prog)
- type_err(c, "error: variable used but not declared: %v",
- prog, NULL, 0, NULL);
- if (v->type == NULL) {
- if (type && *ok != 0) {
- v->type = type;
- v->where_set = prog;
- *ok = 2;
- }
- return type;
- }
- if (!type_compat(type, v->type, rules)) {
- type_err(c, "error: expected %1%r but variable '%v' is %2", prog,
- type, rules, v->type);
- type_err(c, "info: this is where '%v' was set to %1", v->where_set,
- v->type, rules, NULL);
+ case Funcall: {
+ /* Every arg must match formal parameter, and result
+ * is return type of function
+ */
+ struct binode *args = cast(binode, b->right);
+ struct var *v = cast(var, b->left);
+
+ if (!v->var->type || v->var->type->check_args == NULL) {
+ type_err(c, "error: attempt to call a non-function.",
+ prog, NULL, 0, NULL);
+ return NULL;
}
- if (!type)
- return v->type;
- return type;
+ v->var->type->check_args(c, ok, v->var->type, args);
+ return v->var->type->function.return_type;
}
-###### interp exec cases
- case Xvar:
- {
- struct var *var = cast(var, e);
- struct variable *v = var->var;
+###### interp binode cases
- v = v->merged;
- lrv = var_value(c, v);
- rvtype = v->type;
+ case Funcall: {
+ struct var *v = cast(var, b->left);
+ struct type *t = v->var->type;
+ void *oldlocal = c->local;
+ int old_size = c->local_size;
+ void *local = calloc(1, t->function.local_size);
+ struct value *fbody = var_value(c, v->var);
+ struct binode *arg = cast(binode, b->right);
+ struct binode *param = t->function.params;
+
+ while (param) {
+ struct var *pv = cast(var, param->left);
+ struct type *vtype = NULL;
+ struct value val = interp_exec(c, arg->left, &vtype);
+ struct value *lval;
+ c->local = local; c->local_size = t->function.local_size;
+ lval = var_value(c, pv->var);
+ c->local = oldlocal; c->local_size = old_size;
+ memcpy(lval, &val, vtype->size);
+ param = cast(binode, param->right);
+ arg = cast(binode, arg->right);
+ }
+ c->local = local; c->local_size = t->function.local_size;
+ if (t->function.inline_result && dtype) {
+ _interp_exec(c, fbody->function, NULL, NULL);
+ memcpy(dest, local, dtype->size);
+ rvtype = ret.type = NULL;
+ } else
+ rv = interp_exec(c, fbody->function, &rvtype);
+ c->local = oldlocal; c->local_size = old_size;
+ free(local);
break;
}
-###### ast functions
+## Complex executables: statements and expressions
- static void free_var(struct var *v)
- {
- free(v);
- }
+Now that we have types and values and variables and most of the basic
+Terms which provide access to these, we can explore the more complex
+code that combine all of these to get useful work done. Specifically
+statements and expressions.
-###### free exec cases
- case Xvar: free_var(cast(var, e)); break;
+Expressions are various combinations of Terms. We will use operator
+precedence to ensure correct parsing. The simplest Expression is just a
+Term - others will follow.
+
+###### Grammar
+
+ $*exec
+ Expression -> Term ${ $0 = $<Term; }$
+ ## expression grammar
### Expressions: Conditional
$LEFT if $$ifelse
-###### Grammar
+###### expression grammar
- $*exec
- Expression -> Expression if Expression else Expression $$ifelse ${ {
- struct binode *b1 = new(binode);
- struct binode *b2 = new(binode);
- b1->op = CondExpr;
- b1->left = $<3;
- b1->right = b2;
- b2->op = CondExpr;
- b2->left = $<1;
- b2->right = $<5;
- $0 = b1;
- } }$
- ## expression grammar
+ | Expression if Expression else Expression $$ifelse ${ {
+ struct binode *b1 = new(binode);
+ struct binode *b2 = new(binode);
+ b1->op = CondExpr;
+ b1->left = $<3;
+ b1->right = b2;
+ b2->op = CondExpr;
+ b2->left = $<1;
+ b2->right = $<5;
+ $0 = b1;
+ } }$
###### print binode cases
We also have a 'Bracket' operator which records where parentheses were
found. This makes it easy to reproduce these when printing. Possibly I
-should only insert brackets were needed for precedence.
+should only insert brackets were needed for precedence. Putting
+parentheses around an expression converts it into a Term,
###### Binode types
Plus, Minus,
$TERM ( )
###### expression grammar
- | Expression Eop Expression ${ {
- struct binode *b = new(binode);
- b->op = $2.op;
- b->left = $<1;
- b->right = $<3;
- $0 = b;
- } }$
+ | Expression Eop Expression ${ {
+ struct binode *b = new(binode);
+ b->op = $2.op;
+ b->left = $<1;
+ b->right = $<3;
+ $0 = b;
+ } }$
- | Expression Top Expression ${ {
- struct binode *b = new(binode);
- b->op = $2.op;
- b->left = $<1;
- b->right = $<3;
- $0 = b;
- } }$
+ | Expression Top Expression ${ {
+ struct binode *b = new(binode);
+ b->op = $2.op;
+ b->left = $<1;
+ b->right = $<3;
+ $0 = b;
+ } }$
- | ( Expression ) ${ {
- struct binode *b = new_pos(binode, $1);
- b->op = Bracket;
- b->right = $<2;
- $0 = b;
- } }$
- | Uop Expression ${ {
- struct binode *b = new(binode);
- b->op = $1.op;
- b->right = $<2;
- $0 = b;
- } }$
- | Value ${ $0 = $<1; }$
- | Variable ${ $0 = $<1; }$
+ | Uop Expression ${ {
+ struct binode *b = new(binode);
+ b->op = $1.op;
+ b->right = $<2;
+ $0 = b;
+ } }$
+
+###### term grammar
+
+ | ( Expression ) ${ {
+ struct binode *b = new_pos(binode, $1);
+ b->op = Bracket;
+ b->right = $<2;
+ $0 = b;
+ } }$
###### Grammar
return rv;
}
-### Function calls
-
-A function call can appear either as an expression or as a statement.
-As functions cannot yet return values, only the statement version will work.
-We use a new 'Funcall' binode type to link the function with a list of
-arguments, form with the 'List' nodes.
-
-###### Binode types
- Funcall,
-
-###### expression grammar
- | Variable ( ExpressionList ) ${ {
- struct binode *b = new(binode);
- b->op = Funcall;
- b->left = $<V;
- b->right = reorder_bilist($<EL);
- $0 = b;
- } }$
- | Variable ( ) ${ {
- struct binode *b = new(binode);
- b->op = Funcall;
- b->left = $<V;
- b->right = NULL;
- $0 = b;
- } }$
-
-###### SimpleStatement Grammar
-
- | Variable ( ExpressionList ) ${ {
- struct binode *b = new(binode);
- b->op = Funcall;
- b->left = $<V;
- b->right = reorder_bilist($<EL);
- $0 = b;
- } }$
-
-###### print binode cases
-
- case Funcall:
- do_indent(indent, "");
- print_exec(b->left, -1, bracket);
- printf("(");
- for (b = cast(binode, b->right); b; b = cast(binode, b->right)) {
- if (b->left) {
- printf(" ");
- print_exec(b->left, -1, bracket);
- if (b->right)
- printf(",");
- }
- }
- printf(")");
- if (indent >= 0)
- printf("\n");
- break;
-
-###### propagate binode cases
-
- case Funcall: {
- /* Every arg must match formal parameter, and result
- * is return type of function
- */
- struct binode *args = cast(binode, b->right);
- struct var *v = cast(var, b->left);
-
- if (!v->var->type || v->var->type->check_args == NULL) {
- type_err(c, "error: attempt to call a non-function.",
- prog, NULL, 0, NULL);
- return NULL;
- }
- v->var->type->check_args(c, ok, v->var->type, args);
- return v->var->type->function.return_type;
- }
-
-###### interp binode cases
-
- case Funcall: {
- struct var *v = cast(var, b->left);
- struct type *t = v->var->type;
- void *oldlocal = c->local;
- int old_size = c->local_size;
- void *local = calloc(1, t->function.local_size);
- struct value *fbody = var_value(c, v->var);
- struct binode *arg = cast(binode, b->right);
- struct binode *param = t->function.params;
-
- while (param) {
- struct var *pv = cast(var, param->left);
- struct type *vtype = NULL;
- struct value val = interp_exec(c, arg->left, &vtype);
- struct value *lval;
- c->local = local; c->local_size = t->function.local_size;
- lval = var_value(c, pv->var);
- c->local = oldlocal; c->local_size = old_size;
- memcpy(lval, &val, vtype->size);
- param = cast(binode, param->right);
- arg = cast(binode, arg->right);
- }
- c->local = local; c->local_size = t->function.local_size;
- if (t->function.inline_result && dtype) {
- _interp_exec(c, fbody->function, NULL, NULL);
- memcpy(dest, local, dtype->size);
- rvtype = ret.type = NULL;
- } else
- rv = interp_exec(c, fbody->function, &rvtype);
- c->local = oldlocal; c->local_size = old_size;
- free(local);
- break;
- }
-
### Blocks, Statements, and Statement lists.
Now that we have expressions out of the way we need to turn to
$TERM =
###### SimpleStatement Grammar
- | Variable = Expression ${
+ | Term = Expression ${
$0 = b= new(binode);
b->op = Assign;
b->left = $<1;