X-Git-Url: https://ocean-lang.org/code/?p=ocean;a=blobdiff_plain;f=csrc%2Foceani.mdc;h=c687fddca5ea7a17521fe730c81d8381d61bbb6e;hp=c89ecafc6c33890f838890eceb91b815e07575f3;hb=39f272e16649c2fe49184d31f668471ff49ee9ed;hpb=499033323e79fc5d2b757c2055ad5bd0f55bc7f3 diff --git a/csrc/oceani.mdc b/csrc/oceani.mdc index c89ecaf..c687fdd 100644 --- a/csrc/oceani.mdc +++ b/csrc/oceani.mdc @@ -37,9 +37,9 @@ boolean operators. Some operators that have only recently been added, and so have not generated all that much experience yet are "and then" and "or else" as -short-circuit Boolean operators, and the "if ... else" trinary -operator which can select between two expressions based on a third -(which appears syntactically in the middle). +short-circuit Boolean operators (which have since been remove), and the +"if ... else" trinary operator which can select between two expressions +based on a third (which appears syntactically in the middle). The "func" clause currently only allows a "main" function to be declared. That will be extended when proper function support is added. @@ -110,6 +110,7 @@ structures can be used. ## macros struct parse_context; ## ast + ## ast late struct parse_context { struct token_config config; char *file_name; @@ -130,7 +131,7 @@ structures can be used. struct parse_context *c = config2context(config); ###### Parser: code - + #define _GNU_SOURCE #include #include #include @@ -176,7 +177,7 @@ structures can be used. int fd; int len; char *file; - struct section *s, *ss; + struct section *s = NULL, *ss; char *section = NULL; struct parse_context context = { .config = { @@ -231,20 +232,22 @@ structures can be used. if (!ss) { fprintf(stderr, "oceani: cannot find section %s\n", section); - exit(1); + goto cleanup; } } else ss = s; // NOTEST if (!ss->code) { fprintf(stderr, "oceani: no code found in requested section\n"); // NOTEST - exit(1); // NOTEST + goto cleanup; // NOTEST } parse_oceani(ss->code, &context.config, dotrace ? stderr : NULL); + resolve_consts(&context); + prepare_types(&context); if (!context.parse_error && !analyse_funcs(&context)) { fprintf(stderr, "oceani: type error in program - not running.\n"); - context.parse_error = 1; + context.parse_error += 1; } if (doprint) { @@ -254,16 +257,18 @@ structures can be used. } if (doexec && !context.parse_error) interp_main(&context, argc - optind, argv + optind); - + cleanup: while (s) { struct section *t = s->next; code_free(s->code); free(s); s = t; } - if (!context.parse_error) { - ## free global vars - } + // FIXME parser should pop scope even on error + while (context.scope_depth > 0) + scope_pop(&context); + ## free global vars + ## free const decls ## free context types ## free context storage exit(context.parse_error ? 1 : 0); @@ -362,13 +367,14 @@ context so indicate that parsing failed. static void fput_loc(struct exec *loc, FILE *f); static void type_err(struct parse_context *c, char *fmt, struct exec *loc, - struct type *t1, int rules, struct type *t2); + struct type *t1, enum val_rules rules, struct type *t2); + static void tok_err(struct parse_context *c, char *fmt, struct token *t); ###### core functions static void type_err(struct parse_context *c, char *fmt, struct exec *loc, - struct type *t1, int rules, struct type *t2) + struct type *t1, enum val_rules rules, struct type *t2) { fprintf(stderr, "%s:", c->file_name); fput_loc(loc, stderr); @@ -391,14 +397,14 @@ context so indicate that parsing failed. } } fputs("\n", stderr); - c->parse_error = 1; + c->parse_error += 1; } static void tok_err(struct parse_context *c, char *fmt, struct token *t) { fprintf(stderr, "%s:%d:%d: %s: %.*s\n", c->file_name, t->line, t->col, fmt, t->txt.len, t->txt.txt); - c->parse_error = 1; + c->parse_error += 1; } ## Entities: declared and predeclared. @@ -411,731 +417,1423 @@ confusing) and introduced here. The following section will present the different specific code elements which comprise or manipulate these various entities. -### Types +### Executables -Values come in a wide range of types, with more likely to be added. -Each type needs to be able to print its own values (for convenience at -least) as well as to compare two values, at least for equality and -possibly for order. For now, values might need to be duplicated and -freed, though eventually such manipulations will be better integrated -into the language. +Executables can be lots of different things. In many cases an +executable is just an operation combined with one or two other +executables. This allows for expressions and lists etc. Other times an +executable is something quite specific like a constant or variable name. +So we define a `struct exec` to be a general executable with a type, and +a `struct binode` which is a subclass of `exec`, forms a node in a +binary tree, and holds an operation. The simplest operation is "List" +which can be used to combine several execs together. -Rather than requiring every numeric type to support all numeric -operations (add, multiple, etc), we allow types to be able to present -as one of a few standard types: integer, float, and fraction. The -existence of these conversion functions eventually enable types to -determine if they are compatible with other types, though such types -have not yet been implemented. +There will be other subclasses, and to access these we need to be able +to `cast` the `exec` into the various other types. The first field in +any `struct exec` is the type from the `exec_types` enum. -Named type are stored in a simple linked list. Objects of each type are -"values" which are often passed around by value. +###### macros + #define cast(structname, pointer) ({ \ + const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \ + if (__mptr && *__mptr != X##structname) abort(); \ + (struct structname *)( (char *)__mptr);}) -###### ast + #define new(structname) ({ \ + struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \ + __ptr->type = X##structname; \ + __ptr->line = -1; __ptr->column = -1; \ + __ptr;}) - struct value { - union { - char ptr[1]; - ## value union fields - }; - }; + #define new_pos(structname, token) ({ \ + struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \ + __ptr->type = X##structname; \ + __ptr->line = token.line; __ptr->column = token.col; \ + __ptr;}) - struct type { - struct text name; - struct type *next; - int size, align; - void (*init)(struct type *type, struct value *val); - void (*prepare_type)(struct parse_context *c, struct type *type, int parse_time); - void (*print)(struct type *type, struct value *val); - void (*print_type)(struct type *type, FILE *f); - int (*cmp_order)(struct type *t1, struct type *t2, - struct value *v1, struct value *v2); - int (*cmp_eq)(struct type *t1, struct type *t2, - struct value *v1, struct value *v2); - void (*dup)(struct type *type, struct value *vold, struct value *vnew); - void (*free)(struct type *type, struct value *val); - void (*free_type)(struct type *t); - long long (*to_int)(struct value *v); - double (*to_float)(struct value *v); - int (*to_mpq)(mpq_t *q, struct value *v); - ## type functions - union { - ## type union fields - }; +###### ast + enum exec_types { + Xbinode, + ## exec type + }; + struct exec { + enum exec_types type; + int line, column; + ## exec fields + }; + struct binode { + struct exec; + enum Btype { + List, + ## Binode types + } op; + struct exec *left, *right; }; - -###### parse context - - struct type *typelist; ###### ast functions - static struct type *find_type(struct parse_context *c, struct text s) + static int __fput_loc(struct exec *loc, FILE *f) { - struct type *l = c->typelist; - - while (l && - text_cmp(l->name, s) != 0) - l = l->next; - return l; + if (!loc) + return 0; + if (loc->line >= 0) { + fprintf(f, "%d:%d: ", loc->line, loc->column); + return 1; + } + if (loc->type == Xbinode) + return __fput_loc(cast(binode,loc)->left, f) || + __fput_loc(cast(binode,loc)->right, f); // NOTEST + return 0; // NOTEST } - - static struct type *add_type(struct parse_context *c, struct text s, - struct type *proto) + static void fput_loc(struct exec *loc, FILE *f) { - struct type *n; - - n = calloc(1, sizeof(*n)); - *n = *proto; - n->name = s; - n->next = c->typelist; - c->typelist = n; - return n; + if (!__fput_loc(loc, f)) + fprintf(f, "??:??: "); // NOTEST } - static void free_type(struct type *t) +Each different type of `exec` node needs a number of functions defined, +a bit like methods. We must be able to free it, print it, analyse it +and execute it. Once we have specific `exec` types we will need to +parse them too. Let's take this a bit more slowly. + +#### Freeing + +The parser generator requires a `free_foo` function for each struct +that stores attributes and they will often be `exec`s and subtypes +there-of. So we need `free_exec` which can handle all the subtypes, +and we need `free_binode`. + +###### ast functions + + static void free_binode(struct binode *b) { - /* The type is always a reference to something in the - * context, so we don't need to free anything. - */ + if (!b) + return; + free_exec(b->left); + free_exec(b->right); + free(b); } - static void free_value(struct type *type, struct value *v) +###### core functions + static void free_exec(struct exec *e) { - if (type && v) { - type->free(type, v); - memset(v, 0x5a, type->size); + if (!e) + return; + switch(e->type) { + ## free exec cases } } - static void type_print(struct type *type, FILE *f) - { - if (!type) - fputs("*unknown*type*", f); // NOTEST - else if (type->name.len) - fprintf(f, "%.*s", type->name.len, type->name.txt); - else if (type->print_type) - type->print_type(type, f); - else - fputs("*invalid*type*", f); // NOTEST - } +###### forward decls - static void val_init(struct type *type, struct value *val) - { - if (type && type->init) - type->init(type, val); - } + static void free_exec(struct exec *e); - static void dup_value(struct type *type, - struct value *vold, struct value *vnew) +###### free exec cases + case Xbinode: free_binode(cast(binode, e)); break; + +#### Printing + +Printing an `exec` requires that we know the current indent level for +printing line-oriented components. As will become clear later, we +also want to know what sort of bracketing to use. It will also be used +to sometime print comments after an exec to explain some of the results +of analysis. + +###### ast functions + + static void do_indent(int i, char *str) { - if (type && type->dup) - type->dup(type, vold, vnew); + while (i-- > 0) + printf(" "); + printf("%s", str); } - static int value_cmp(struct type *tl, struct type *tr, - struct value *left, struct value *right) +###### core functions + static void print_binode(struct binode *b, int indent, int bracket) { - if (tl && tl->cmp_order) - return tl->cmp_order(tl, tr, left, right); - if (tl && tl->cmp_eq) // NOTEST - return tl->cmp_eq(tl, tr, left, right); // NOTEST - return -1; // NOTEST + struct binode *b2; + switch(b->op) { + case List: abort(); // must be handled by parent NOTEST + ## print binode cases + } } - static void print_value(struct type *type, struct value *v) + static void print_exec(struct exec *e, int indent, int bracket) { - if (type && type->print) - type->print(type, v); - else - printf("*Unknown*"); // NOTEST + if (!e) + return; // NOTEST + switch (e->type) { + case Xbinode: + print_binode(cast(binode, e), indent, bracket); break; + ## print exec cases + } + ## print exec extras } ###### forward decls - static void free_value(struct type *type, struct value *v); - static int type_compat(struct type *require, struct type *have, int rules); - static void type_print(struct type *type, FILE *f); - static void val_init(struct type *type, struct value *v); - static void dup_value(struct type *type, - struct value *vold, struct value *vnew); - static int value_cmp(struct type *tl, struct type *tr, - struct value *left, struct value *right); - static void print_value(struct type *type, struct value *v); - -###### free context types - - while (context.typelist) { - struct type *t = context.typelist; + static void print_exec(struct exec *e, int indent, int bracket); - context.typelist = t->next; - if (t->free_type) - t->free_type(t); - free(t); - } +#### Analysing -Type can be specified for local variables, for fields in a structure, -for formal parameters to functions, and possibly elsewhere. Different -rules may apply in different contexts. As a minimum, a named type may -always be used. Currently the type of a formal parameter can be -different from types in other contexts, so we have a separate grammar -symbol for those. +As discussed, analysis involves propagating type requirements around the +program and looking for errors. -###### Grammar +So `propagate_types` is passed an expected type (being a `struct type` +pointer together with some `val_rules` flags) that the `exec` is +expected to return, and returns the type that it does return, either of +which can be `NULL` signifying "unknown". A `prop_err` flag set is +passed by reference. It has `Efail` set when an error is found, and +`Eretry` when the type for some element is set via propagation. If +any expression cannot be evaluated a compile time, `Eruntime` is set. +If the expression can be copied, `Emaycopy` is set. - $*type - Type -> IDENTIFIER ${ - $0 = find_type(c, $1.txt); - if (!$0) { - tok_err(c, - "error: undefined type", &$1); +If `Erval` is set, then the value cannot be assigned to because it is +a temporary result. If `Erval` is clear but `Econst` is set, then +the value can only be assigned once, when the variable is declared. - $0 = Tnone; - } - }$ - ## type grammar +###### ast - FormalType -> Type ${ $0 = $<1; }$ - ## formal type grammar + enum val_rules {Rboolok = 1<<0, Rrefok = 1<<1,}; + enum prop_err {Efail = 1<<0, Eretry = 1<<1, Eruntime = 1<<2, + Emaycopy = 1<<3, Erval = 1<<4, Econst = 1<<5}; -#### Base Types +###### forward decls + static struct type *propagate_types(struct exec *prog, struct parse_context *c, enum prop_err *perr, + struct type *type, enum val_rules rules); +###### core functions -Values of the base types can be numbers, which we represent as -multi-precision fractions, strings, Booleans and labels. When -analysing the program we also need to allow for places where no value -is meaningful (type `Tnone`) and where we don't know what type to -expect yet (type is `NULL`). + static struct type *__propagate_types(struct exec *prog, struct parse_context *c, enum prop_err *perr, + enum prop_err *perr_local, + struct type *type, enum val_rules rules) + { + struct type *t; -Values are never shared, they are always copied when used, and freed -when no longer needed. + if (!prog) + return Tnone; -When propagating type information around the program, we need to -determine if two types are compatible, where type `NULL` is compatible -with anything. There are two special cases with type compatibility, -both related to the Conditional Statement which will be described -later. In some cases a Boolean can be accepted as well as some other -primary type, and in others any type is acceptable except a label (`Vlabel`). -A separate function encoding these cases will simplify some code later. - -###### type functions - - int (*compat)(struct type *this, struct type *other); - -###### ast functions + switch (prog->type) { + case Xbinode: + { + struct binode *b = cast(binode, prog); + switch (b->op) { + case List: abort(); // NOTEST + ## propagate binode cases + } + break; + } + ## propagate exec cases + } + return Tnone; + } - static int type_compat(struct type *require, struct type *have, int rules) + static struct type *propagate_types(struct exec *prog, struct parse_context *c, enum prop_err *perr, + struct type *type, enum val_rules rules) { - if ((rules & Rboolok) && have == Tbool) - return 1; // NOTEST - if ((rules & Rnolabel) && have == Tlabel) - return 0; // NOTEST - if (!require || !have) - return 1; - - if (require->compat) - return require->compat(require, have); + int pre_err = c->parse_error; + enum prop_err perr_local = 0; + struct type *ret = __propagate_types(prog, c, perr, &perr_local, type, rules); - return require == have; + *perr |= perr_local & (Efail | Eretry); + if (c->parse_error > pre_err) + *perr |= Efail; + return ret; } -###### includes - #include - #include "parse_string.h" - #include "parse_number.h" +#### Interpreting -###### libs - myLDLIBS := libnumber.o libstring.o -lgmp - LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS) +Interpreting an `exec` doesn't require anything but the `exec`. State +is stored in variables and each variable will be directly linked from +within the `exec` tree. The exception to this is the `main` function +which needs to look at command line arguments. This function will be +interpreted separately. -###### type union fields - enum vtype {Vnone, Vstr, Vnum, Vbool, Vlabel} vtype; +Each `exec` can return a value combined with a type in `struct lrval`. +The type may be `Tnone` but must be non-NULL. Some `exec`s will return +the location of a value, which can be updated, in `lval`. Others will +set `lval` to NULL indicating that there is a value of appropriate type +in `rval`. -###### value union fields - struct text str; - mpq_t num; - unsigned char bool; - void *label; +###### forward decls + static struct value interp_exec(struct parse_context *c, struct exec *e, + struct type **typeret); +###### core functions -###### ast functions - static void _free_value(struct type *type, struct value *v) - { - if (!v) - return; // NOTEST - switch (type->vtype) { - case Vnone: break; - case Vstr: free(v->str.txt); break; - case Vnum: mpq_clear(v->num); break; - case Vlabel: - case Vbool: break; - } - } + struct lrval { + struct type *type; + struct value rval, *lval; + }; -###### value functions + /* If dest is passed, dtype must give the expected type, and + * result can go there, in which case type is returned as NULL. + */ + static struct lrval _interp_exec(struct parse_context *c, struct exec *e, + struct value *dest, struct type *dtype); - static void _val_init(struct type *type, struct value *val) + static struct value interp_exec(struct parse_context *c, struct exec *e, + struct type **typeret) { - switch(type->vtype) { - case Vnone: // NOTEST - break; // NOTEST - case Vnum: - mpq_init(val->num); break; - case Vstr: - val->str.txt = malloc(1); - val->str.len = 0; - break; - case Vbool: - val->bool = 0; - break; - case Vlabel: - val->label = NULL; - break; - } + struct lrval ret = _interp_exec(c, e, NULL, NULL); + + if (!ret.type) abort(); + if (typeret) + *typeret = ret.type; + if (ret.lval) + dup_value(ret.type, ret.lval, &ret.rval); + return ret.rval; } - static void _dup_value(struct type *type, - struct value *vold, struct value *vnew) + static struct value *linterp_exec(struct parse_context *c, struct exec *e, + struct type **typeret) { - switch (type->vtype) { - case Vnone: // NOTEST - break; // NOTEST - case Vlabel: - vnew->label = vold->label; - break; - case Vbool: - vnew->bool = vold->bool; - break; - case Vnum: - mpq_init(vnew->num); - mpq_set(vnew->num, vold->num); - break; - case Vstr: - vnew->str.len = vold->str.len; - vnew->str.txt = malloc(vnew->str.len); - memcpy(vnew->str.txt, vold->str.txt, vnew->str.len); - break; - } + struct lrval ret = _interp_exec(c, e, NULL, NULL); + + if (!ret.type) abort(); + if (ret.lval) + *typeret = ret.type; + else + free_value(ret.type, &ret.rval); + return ret.lval; } - static int _value_cmp(struct type *tl, struct type *tr, - struct value *left, struct value *right) + /* dinterp_exec is used when the destination type is certain and + * the value has a place to go. + */ + static void dinterp_exec(struct parse_context *c, struct exec *e, + struct value *dest, struct type *dtype, + int need_free) { - int cmp; - if (tl != tr) - return tl - tr; // NOTEST - switch (tl->vtype) { - case Vlabel: cmp = left->label == right->label ? 0 : 1; break; - case Vnum: cmp = mpq_cmp(left->num, right->num); break; - case Vstr: cmp = text_cmp(left->str, right->str); break; - case Vbool: cmp = left->bool - right->bool; break; - case Vnone: cmp = 0; // NOTEST - } - return cmp; + struct lrval ret = _interp_exec(c, e, dest, dtype); + if (!ret.type) + return; + if (need_free) + free_value(dtype, dest); + if (ret.lval) + dup_value(dtype, ret.lval, dest); + else + memcpy(dest, &ret.rval, dtype->size); } - static void _print_value(struct type *type, struct value *v) + static struct lrval _interp_exec(struct parse_context *c, struct exec *e, + struct value *dest, struct type *dtype) { - switch (type->vtype) { - case Vnone: // NOTEST - printf("*no-value*"); break; // NOTEST - case Vlabel: // NOTEST - printf("*label-%p*", v->label); break; // NOTEST - case Vstr: - printf("%.*s", v->str.len, v->str.txt); break; - case Vbool: - printf("%s", v->bool ? "True":"False"); break; - case Vnum: - { - mpf_t fl; - mpf_init2(fl, 20); - mpf_set_q(fl, v->num); - gmp_printf("%Fg", fl); - mpf_clear(fl); - break; + /* If the result is copied to dest, ret.type is set to NULL */ + struct lrval ret; + struct value rv = {}, *lrv = NULL; + struct type *rvtype; + + rvtype = ret.type = Tnone; + if (!e) { + ret.lval = lrv; + ret.rval = rv; + return ret; + } + + switch(e->type) { + case Xbinode: + { + struct binode *b = cast(binode, e); + struct value left, right, *lleft; + struct type *ltype, *rtype; + ltype = rtype = Tnone; + switch (b->op) { + case List: abort(); // NOTEST + ## interp binode cases } + free_value(ltype, &left); + free_value(rtype, &right); + break; + } + ## interp exec cases } + if (rvtype) { + ret.lval = lrv; + ret.rval = rv; + ret.type = rvtype; + } + ## interp exec cleanup + return ret; } - static void _free_value(struct type *type, struct value *v); - - static struct type base_prototype = { - .init = _val_init, - .print = _print_value, - .cmp_order = _value_cmp, - .cmp_eq = _value_cmp, - .dup = _dup_value, - .free = _free_value, - }; - - static struct type *Tbool, *Tstr, *Tnum, *Tnone, *Tlabel; - -###### ast functions - static struct type *add_base_type(struct parse_context *c, char *n, - enum vtype vt, int size) - { - struct text txt = { n, strlen(n) }; - struct type *t; - - t = add_type(c, txt, &base_prototype); - t->vtype = vt; - t->size = size; - t->align = size > sizeof(void*) ? sizeof(void*) : size; - if (t->size & (t->align - 1)) - t->size = (t->size | (t->align - 1)) + 1; // NOTEST - return t; - } +### Types -###### context initialization +Values come in a wide range of types, with more likely to be added. +Each type needs to be able to print its own values (for convenience at +least) as well as to compare two values, at least for equality and +possibly for order. For now, values might need to be duplicated and +freed, though eventually such manipulations will be better integrated +into the language. - Tbool = add_base_type(&context, "Boolean", Vbool, sizeof(char)); - Tstr = add_base_type(&context, "string", Vstr, sizeof(struct text)); - Tnum = add_base_type(&context, "number", Vnum, sizeof(mpq_t)); - Tnone = add_base_type(&context, "none", Vnone, 0); - Tlabel = add_base_type(&context, "label", Vlabel, sizeof(void*)); +Rather than requiring every numeric type to support all numeric +operations (add, multiply, etc), we allow types to be able to present +as one of a few standard types: integer, float, and fraction. The +existence of these conversion functions eventually enable types to +determine if they are compatible with other types, though such types +have not yet been implemented. -### Variables +Named type are stored in a simple linked list. Objects of each type are +"values" which are often passed around by value. -Variables are scoped named values. We store the names in a linked list -of "bindings" sorted in lexical order, and use sequential search and -insertion sort. +There are both explicitly named types, and anonymous types. Anonymous +cannot be accessed by name, but are used internally and have a name +which might be reported in error messages. ###### ast - struct binding { - struct text name; - struct binding *next; // in lexical order - ## binding fields + struct value { + union { + char ptr[1]; + ## value union fields + }; }; -This linked list is stored in the parse context so that "reduce" -functions can find or add variables, and so the analysis phase can -ensure that every variable gets a type. - -###### parse context - - struct binding *varlist; // In lexical order - -###### ast functions - - static struct binding *find_binding(struct parse_context *c, struct text s) +###### ast late + struct type { + struct text name; + struct type *next; + struct token first_use; + int size, align; + int anon; + void (*init)(struct type *type, struct value *val); + int (*prepare_type)(struct parse_context *c, struct type *type, int parse_time); + void (*print)(struct type *type, struct value *val, FILE *f); + void (*print_type)(struct type *type, FILE *f); + int (*cmp_order)(struct type *t1, struct type *t2, + struct value *v1, struct value *v2); + int (*cmp_eq)(struct type *t1, struct type *t2, + struct value *v1, struct value *v2); + void (*dup)(struct type *type, struct value *vold, struct value *vnew); + int (*test)(struct type *type, struct value *val); + void (*free)(struct type *type, struct value *val); + void (*free_type)(struct type *t); + long long (*to_int)(struct value *v); + double (*to_float)(struct value *v); + int (*to_mpq)(mpq_t *q, struct value *v); + ## type functions + union { + ## type union fields + }; + }; + +###### parse context + + struct type *typelist; + +###### includes + #include + +###### ast functions + + static struct type *find_type(struct parse_context *c, struct text s) { - struct binding **l = &c->varlist; - struct binding *n; - int cmp = 1; + struct type *t = c->typelist; + + while (t && (t->anon || + text_cmp(t->name, s) != 0)) + t = t->next; + return t; + } + + static struct type *_add_type(struct parse_context *c, struct text s, + struct type *proto, int anon) + { + struct type *n; - while (*l && - (cmp = text_cmp((*l)->name, s)) < 0) - l = & (*l)->next; - if (cmp == 0) - return *l; n = calloc(1, sizeof(*n)); + if (proto) + *n = *proto; + else + n->size = -1; n->name = s; - n->next = *l; - *l = n; + n->anon = anon; + n->next = c->typelist; + c->typelist = n; return n; } -Each name can be linked to multiple variables defined in different -scopes. Each scope starts where the name is declared and continues -until the end of the containing code block. Scopes of a given name -cannot nest, so a declaration while a name is in-scope is an error. - -###### binding fields - struct variable *var; - -###### ast - struct variable { - struct variable *previous; - struct type *type; - struct binding *name; - struct exec *where_decl;// where name was declared - struct exec *where_set; // where type was set - ## variable fields - }; - -When a scope closes, the values of the variables might need to be freed. -This happens in the context of some `struct exec` and each `exec` will -need to know which variables need to be freed when it completes. - -####### exec fields - struct variable *to_free; + static struct type *add_type(struct parse_context *c, struct text s, + struct type *proto) + { + return _add_type(c, s, proto, 0); + } -####### variable fields - struct exec *cleanup_exec; - struct variable *next_free; + static struct type *add_anon_type(struct parse_context *c, + struct type *proto, char *name, ...) + { + struct text t; + va_list ap; + + va_start(ap, name); + vasprintf(&t.txt, name, ap); + va_end(ap); + t.len = strlen(t.txt); + return _add_type(c, t, proto, 1); + } -####### interp exec cleanup + static struct type *find_anon_type(struct parse_context *c, + struct type *proto, char *name, ...) { - struct variable *v; - for (v = e->to_free; v; v = v->next_free) { - struct value *val = var_value(c, v); - free_value(v->type, val); + struct type *t = c->typelist; + struct text nm; + va_list ap; + + va_start(ap, name); + vasprintf(&nm.txt, name, ap); + va_end(ap); + nm.len = strlen(name); + + while (t && (!t->anon || + text_cmp(t->name, nm) != 0)) + t = t->next; + if (t) { + free(nm.txt); + return t; } + return _add_type(c, nm, proto, 1); } -###### ast functions - static void variable_unlink_exec(struct variable *v) + static void free_type(struct type *t) { - struct variable **vp; - if (!v->cleanup_exec) - return; - for (vp = &v->cleanup_exec->to_free; - *vp; vp = &(*vp)->next_free) { - if (*vp != v) - continue; - *vp = v->next_free; - v->cleanup_exec = NULL; - break; - } + /* The type is always a reference to something in the + * context, so we don't need to free anything. + */ } -While the naming seems strange, we include local constants in the -definition of variables. A name declared `var := value` can -subsequently be changed, but a name declared `var ::= value` cannot - -it is constant + static void free_value(struct type *type, struct value *v) + { + if (type && v) { + type->free(type, v); + memset(v, 0x5a, type->size); + } + } -###### variable fields - int constant; + static void type_print(struct type *type, FILE *f) + { + if (!type) + fputs("*unknown*type*", f); // NOTEST + else if (type->name.len && !type->anon) + fprintf(f, "%.*s", type->name.len, type->name.txt); + else if (type->print_type) + type->print_type(type, f); + else if (type->name.len && type->anon) + fprintf(f, "\"%.*s\"", type->name.len, type->name.txt); + else + fputs("*invalid*type*", f); // NOTEST + } -Scopes in parallel branches can be partially merged. More -specifically, if a given name is declared in both branches of an -if/else then its scope is a candidate for merging. Similarly if -every branch of an exhaustive switch (e.g. has an "else" clause) -declares a given name, then the scopes from the branches are -candidates for merging. + static void val_init(struct type *type, struct value *val) + { + if (type && type->init) + type->init(type, val); + } -Note that names declared inside a loop (which is only parallel to -itself) are never visible after the loop. Similarly names defined in -scopes which are not parallel, such as those started by `for` and -`switch`, are never visible after the scope. Only variables defined in -both `then` and `else` (including the implicit then after an `if`, and -excluding `then` used with `for`) and in all `case`s and `else` of a -`switch` or `while` can be visible beyond the `if`/`switch`/`while`. + static void dup_value(struct type *type, + struct value *vold, struct value *vnew) + { + if (type && type->dup) + type->dup(type, vold, vnew); + } -Labels, which are a bit like variables, follow different rules. -Labels are not explicitly declared, but if an undeclared name appears -in a context where a label is legal, that effectively declares the -name as a label. The declaration remains in force (or in scope) at -least to the end of the immediately containing block and conditionally -in any larger containing block which does not declare the name in some -other way. Importantly, the conditional scope extension happens even -if the label is only used in one parallel branch of a conditional -- -when used in one branch it is treated as having been declared in all -branches. + static int value_cmp(struct type *tl, struct type *tr, + struct value *left, struct value *right) + { + if (tl && tl->cmp_order) + return tl->cmp_order(tl, tr, left, right); + if (tl && tl->cmp_eq) + return tl->cmp_eq(tl, tr, left, right); + return -1; // NOTEST + } -Merge candidates are tentatively visible beyond the end of the -branching statement which creates them. If the name is used, the -merge is affirmed and they become a single variable visible at the -outer layer. If not - if it is redeclared first - the merge lapses. + static void print_value(struct type *type, struct value *v, FILE *f) + { + if (type && type->print) + type->print(type, v, f); + else + fprintf(f, "*Unknown*"); // NOTEST + } -To track scopes we have an extra stack, implemented as a linked list, -which roughly parallels the parse stack and which is used exclusively -for scoping. When a new scope is opened, a new frame is pushed and -the child-count of the parent frame is incremented. This child-count -is used to distinguish between the first of a set of parallel scopes, -in which declared variables must not be in scope, and subsequent -branches, whether they may already be conditionally scoped. + static void prepare_types(struct parse_context *c) + { + struct type *t; + int retry = 1; + enum { none, some, cannot } progress = none; + + while (retry) { + retry = 0; + + for (t = c->typelist; t; t = t->next) { + if (t->size < 0) + tok_err(c, "error: type used but not declared", + &t->first_use); + if (t->size == 0 && t->prepare_type) { + if (t->prepare_type(c, t, 1)) + progress = some; + else if (progress == cannot) + tok_err(c, "error: type has recursive definition", + &t->first_use); + else + retry = 1; + } + } + switch (progress) { + case cannot: + retry = 0; break; + case none: + progress = cannot; break; + case some: + progress = none; break; + } + } + } -To push a new frame *before* any code in the frame is parsed, we need a -grammar reduction. This is most easily achieved with a grammar -element which derives the empty string, and creates the new scope when -it is recognised. This can be placed, for example, between a keyword -like "if" and the code following it. +###### forward decls -###### ast - struct scope { - struct scope *parent; - int child_count; - }; + static void free_value(struct type *type, struct value *v); + static int type_compat(struct type *require, struct type *have, enum val_rules rules); + static void type_print(struct type *type, FILE *f); + static void val_init(struct type *type, struct value *v); + static void dup_value(struct type *type, + struct value *vold, struct value *vnew); + static int value_cmp(struct type *tl, struct type *tr, + struct value *left, struct value *right); + static void print_value(struct type *type, struct value *v, FILE *f); -###### parse context - int scope_depth; - struct scope *scope_stack; +###### free context types -###### ast functions - static void scope_pop(struct parse_context *c) - { - struct scope *s = c->scope_stack; + while (context.typelist) { + struct type *t = context.typelist; - c->scope_stack = s->parent; - free(s); - c->scope_depth -= 1; + context.typelist = t->next; + if (t->free_type) + t->free_type(t); + if (t->anon) + free(t->name.txt); + free(t); } - static void scope_push(struct parse_context *c) - { - struct scope *s = calloc(1, sizeof(*s)); - if (c->scope_stack) - c->scope_stack->child_count += 1; - s->parent = c->scope_stack; - c->scope_stack = s; - c->scope_depth += 1; - } +Type can be specified for local variables, for fields in a structure, +for formal parameters to functions, and possibly elsewhere. Different +rules may apply in different contexts. As a minimum, a named type may +always be used. Currently the type of a formal parameter can be +different from types in other contexts, so we have a separate grammar +symbol for those. ###### Grammar - $void - OpenScope -> ${ scope_push(c); }$ + $*type + Type -> IDENTIFIER ${ + $0 = find_type(c, $ID.txt); + if (!$0) { + $0 = add_type(c, $ID.txt, NULL); + $0->first_use = $ID; + } + }$ + ## type grammar -Each variable records a scope depth and is in one of four states: + FormalType -> Type ${ $0 = $<1; }$ + ## formal type grammar -- "in scope". This is the case between the declaration of the - variable and the end of the containing block, and also between - the usage with affirms a merge and the end of that block. +#### Base Types - The scope depth is not greater than the current parse context scope - nest depth. When the block of that depth closes, the state will - change. To achieve this, all "in scope" variables are linked - together as a stack in nesting order. +Values of the base types can be numbers, which we represent as +multi-precision fractions, strings, Booleans and labels. When +analysing the program we also need to allow for places where no value +is meaningful (type `Tnone`) and where we don't know what type to +expect yet (type is `NULL`). -- "pending". The "in scope" block has closed, but other parallel - scopes are still being processed. So far, every parallel block at - the same level that has closed has declared the name. +Values are never shared, they are always copied when used, and freed +when no longer needed. - The scope depth is the depth of the last parallel block that - enclosed the declaration, and that has closed. +When propagating type information around the program, we need to +determine if two types are compatible, where type `NULL` is compatible +with anything. There are two special cases with type compatibility, +both related to the Conditional Statement which will be described +later. In some cases a Boolean can be accepted as well as some other +primary type, and in others any type is acceptable except a label (`Vlabel`). +A separate function encoding these cases will simplify some code later. -- "conditionally in scope". The "in scope" block and all parallel - scopes have closed, and no further mention of the name has been seen. - This state includes a secondary nest depth (`min_depth`) which records - the outermost scope seen since the variable became conditionally in - scope. If a use of the name is found, the variable becomes "in scope" - and that secondary depth becomes the recorded scope depth. If the - name is declared as a new variable, the old variable becomes "out of - scope" and the recorded scope depth stays unchanged. +###### type functions -- "out of scope". The variable is neither in scope nor conditionally - in scope. It is permanently out of scope now and can be removed from - the "in scope" stack. + int (*compat)(struct type *this, struct type *other, enum val_rules rules); -###### variable fields - int depth, min_depth; - enum { OutScope, PendingScope, CondScope, InScope } scope; - struct variable *in_scope; +###### ast functions -###### parse context + static int type_compat(struct type *require, struct type *have, + enum val_rules rules) + { + if ((rules & Rboolok) && have == Tbool) + return 1; // NOTEST + if (!require || !have) + return 1; - struct variable *in_scope; + if (require->compat) + return require->compat(require, have, rules); -All variables with the same name are linked together using the -'previous' link. Those variable that have been affirmatively merged all -have a 'merged' pointer that points to one primary variable - the most -recently declared instance. When merging variables, we need to also -adjust the 'merged' pointer on any other variables that had previously -been merged with the one that will no longer be primary. + return require == have; + } -A variable that is no longer the most recent instance of a name may -still have "pending" scope, if it might still be merged with most -recent instance. These variables don't really belong in the -"in_scope" list, but are not immediately removed when a new instance -is found. Instead, they are detected and ignored when considering the -list of in_scope names. +###### includes + #include + #include "parse_string.h" + #include "parse_number.h" -The storage of the value of a variable will be described later. For now -we just need to know that when a variable goes out of scope, it might -need to be freed. For this we need to be able to find it, so assume that -`var_value()` will provide that. +###### libs + myLDLIBS := libnumber.o libstring.o -lgmp + LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS) -###### variable fields - struct variable *merged; +###### type union fields + enum vtype {Vnone, Vstr, Vnum, Vbool, Vlabel} vtype; -###### ast functions +###### value union fields + struct text str; + mpq_t num; + unsigned char bool; + int label; - static void variable_merge(struct variable *primary, struct variable *secondary) +###### ast functions + static void _free_value(struct type *type, struct value *v) { - struct variable *v; + if (!v) + return; // NOTEST + switch (type->vtype) { + case Vnone: break; + case Vstr: free(v->str.txt); break; + case Vnum: mpq_clear(v->num); break; + case Vlabel: + case Vbool: break; + } + } - primary = primary->merged; +###### value functions - for (v = primary->previous; v; v=v->previous) - if (v == secondary || v == secondary->merged || - v->merged == secondary || - v->merged == secondary->merged) { - v->scope = OutScope; - v->merged = primary; - variable_unlink_exec(v); - } + static void _val_init(struct type *type, struct value *val) + { + switch(type->vtype) { + case Vnone: // NOTEST + break; // NOTEST + case Vnum: + mpq_init(val->num); break; + case Vstr: + val->str.txt = malloc(1); + val->str.len = 0; + break; + case Vbool: + val->bool = 0; + break; + case Vlabel: + val->label = 0; // NOTEST + break; // NOTEST + } } -###### forward decls - static struct value *var_value(struct parse_context *c, struct variable *v); - -###### free global vars + static void _dup_value(struct type *type, + struct value *vold, struct value *vnew) + { + switch (type->vtype) { + case Vnone: // NOTEST + break; // NOTEST + case Vlabel: + vnew->label = vold->label; // NOTEST + break; // NOTEST + case Vbool: + vnew->bool = vold->bool; + break; + case Vnum: + mpq_init(vnew->num); + mpq_set(vnew->num, vold->num); + break; + case Vstr: + vnew->str.len = vold->str.len; + vnew->str.txt = malloc(vnew->str.len); + memcpy(vnew->str.txt, vold->str.txt, vnew->str.len); + break; + } + } - while (context.varlist) { - struct binding *b = context.varlist; - struct variable *v = b->var; - context.varlist = b->next; - free(b); - while (v) { - struct variable *next = v->previous; + static int _value_cmp(struct type *tl, struct type *tr, + struct value *left, struct value *right) + { + int cmp; + if (tl != tr) + return tl - tr; + switch (tl->vtype) { + case Vlabel: cmp = left->label == right->label ? 0 : 1; break; + case Vnum: cmp = mpq_cmp(left->num, right->num); break; + case Vstr: cmp = text_cmp(left->str, right->str); break; + case Vbool: cmp = left->bool - right->bool; break; + case Vnone: cmp = 0; // NOTEST + } + return cmp; + } - if (v->global) { - free_value(v->type, var_value(&context, v)); - if (v->depth == 0) - // This is a global constant - free_exec(v->where_decl); + static void _print_value(struct type *type, struct value *v, FILE *f) + { + switch (type->vtype) { + case Vnone: // NOTEST + fprintf(f, "*no-value*"); break; // NOTEST + case Vlabel: // NOTEST + fprintf(f, "*label-%d*", v->label); break; // NOTEST + case Vstr: + fprintf(f, "%.*s", v->str.len, v->str.txt); break; + case Vbool: + fprintf(f, "%s", v->bool ? "True":"False"); break; + case Vnum: + { + mpf_t fl; + mpf_init2(fl, 20); + mpf_set_q(fl, v->num); + gmp_fprintf(f, "%.10Fg", fl); + mpf_clear(fl); + break; } - free(v); - v = next; } } -#### Manipulating Bindings + static void _free_value(struct type *type, struct value *v); -When a name is conditionally visible, a new declaration discards the -old binding - the condition lapses. Conversely a usage of the name -affirms the visibility and extends it to the end of the containing -block - i.e. the block that contains both the original declaration and -the latest usage. This is determined from `min_depth`. When a -conditionally visible variable gets affirmed like this, it is also -merged with other conditionally visible variables with the same name. + static int bool_test(struct type *type, struct value *v) + { + return v->bool; + } -When we parse a variable declaration we either report an error if the -name is currently bound, or create a new variable at the current nest -depth if the name is unbound or bound to a conditionally scoped or -pending-scope variable. If the previous variable was conditionally -scoped, it and its homonyms becomes out-of-scope. + static struct type base_prototype = { + .init = _val_init, + .print = _print_value, + .cmp_order = _value_cmp, + .cmp_eq = _value_cmp, + .dup = _dup_value, + .free = _free_value, + }; -When we parse a variable reference (including non-declarative assignment -"foo = bar") we report an error if the name is not bound or is bound to -a pending-scope variable; update the scope if the name is bound to a -conditionally scoped variable; or just proceed normally if the named -variable is in scope. + static struct type *Tbool, *Tstr, *Tnum, *Tnone, *Tlabel; -When we exit a scope, any variables bound at this level are either -marked out of scope or pending-scoped, depending on whether the scope -was sequential or parallel. Here a "parallel" scope means the "then" -or "else" part of a conditional, or any "case" or "else" branch of a -switch. Other scopes are "sequential". +###### ast functions + static struct type *add_base_type(struct parse_context *c, char *n, + enum vtype vt, int size) + { + struct text txt = { n, strlen(n) }; + struct type *t; -When exiting a parallel scope we check if there are any variables that -were previously pending and are still visible. If there are, then -they weren't redeclared in the most recent scope, so they cannot be -merged and must become out-of-scope. If it is not the first of -parallel scopes (based on `child_count`), we check that there was a -previous binding that is still pending-scope. If there isn't, the new -variable must now be out-of-scope. + t = add_type(c, txt, &base_prototype); + t->vtype = vt; + t->size = size; + t->align = size > sizeof(void*) ? sizeof(void*) : size; + if (t->size & (t->align - 1)) + t->size = (t->size | (t->align - 1)) + 1; // NOTEST + return t; + } -When exiting a sequential scope that immediately enclosed parallel -scopes, we need to resolve any pending-scope variables. If there was -no `else` clause, and we cannot determine that the `switch` was exhaustive, -we need to mark all pending-scope variable as out-of-scope. Otherwise -all pending-scope variables become conditionally scoped. +###### context initialization -###### ast - enum closetype { CloseSequential, CloseParallel, CloseElse }; + Tbool = add_base_type(&context, "Boolean", Vbool, sizeof(char)); + Tbool->test = bool_test; + Tstr = add_base_type(&context, "string", Vstr, sizeof(struct text)); + Tnum = add_base_type(&context, "number", Vnum, sizeof(mpq_t)); + Tnone = add_base_type(&context, "none", Vnone, 0); + Tlabel = add_base_type(&context, "label", Vlabel, sizeof(void*)); -###### ast functions +##### Base Values - static struct variable *var_decl(struct parse_context *c, struct text s) - { - struct binding *b = find_binding(c, s); - struct variable *v = b->var; +We have already met values as separate objects. When manifest constants +appear in the program text, that must result in an executable which has +a constant value. So the `val` structure embeds a value in an +executable. - switch (v ? v->scope : OutScope) { - case InScope: - /* Caller will report the error */ - return NULL; - case CondScope: +###### exec type + Xval, + +###### ast + struct val { + struct exec; + struct type *vtype; + struct value val; + }; + +###### ast functions + struct val *new_val(struct type *T, struct token tk) + { + struct val *v = new_pos(val, tk); + v->vtype = T; + return v; + } + +###### declare terminals + $TERM True False + +###### Grammar + + $*val + Value -> True ${ + $0 = new_val(Tbool, $1); + $0->val.bool = 1; + }$ + | False ${ + $0 = new_val(Tbool, $1); + $0->val.bool = 0; + }$ + | NUMBER ${ { + char tail[3] = ""; + $0 = new_val(Tnum, $1); + if (number_parse($0->val.num, tail, $1.txt) == 0) { + mpq_init($0->val.num); + tok_err(c, "error: unsupported number format", &$NUM); + } else if (tail[0]) + tok_err(c, "error: unsupported number suffix", &$1); + } }$ + | STRING ${ { + char tail[3]; + $0 = new_val(Tstr, $1); + string_parse(&$1, '\\', &$0->val.str, tail); + if (tail[0]) + tok_err(c, "error: unsupported string suffix", + &$1); + } }$ + | MULTI_STRING ${ { + char tail[3]; + $0 = new_val(Tstr, $1); + string_parse(&$1, '\\', &$0->val.str, tail); + if (tail[0]) + tok_err(c, "error: unsupported string suffix", + &$1); + } }$ + +###### print exec cases + case Xval: + { + struct val *v = cast(val, e); + if (v->vtype == Tstr) + printf("\""); + // FIXME how to ensure numbers have same precision. + print_value(v->vtype, &v->val, stdout); + if (v->vtype == Tstr) + printf("\""); + break; + } + +###### propagate exec cases + case Xval: + { + struct val *val = cast(val, prog); + if (!type_compat(type, val->vtype, rules)) + type_err(c, "error: expected %1 found %2", + prog, type, rules, val->vtype); + *perr |= Erval; + return val->vtype; + } + +###### interp exec cases + case Xval: + rvtype = cast(val, e)->vtype; + dup_value(rvtype, &cast(val, e)->val, &rv); + break; + +###### ast functions + static void free_val(struct val *v) + { + if (v) + free_value(v->vtype, &v->val); + free(v); + } + +###### free exec cases + case Xval: free_val(cast(val, e)); break; + +###### ast functions + // Move all nodes from 'b' to 'rv', reversing their order. + // In 'b' 'left' is a list, and 'right' is the last node. + // In 'rv', left' is the first node and 'right' is a list. + static struct binode *reorder_bilist(struct binode *b) + { + struct binode *rv = NULL; + + while (b) { + struct exec *t = b->right; + b->right = rv; + rv = b; + if (b->left) + b = cast(binode, b->left); + else + b = NULL; + rv->left = t; + } + return rv; + } + +#### Labels + +Labels are a temporary concept until I implement enums. There are an +anonymous enum which is declared by usage. Thet are only allowed in +`use` statements and corresponding `case` entries. They appear as a +period followed by an identifier. All identifiers that are "used" must +have a "case". + +For now, we have a global list of labels, and don't check that all "use" +match "case". + +###### exec type + Xlabel, + +###### ast + struct label { + struct exec; + struct text name; + int value; + }; +###### free exec cases + case Xlabel: + free(e); + break; +###### print exec cases + case Xlabel: { + struct label *l = cast(label, e); + printf(".%.*s", l->name.len, l->name.txt); + break; + } + +###### ast + struct labels { + struct labels *next; + struct text name; + int value; + }; +###### parse context + struct labels *labels; + int next_label; +###### ast functions + static int label_lookup(struct parse_context *c, struct text name) + { + struct labels *l, **lp = &c->labels; + while (*lp && text_cmp((*lp)->name, name) < 0) + lp = &(*lp)->next; + if (*lp && text_cmp((*lp)->name, name) == 0) + return (*lp)->value; + l = calloc(1, sizeof(*l)); + l->next = *lp; + l->name = name; + if (c->next_label == 0) + c->next_label = 2; + l->value = c->next_label; + c->next_label += 1; + *lp = l; + return l->value; + } + +###### free context storage + while (context.labels) { + struct labels *l = context.labels; + context.labels = l->next; + free(l); + } + +###### declare terminals + $TERM . +###### term grammar + | . IDENTIFIER ${ { + struct label *l = new_pos(label, $ID); + l->name = $ID.txt; + $0 = l; + } }$ +###### propagate exec cases + case Xlabel: { + struct label *l = cast(label, prog); + l->value = label_lookup(c, l->name); + if (!type_compat(type, Tlabel, rules)) + type_err(c, "error: expected %1 found %2", + prog, type, rules, Tlabel); + *perr |= Erval; + return Tlabel; + } +###### interp exec cases + case Xlabel : { + struct label *l = cast(label, e); + rv.label = l->value; + rvtype = Tlabel; + break; + } + + +### Variables + +Variables are scoped named values. We store the names in a linked list +of "bindings" sorted in lexical order, and use sequential search and +insertion sort. + +###### ast + + struct binding { + struct text name; + struct binding *next; // in lexical order + ## binding fields + }; + +This linked list is stored in the parse context so that "reduce" +functions can find or add variables, and so the analysis phase can +ensure that every variable gets a type. + +###### parse context + + struct binding *varlist; // In lexical order + +###### ast functions + + static struct binding *find_binding(struct parse_context *c, struct text s) + { + struct binding **l = &c->varlist; + struct binding *n; + int cmp = 1; + + while (*l && + (cmp = text_cmp((*l)->name, s)) < 0) + l = & (*l)->next; + if (cmp == 0) + return *l; + n = calloc(1, sizeof(*n)); + n->name = s; + n->next = *l; + *l = n; + return n; + } + +Each name can be linked to multiple variables defined in different +scopes. Each scope starts where the name is declared and continues +until the end of the containing code block. Scopes of a given name +cannot nest, so a declaration while a name is in-scope is an error. + +###### binding fields + struct variable *var; + +###### ast + struct variable { + struct variable *previous; + struct type *type; + struct binding *name; + struct exec *where_decl;// where name was declared + struct exec *where_set; // where type was set + ## variable fields + }; + +When a scope closes, the values of the variables might need to be freed. +This happens in the context of some `struct exec` and each `exec` will +need to know which variables need to be freed when it completes. To +improve visibility, we add a comment when printing any `exec` that +embodies a scope to list the variables that must be freed when it ends. + +####### exec fields + struct variable *to_free; + +####### variable fields + struct exec *cleanup_exec; + struct variable *next_free; + +####### interp exec cleanup + { + struct variable *v; + for (v = e->to_free; v; v = v->next_free) { + struct value *val = var_value(c, v); + free_value(v->type, val); + } + } + +###### print exec extras + if (e->to_free) { + struct variable *v; + do_indent(indent, "/* FREE"); + for (v = e->to_free; v; v = v->next_free) { + printf(" %.*s", v->name->name.len, v->name->name.txt); + printf("[%d,%d]", v->scope_start, v->scope_end); + if (v->frame_pos >= 0) + printf("(%d+%d)", v->frame_pos, + v->type ? v->type->size:0); + } + printf(" */\n"); + } + +###### ast functions + static void variable_unlink_exec(struct variable *v) + { + struct variable **vp; + if (!v->cleanup_exec) + return; + for (vp = &v->cleanup_exec->to_free; + *vp; vp = &(*vp)->next_free) { + if (*vp != v) + continue; + *vp = v->next_free; + v->cleanup_exec = NULL; + break; + } + } + +While the naming seems strange, we include local constants in the +definition of variables. A name declared `var := value` can +subsequently be changed, but a name declared `var ::= value` cannot - +it is constant + +###### variable fields + int constant; + +Scopes in parallel branches can be partially merged. More +specifically, if a given name is declared in both branches of an +if/else then its scope is a candidate for merging. Similarly if +every branch of an exhaustive switch (e.g. has an "else" clause) +declares a given name, then the scopes from the branches are +candidates for merging. + +Note that names declared inside a loop (which is only parallel to +itself) are never visible after the loop. Similarly names defined in +scopes which are not parallel, such as those started by `for` and +`switch`, are never visible after the scope. Only variables defined in +both `then` and `else` (including the implicit then after an `if`, and +excluding `then` used with `for`) and in all `case`s and `else` of a +`switch` or `while` can be visible beyond the `if`/`switch`/`while`. + +Labels, which are a bit like variables, follow different rules. +Labels are not explicitly declared, but if an undeclared name appears +in a context where a label is legal, that effectively declares the +name as a label. The declaration remains in force (or in scope) at +least to the end of the immediately containing block and conditionally +in any larger containing block which does not declare the name in some +other way. Importantly, the conditional scope extension happens even +if the label is only used in one parallel branch of a conditional -- +when used in one branch it is treated as having been declared in all +branches. + +Merge candidates are tentatively visible beyond the end of the +branching statement which creates them. If the name is used, the +merge is affirmed and they become a single variable visible at the +outer layer. If not - if it is redeclared first - the merge lapses. + +To track scopes we have an extra stack, implemented as a linked list, +which roughly parallels the parse stack and which is used exclusively +for scoping. When a new scope is opened, a new frame is pushed and +the child-count of the parent frame is incremented. This child-count +is used to distinguish between the first of a set of parallel scopes, +in which declared variables must not be in scope, and subsequent +branches, whether they may already be conditionally scoped. + +We need a total ordering of scopes so we can easily compare to variables +to see if they are concurrently in scope. To achieve this we record a +`scope_count` which is actually a count of both beginnings and endings +of scopes. Then each variable has a record of the scope count where it +enters scope, and where it leaves. + +To push a new frame *before* any code in the frame is parsed, we need a +grammar reduction. This is most easily achieved with a grammar +element which derives the empty string, and creates the new scope when +it is recognised. This can be placed, for example, between a keyword +like "if" and the code following it. + +###### ast + struct scope { + struct scope *parent; + int child_count; + }; + +###### parse context + int scope_depth; + int scope_count; + struct scope *scope_stack; + +###### variable fields + int scope_start, scope_end; + +###### ast functions + static void scope_pop(struct parse_context *c) + { + struct scope *s = c->scope_stack; + + c->scope_stack = s->parent; + free(s); + c->scope_depth -= 1; + c->scope_count += 1; + } + + static void scope_push(struct parse_context *c) + { + struct scope *s = calloc(1, sizeof(*s)); + if (c->scope_stack) + c->scope_stack->child_count += 1; + s->parent = c->scope_stack; + c->scope_stack = s; + c->scope_depth += 1; + c->scope_count += 1; + } + +###### Grammar + + $void + OpenScope -> ${ scope_push(c); }$ + +Each variable records a scope depth and is in one of four states: + +- "in scope". This is the case between the declaration of the + variable and the end of the containing block, and also between + the usage with affirms a merge and the end of that block. + + The scope depth is not greater than the current parse context scope + nest depth. When the block of that depth closes, the state will + change. To achieve this, all "in scope" variables are linked + together as a stack in nesting order. + +- "pending". The "in scope" block has closed, but other parallel + scopes are still being processed. So far, every parallel block at + the same level that has closed has declared the name. + + The scope depth is the depth of the last parallel block that + enclosed the declaration, and that has closed. + +- "conditionally in scope". The "in scope" block and all parallel + scopes have closed, and no further mention of the name has been seen. + This state includes a secondary nest depth (`min_depth`) which records + the outermost scope seen since the variable became conditionally in + scope. If a use of the name is found, the variable becomes "in scope" + and that secondary depth becomes the recorded scope depth. If the + name is declared as a new variable, the old variable becomes "out of + scope" and the recorded scope depth stays unchanged. + +- "out of scope". The variable is neither in scope nor conditionally + in scope. It is permanently out of scope now and can be removed from + the "in scope" stack. When a variable becomes out-of-scope it is + moved to a separate list (`out_scope`) of variables which have fully + known scope. This will be used at the end of each function to assign + each variable a place in the stack frame. + +###### variable fields + int depth, min_depth; + enum { OutScope, PendingScope, CondScope, InScope } scope; + struct variable *in_scope; + +###### parse context + + struct variable *in_scope; + struct variable *out_scope; + +All variables with the same name are linked together using the +'previous' link. Those variable that have been affirmatively merged all +have a 'merged' pointer that points to one primary variable - the most +recently declared instance. When merging variables, we need to also +adjust the 'merged' pointer on any other variables that had previously +been merged with the one that will no longer be primary. + +A variable that is no longer the most recent instance of a name may +still have "pending" scope, if it might still be merged with most +recent instance. These variables don't really belong in the +"in_scope" list, but are not immediately removed when a new instance +is found. Instead, they are detected and ignored when considering the +list of in_scope names. + +The storage of the value of a variable will be described later. For now +we just need to know that when a variable goes out of scope, it might +need to be freed. For this we need to be able to find it, so assume that +`var_value()` will provide that. + +###### variable fields + struct variable *merged; + +###### ast functions + + static void variable_merge(struct variable *primary, struct variable *secondary) + { + struct variable *v; + + primary = primary->merged; + + for (v = primary->previous; v; v=v->previous) + if (v == secondary || v == secondary->merged || + v->merged == secondary || + v->merged == secondary->merged) { + v->scope = OutScope; + v->merged = primary; + if (v->scope_start < primary->scope_start) + primary->scope_start = v->scope_start; + if (v->scope_end > primary->scope_end) + primary->scope_end = v->scope_end; // NOTEST + variable_unlink_exec(v); + } + } + +###### forward decls + static struct value *var_value(struct parse_context *c, struct variable *v); + +###### free global vars + + while (context.varlist) { + struct binding *b = context.varlist; + struct variable *v = b->var; + context.varlist = b->next; + free(b); + while (v) { + struct variable *next = v->previous; + + if (v->global && v->frame_pos >= 0) { + free_value(v->type, var_value(&context, v)); + if (v->depth == 0 && v->type->free == function_free) + // This is a function constant + free_exec(v->where_decl); + } + free(v); + v = next; + } + } + +#### Manipulating Bindings + +When a name is conditionally visible, a new declaration discards the old +binding - the condition lapses. Similarly when we reach the end of a +function (outermost non-global scope) any conditional scope must lapse. +Conversely a usage of the name affirms the visibility and extends it to +the end of the containing block - i.e. the block that contains both the +original declaration and the latest usage. This is determined from +`min_depth`. When a conditionally visible variable gets affirmed like +this, it is also merged with other conditionally visible variables with +the same name. + +When we parse a variable declaration we either report an error if the +name is currently bound, or create a new variable at the current nest +depth if the name is unbound or bound to a conditionally scoped or +pending-scope variable. If the previous variable was conditionally +scoped, it and its homonyms becomes out-of-scope. + +When we parse a variable reference (including non-declarative assignment +"foo = bar") we report an error if the name is not bound or is bound to +a pending-scope variable; update the scope if the name is bound to a +conditionally scoped variable; or just proceed normally if the named +variable is in scope. + +When we exit a scope, any variables bound at this level are either +marked out of scope or pending-scoped, depending on whether the scope +was sequential or parallel. Here a "parallel" scope means the "then" +or "else" part of a conditional, or any "case" or "else" branch of a +switch. Other scopes are "sequential". + +When exiting a parallel scope we check if there are any variables that +were previously pending and are still visible. If there are, then +they weren't redeclared in the most recent scope, so they cannot be +merged and must become out-of-scope. If it is not the first of +parallel scopes (based on `child_count`), we check that there was a +previous binding that is still pending-scope. If there isn't, the new +variable must now be out-of-scope. + +When exiting a sequential scope that immediately enclosed parallel +scopes, we need to resolve any pending-scope variables. If there was +no `else` clause, and we cannot determine that the `switch` was exhaustive, +we need to mark all pending-scope variable as out-of-scope. Otherwise +all pending-scope variables become conditionally scoped. + +###### ast + enum closetype { CloseSequential, CloseFunction, CloseParallel, CloseElse }; + +###### ast functions + + static struct variable *var_decl(struct parse_context *c, struct text s) + { + struct binding *b = find_binding(c, s); + struct variable *v = b->var; + + switch (v ? v->scope : OutScope) { + case InScope: + /* Caller will report the error */ + return NULL; + case CondScope: for (; v && v->scope == CondScope; v = v->previous) @@ -1151,6 +1849,7 @@ all pending-scope variables become conditionally scoped. v->min_depth = v->depth = c->scope_depth; v->scope = InScope; v->in_scope = c->in_scope; + v->scope_start = c->scope_count; c->in_scope = v; ## variable init return v; @@ -1184,6 +1883,19 @@ all pending-scope variables become conditionally scoped. return v; } + static int var_refile(struct parse_context *c, struct variable *v) + { + /* Variable just went out of scope. Add it to the out_scope + * list, sorted by ->scope_start + */ + struct variable **vp = &c->out_scope; + while ((*vp) && (*vp)->scope_start < v->scope_start) + vp = &(*vp)->in_scope; + v->in_scope = *vp; + *vp = v; + return 0; + } + static void var_block_close(struct parse_context *c, enum closetype ct, struct exec *e) { @@ -1201,7 +1913,7 @@ all pending-scope variables become conditionally scoped. for (vp = &c->in_scope; (v = *vp) && v->min_depth > c->scope_depth; (v->scope == OutScope || v->name->var != v) - ? (*vp = v->in_scope, 0) + ? (*vp = v->in_scope, var_refile(c, v)) : ( vp = &v->in_scope, 0)) { v->min_depth = c->scope_depth; if (v->name->var != v) @@ -1210,7 +1922,9 @@ all pending-scope variables become conditionally scoped. */ continue; v->min_depth = c->scope_depth; - if (v->scope == InScope && e) { + if (v->scope == InScope) + v->scope_end = c->scope_count; + if (v->scope == InScope && e && !v->global) { /* This variable gets cleaned up when 'e' finishes */ variable_unlink_exec(v); v->cleanup_exec = e; @@ -1230,9 +1944,6 @@ all pending-scope variables become conditionally scoped. v->previous->scope == PendingScope) /* all previous branches used name */ v->scope = PendingScope; - else if (v->type == Tlabel) - /* Labels remain pending even when not used */ - v->scope = PendingScope; // UNTESTED else v->scope = OutScope; if (ct == CloseElse) { @@ -1257,9 +1968,12 @@ all pending-scope variables become conditionally scoped. abort(); // NOTEST } break; + case CloseFunction: + if (v->scope == CondScope) + /* Condition cannot continue past end of function */ + v->scope = InScope; + /* fallthrough */ case CloseSequential: - if (v->type == Tlabel) - v->scope = PendingScope; switch (v->scope) { case InScope: v->scope = OutScope; @@ -1273,10 +1987,7 @@ all pending-scope variables become conditionally scoped. for (v2 = v; v2 && v2->scope == PendingScope; v2 = v2->previous) - if (v2->type == Tlabel) - v2->scope = CondScope; - else - v2->scope = OutScope; + v2->scope = OutScope; break; case CondScope: case OutScope: break; @@ -1315,13 +2026,17 @@ tell if it was set or not later. short local_size; void *global, *local; +###### forward decls + static struct value *global_alloc(struct parse_context *c, struct type *t, + struct variable *v, struct value *init); + ###### ast functions static struct value *var_value(struct parse_context *c, struct variable *v) { if (!v->global) { if (!c->local || !v->type) - return NULL; // NOTEST + return NULL; // NOTEST if (v->frame_pos + v->type->size > c->local_size) { printf("INVALID frame_pos\n"); // NOTEST exit(2); // NOTEST @@ -1359,358 +2074,310 @@ tell if it was set or not later. if (init) memcpy(ret, init, t->size); else - val_init(t, ret); + val_init(t, ret); // NOTEST return ret; } As global values are found -- struct field initializers, labels etc -- `global_alloc()` is called to record the value in the global frame. -When the program is fully parsed, we need to walk the list of variables -to find any that weren't merged away and that aren't global, and to -calculate the frame size and assign a frame position for each -variable. For this we have `scope_finalize()`. - -###### ast functions - - static int scope_finalize(struct parse_context *c) - { - struct binding *b; - int size = 0; - - for (b = c->varlist; b; b = b->next) { - struct variable *v; - for (v = b->var; v; v = v->previous) { - struct type *t = v->type; - if (v->merged != v) - continue; - if (v->global) - continue; - if (!t) - continue; - if (size & (t->align - 1)) - size = (size + t->align) & ~(t->align-1); - v->frame_pos = size; - size += v->type->size; - } - } - return size; - } - -###### free context storage - free(context.global); - -### Executables - -Executables can be lots of different things. In many cases an -executable is just an operation combined with one or two other -executables. This allows for expressions and lists etc. Other times an -executable is something quite specific like a constant or variable name. -So we define a `struct exec` to be a general executable with a type, and -a `struct binode` which is a subclass of `exec`, forms a node in a -binary tree, and holds an operation. There will be other subclasses, -and to access these we need to be able to `cast` the `exec` into the -various other types. The first field in any `struct exec` is the type -from the `exec_types` enum. - -###### macros - #define cast(structname, pointer) ({ \ - const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \ - if (__mptr && *__mptr != X##structname) abort(); \ - (struct structname *)( (char *)__mptr);}) - - #define new(structname) ({ \ - struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \ - __ptr->type = X##structname; \ - __ptr->line = -1; __ptr->column = -1; \ - __ptr;}) - - #define new_pos(structname, token) ({ \ - struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \ - __ptr->type = X##structname; \ - __ptr->line = token.line; __ptr->column = token.col; \ - __ptr;}) - -###### ast - enum exec_types { - Xbinode, - ## exec type - }; - struct exec { - enum exec_types type; - int line, column; - ## exec fields - }; - struct binode { - struct exec; - enum Btype { - ## Binode types - } op; - struct exec *left, *right; - }; - -###### ast functions - - static int __fput_loc(struct exec *loc, FILE *f) - { - if (!loc) - return 0; - if (loc->line >= 0) { - fprintf(f, "%d:%d: ", loc->line, loc->column); - return 1; - } - if (loc->type == Xbinode) - return __fput_loc(cast(binode,loc)->left, f) || - __fput_loc(cast(binode,loc)->right, f); // NOTEST - return 0; - } - static void fput_loc(struct exec *loc, FILE *f) - { - if (!__fput_loc(loc, f)) - fprintf(f, "??:??: "); - } - -Each different type of `exec` node needs a number of functions defined, -a bit like methods. We must be able to free it, print it, analyse it -and execute it. Once we have specific `exec` types we will need to -parse them too. Let's take this a bit more slowly. - -#### Freeing - -The parser generator requires a `free_foo` function for each struct -that stores attributes and they will often be `exec`s and subtypes -there-of. So we need `free_exec` which can handle all the subtypes, -and we need `free_binode`. - -###### ast functions - - static void free_binode(struct binode *b) - { - if (!b) - return; - free_exec(b->left); - free_exec(b->right); - free(b); - } - -###### core functions - static void free_exec(struct exec *e) - { - if (!e) - return; - switch(e->type) { - ## free exec cases - } - } - -###### forward decls - - static void free_exec(struct exec *e); - -###### free exec cases - case Xbinode: free_binode(cast(binode, e)); break; - -#### Printing +When the program is fully parsed, each function is analysed, we need to +walk the list of variables local to that function and assign them an +offset in the stack frame. For this we have `scope_finalize()`. -Printing an `exec` requires that we know the current indent level for -printing line-oriented components. As will become clear later, we -also want to know what sort of bracketing to use. +We keep the stack from dense by re-using space for between variables +that are not in scope at the same time. The `out_scope` list is sorted +by `scope_start` and as we process a varible, we move it to an FIFO +stack. For each variable we consider, we first discard any from the +stack anything that went out of scope before the new variable came in. +Then we place the new variable just after the one at the top of the +stack. ###### ast functions - - static void do_indent(int i, char *str) - { - while (i-- > 0) - printf(" "); - printf("%s", str); - } - -###### core functions - static void print_binode(struct binode *b, int indent, int bracket) - { - struct binode *b2; - switch(b->op) { - ## print binode cases - } - } - - static void print_exec(struct exec *e, int indent, int bracket) - { - if (!e) - return; - switch (e->type) { - case Xbinode: - print_binode(cast(binode, e), indent, bracket); break; - ## print exec cases - } - if (e->to_free) { - struct variable *v; - do_indent(indent, "/* FREE"); - for (v = e->to_free; v; v = v->next_free) { - printf(" %.*s", v->name->name.len, v->name->name.txt); - if (v->frame_pos >= 0) - printf("(%d+%d)", v->frame_pos, - v->type ? v->type->size:0); - } - printf(" */\n"); + + static void scope_finalize(struct parse_context *c, struct type *ft) + { + int size = ft->function.local_size; + struct variable *next = ft->function.scope; + struct variable *done = NULL; + + while (next) { + struct variable *v = next; + struct type *t = v->type; + int pos; + next = v->in_scope; + if (v->merged != v) + continue; + if (!t) + continue; // NOTEST + if (v->frame_pos >= 0) + continue; + while (done && done->scope_end < v->scope_start) + done = done->in_scope; + if (done) + pos = done->frame_pos + done->type->size; + else + pos = ft->function.local_size; + if (pos & (t->align - 1)) + pos = (pos + t->align) & ~(t->align-1); + v->frame_pos = pos; + if (size < pos + v->type->size) + size = pos + v->type->size; + v->in_scope = done; + done = v; } + c->out_scope = NULL; + ft->function.local_size = size; } -###### forward decls +###### free context storage + free(context.global); - static void print_exec(struct exec *e, int indent, int bracket); +#### Variables as executables -#### Analysing +Just as we used a `val` to wrap a value into an `exec`, we similarly +need a `var` to wrap a `variable` into an exec. While each `val` +contained a copy of the value, each `var` holds a link to the variable +because it really is the same variable no matter where it appears. +When a variable is used, we need to remember to follow the `->merged` +link to find the primary instance. -As discussed, analysis involves propagating type requirements around the -program and looking for errors. +When a variable is declared, it may or may not be given an explicit +type. We need to record which so that we can report the parsed code +correctly. -So `propagate_types` is passed an expected type (being a `struct type` -pointer together with some `val_rules` flags) that the `exec` is -expected to return, and returns the type that it does return, either -of which can be `NULL` signifying "unknown". An `ok` flag is passed -by reference. It is set to `0` when an error is found, and `2` when -any change is made. If it remains unchanged at `1`, then no more -propagation is needed. +###### exec type + Xvar, ###### ast + struct var { + struct exec; + struct variable *var; + }; - enum val_rules {Rnolabel = 1<<0, Rboolok = 1<<1, Rnoconstant = 2<<1}; - -###### format cases - case 'r': - if (rules & Rnolabel) - fputs(" (labels not permitted)", stderr); - break; +###### variable fields + int explicit_type; -###### forward decls - static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok, - struct type *type, int rules); -###### core functions +###### Grammar - static struct type *__propagate_types(struct exec *prog, struct parse_context *c, int *ok, - struct type *type, int rules) - { - struct type *t; + $TERM : :: - if (!prog) - return Tnone; + $*var + VariableDecl -> IDENTIFIER : ${ { + struct variable *v = var_decl(c, $1.txt); + $0 = new_pos(var, $1); + $0->var = v; + if (v) + v->where_decl = $0; + else { + v = var_ref(c, $1.txt); + $0->var = v; + type_err(c, "error: variable '%v' redeclared", + $0, NULL, 0, NULL); + type_err(c, "info: this is where '%v' was first declared", + v->where_decl, NULL, 0, NULL); + } + } }$ + | IDENTIFIER :: ${ { + struct variable *v = var_decl(c, $1.txt); + $0 = new_pos(var, $1); + $0->var = v; + if (v) { + v->where_decl = $0; + v->constant = 1; + } else { + v = var_ref(c, $1.txt); + $0->var = v; + type_err(c, "error: variable '%v' redeclared", + $0, NULL, 0, NULL); + type_err(c, "info: this is where '%v' was first declared", + v->where_decl, NULL, 0, NULL); + } + } }$ + | IDENTIFIER : Type ${ { + struct variable *v = var_decl(c, $1.txt); + $0 = new_pos(var, $1); + $0->var = v; + if (v) { + v->where_decl = $0; + v->where_set = $0; + v->type = $explicit_type = 1; + } else { + v = var_ref(c, $1.txt); + $0->var = v; + type_err(c, "error: variable '%v' redeclared", + $0, NULL, 0, NULL); + type_err(c, "info: this is where '%v' was first declared", + v->where_decl, NULL, 0, NULL); + } + } }$ + | IDENTIFIER :: Type ${ { + struct variable *v = var_decl(c, $1.txt); + $0 = new_pos(var, $1); + $0->var = v; + if (v) { + v->where_decl = $0; + v->where_set = $0; + v->type = $constant = 1; + v->explicit_type = 1; + } else { + v = var_ref(c, $1.txt); + $0->var = v; + type_err(c, "error: variable '%v' redeclared", + $0, NULL, 0, NULL); + type_err(c, "info: this is where '%v' was first declared", + v->where_decl, NULL, 0, NULL); + } + } }$ - switch (prog->type) { - case Xbinode: - { - struct binode *b = cast(binode, prog); - switch (b->op) { - ## propagate binode cases + $*exec + Variable -> IDENTIFIER ${ { + struct variable *v = var_ref(c, $1.txt); + $0 = new_pos(var, $1); + if (v == NULL) { + /* This might be a global const or a label + * Allocate a var with impossible type Tnone, + * which will be adjusted when we find out what it is, + * or will trigger an error. + */ + v = var_decl(c, $1.txt); + if (v) { + v->type = Tnone; + v->where_decl = $0; + v->where_set = $0; } - break; } - ## propagate exec cases - } - return Tnone; - } + cast(var, $0)->var = v; + } }$ - static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok, - struct type *type, int rules) +###### print exec cases + case Xvar: { - struct type *ret = __propagate_types(prog, c, ok, type, rules); - - if (c->parse_error) - *ok = 0; - return ret; + struct var *v = cast(var, e); + if (v->var) { + struct binding *b = v->var->name; + printf("%.*s", b->name.len, b->name.txt); + } + break; } -#### Interpreting - -Interpreting an `exec` doesn't require anything but the `exec`. State -is stored in variables and each variable will be directly linked from -within the `exec` tree. The exception to this is the `main` function -which needs to look at command line arguments. This function will be -interpreted separately. - -Each `exec` can return a value combined with a type in `struct lrval`. -The type may be `Tnone` but must be non-NULL. Some `exec`s will return -the location of a value, which can be updated, in `lval`. Others will -set `lval` to NULL indicating that there is a value of appropriate type -in `rval`. - -###### core functions - - struct lrval { - struct type *type; - struct value rval, *lval; - }; +###### format cases + case 'v': + if (loc && loc->type == Xvar) { + struct var *v = cast(var, loc); + if (v->var) { + struct binding *b = v->var->name; + fprintf(stderr, "%.*s", b->name.len, b->name.txt); + } else + fputs("???", stderr); // NOTEST + } else + fputs("NOTVAR", stderr); // NOTEST + break; - static struct lrval _interp_exec(struct parse_context *c, struct exec *e); +###### propagate exec cases - static struct value interp_exec(struct parse_context *c, struct exec *e, - struct type **typeret) + case Xvar: { - struct lrval ret = _interp_exec(c, e); - - if (!ret.type) abort(); - if (typeret) - *typeret = ret.type; - if (ret.lval) - dup_value(ret.type, ret.lval, &ret.rval); - return ret.rval; + struct var *var = cast(var, prog); + struct variable *v = var->var; + if (!v) { + type_err(c, "%d:BUG: no variable!!", prog, NULL, 0, NULL); // NOTEST + return Tnone; // NOTEST + } + v = v->merged; + if (v->type == Tnone && v->where_decl == prog) + type_err(c, "error: variable used but not declared: %v", + prog, NULL, 0, NULL); + if (v->type == NULL) { + if (type && !(*perr & Efail)) { + v->type = type; + v->where_set = prog; + *perr |= Eretry; + } + } else if (!type_compat(type, v->type, rules)) { + type_err(c, "error: expected %1 but variable '%v' is %2", prog, + type, rules, v->type); + type_err(c, "info: this is where '%v' was set to %1", v->where_set, + v->type, rules, NULL); + } + if (!v->global || v->frame_pos < 0) + *perr |= Eruntime; + if (v->constant) + *perr |= Econst; + return v->type; } - static struct value *linterp_exec(struct parse_context *c, struct exec *e, - struct type **typeret) +###### interp exec cases + case Xvar: { - struct lrval ret = _interp_exec(c, e); + struct var *var = cast(var, e); + struct variable *v = var->var; - if (ret.lval) - *typeret = ret.type; - else - free_value(ret.type, &ret.rval); - return ret.lval; + v = v->merged; + lrv = var_value(c, v); + rvtype = v->type; + break; } - static struct lrval _interp_exec(struct parse_context *c, struct exec *e) +###### ast functions + + static void free_var(struct var *v) { - struct lrval ret; - struct value rv = {}, *lrv = NULL; - struct type *rvtype; + free(v); + } - rvtype = ret.type = Tnone; - if (!e) { - ret.lval = lrv; - ret.rval = rv; - return ret; - } +###### free exec cases + case Xvar: free_var(cast(var, e)); break; - switch(e->type) { - case Xbinode: - { - struct binode *b = cast(binode, e); - struct value left, right, *lleft; - struct type *ltype, *rtype; - ltype = rtype = Tnone; - switch (b->op) { - ## interp binode cases - } - free_value(ltype, &left); - free_value(rtype, &right); - break; - } - ## interp exec cases - } - ret.lval = lrv; - ret.rval = rv; - ret.type = rvtype; - ## interp exec cleanup - return ret; - } ### Complex types -Now that we have the shape of the interpreter in place we can add some -complex types and connected them in to the data structures and the -different phases of parse, analyse, print, interpret. +Now that we have the shape of the interpreter in place we can add some +complex types and connected them in to the data structures and the +different phases of parse, analyse, print, interpret. + +Being "complex" the language will naturally have syntax to access +specifics of objects of these types. These will fit into the grammar as +"Terms" which are the things that are combined with various operators to +form an "Expression". Where a Term is formed by some operation on another +Term, the subordinate Term will always come first, so for example a +member of an array will be expressed as the Term for the array followed +by an index in square brackets. The strict rule of using postfix +operations makes precedence irrelevant within terms. To provide a place +to put the grammar for terms of each type, we will start out by +introducing the "Term" grammar production, with contains at least a +simple "Value" (to be explained later). + +We also take this opportunity to introduce the "ExpressionsList" which +is a simple comma-separated list of expressions - it may be used in +various places. + +###### declare terminals + $TERM , + +###### Grammar + $*exec + Term -> Value ${ $0 = $<1; }$ + | Variable ${ $0 = $<1; }$ + ## term grammar + + $*binode + ExpressionList -> ExpressionList , Expression ${ + $0 = new(binode); + $0->op = List; + $0->left = $<1; + $0->right = $<3; + }$ + | Expression ${ + $0 = new(binode); + $0->op = List; + $0->left = NULL; + $0->right = $<1; + }$ -Thus far we have arrays and structs. +Thus far the complex types we have are arrays and structs. #### Arrays @@ -1759,25 +2426,37 @@ with a const size by whether they are prepared at parse time or not. ###### value functions - static void array_prepare_type(struct parse_context *c, struct type *type, + static int array_prepare_type(struct parse_context *c, struct type *type, int parse_time) { struct value *vsize; mpz_t q; - if (!type->array.vsize || type->array.static_size) - return; + if (type->array.static_size) + return 1; // NOTEST - guard against reentry + if (type->array.unspec && parse_time) + return 1; // NOTEST - unspec is still incomplete + if (parse_time && type->array.vsize && !type->array.vsize->global) + return 1; // NOTEST - should be impossible - vsize = var_value(c, type->array.vsize); - mpz_init(q); - mpz_tdiv_q(q, mpq_numref(vsize->num), mpq_denref(vsize->num)); - type->array.size = mpz_get_si(q); - mpz_clear(q); - - if (parse_time) { - type->array.static_size = 1; - type->size = type->array.size * type->array.member->size; - type->align = type->array.member->align; + if (type->array.vsize) { + vsize = var_value(c, type->array.vsize); + if (!vsize) + return 1; // NOTEST - should be impossible + mpz_init(q); + mpz_tdiv_q(q, mpq_numref(vsize->num), mpq_denref(vsize->num)); + type->array.size = mpz_get_si(q); + mpz_clear(q); } + if (!parse_time) + return 1; + if (type->array.member->size <= 0) + return 0; // NOTEST - error caught before here + + type->array.static_size = 1; + type->size = type->array.size * type->array.member->size; + type->align = type->array.member->align; + + return 1; } static void array_init(struct type *type, struct value *val) @@ -1815,26 +2494,23 @@ with a const size by whether they are prepared at parse time or not. free(ptr); } - static int array_compat(struct type *require, struct type *have) + static int array_compat(struct type *require, struct type *have, + enum val_rules rules) { if (have->compat != require->compat) return 0; /* Both are arrays, so we can look at details */ if (!type_compat(require->array.member, have->array.member, 0)) return 0; - if (have->array.unspec && require->array.unspec) { - if (have->array.vsize && require->array.vsize && - have->array.vsize != require->array.vsize) // UNTESTED - /* sizes might not be the same */ - return 0; // UNTESTED - return 1; - } + if (have->array.unspec && require->array.unspec && + have->array.size != require->array.size) + return 0; // NOTEST if (have->array.unspec || require->array.unspec) - return 1; // UNTESTED + return 1; if (require->array.vsize == NULL && have->array.vsize == NULL) return require->array.size == have->array.size; - return require->array.vsize == have->array.vsize; // UNTESTED + return require->array.vsize == have->array.vsize; } static void array_print_type(struct type *type, FILE *f) @@ -1844,8 +2520,10 @@ with a const size by whether they are prepared at parse time or not. struct binding *b = type->array.vsize->name; fprintf(f, "%.*s%s]", b->name.len, b->name.txt, type->array.unspec ? "::" : ""); - } else + } else if (type->array.size) fprintf(f, "%d]", type->array.size); + else + fprintf(f, "]"); type_print(type->array.member, f); } @@ -1867,18 +2545,16 @@ with a const size by whether they are prepared at parse time or not. | [ NUMBER ] Type ${ { char tail[3]; mpq_t num; - struct text noname = { "", 0 }; struct type *t; + int elements = 0; - $0 = t = add_type(c, noname, &array_prototype); - t->array.member = $<4; - t->array.vsize = NULL; if (number_parse(num, tail, $2.txt) == 0) tok_err(c, "error: unrecognised number", &$2); - else if (tail[0]) + else if (tail[0]) { tok_err(c, "error: unsupported number suffix", &$2); - else { - t->array.size = mpz_get_ui(mpq_numref(num)); + mpq_clear(num); + } else { + elements = mpz_get_ui(mpq_numref(num)); if (mpz_cmp_ui(mpq_denref(num), 1) != 0) { tok_err(c, "error: array size must be an integer", &$2); @@ -1887,54 +2563,43 @@ with a const size by whether they are prepared at parse time or not. &$2); mpq_clear(num); } - t->array.static_size = 1; - t->size = t->array.size * t->array.member->size; - t->align = t->array.member->align; + + $0 = t = add_anon_type(c, &array_prototype, "array[%d]", elements ); + t->array.size = elements; + t->array.member = $<4; + t->array.vsize = NULL; } }$ | [ IDENTIFIER ] Type ${ { struct variable *v = var_ref(c, $2.txt); - struct text noname = { "", 0 }; if (!v) tok_err(c, "error: name undeclared", &$2); else if (!v->constant) tok_err(c, "error: array size must be a constant", &$2); - $0 = add_type(c, noname, &array_prototype); + $0 = add_anon_type(c, &array_prototype, "array[%.*s]", $2.txt.len, $2.txt.txt); $0->array.member = $<4; $0->array.size = 0; $0->array.vsize = v; } }$ -###### Grammar - $*type - OptType -> Type ${ $0 = $<1; }$ - | ${ $0 = NULL; }$ - ###### formal type grammar - | [ IDENTIFIER :: OptType ] Type ${ { - struct variable *v = var_decl(c, $ID.txt); - struct text noname = { "", 0 }; - - v->type = $constant = 1; - if (!v->type) - v->type = Tnum; - $0 = add_type(c, noname, &array_prototype); - $0->array.member = $<6; + | [ ] Type ${ { + $0 = add_anon_type(c, &array_prototype, "array[]"); + $0->array.member = $array.size = 0; $0->array.unspec = 1; - $0->array.vsize = v; + $0->array.vsize = NULL; } }$ ###### Binode types - Index, + Index, Length, -###### variable grammar +###### term grammar - | Variable [ Expression ] ${ { + | Term [ Expression ] ${ { struct binode *b = new(binode); b->op = Index; b->left = $<1; @@ -1942,6 +2607,13 @@ with a const size by whether they are prepared at parse time or not. $0 = b; } }$ + | Term [ ] ${ { + struct binode *b = new(binode); + b->op = Length; + b->left = $left, -1, bracket); @@ -1950,13 +2622,18 @@ with a const size by whether they are prepared at parse time or not. printf("]"); break; + case Length: + print_exec(b->left, -1, bracket); + printf("[]"); + break; + ###### propagate binode cases case Index: /* left must be an array, right must be a number, * result is the member type of the array */ - propagate_types(b->right, c, ok, Tnum, 0); - t = propagate_types(b->left, c, ok, NULL, rules & Rnoconstant); + propagate_types(b->right, c, perr_local, Tnum, 0); + t = propagate_types(b->left, c, perr, NULL, 0); if (!t || t->compat != array_compat) { type_err(c, "error: %1 cannot be indexed", prog, t, 0, NULL); return NULL; @@ -1969,6 +2646,20 @@ with a const size by whether they are prepared at parse time or not. } break; + case Length: + /* left must be an array, result is a number + */ + t = propagate_types(b->left, c, perr, NULL, 0); + if (!t || t->compat != array_compat) { + type_err(c, "error: %1 cannot provide length", prog, t, 0, NULL); + return NULL; + } + if (!type_compat(type, Tnum, rules)) + type_err(c, "error: have %1 but need %2", prog, + Tnum, rules, type); + return Tnum; + break; + ###### interp binode cases case Index: { mpz_t q; @@ -1990,8 +2681,15 @@ with a const size by whether they are prepared at parse time or not. if (i >= 0 && i < ltype->array.size) lrv = ptr + i * rvtype->size; else - val_init(ltype->array.member, &rv); + val_init(ltype->array.member, &rv); // UNSAFE + ltype = NULL; + break; + } + case Length: { + lleft = linterp_exec(c, b->left, <ype); + mpq_set_ui(rv.num, ltype->array.size, 1); ltype = NULL; + rvtype = Tnum; break; } @@ -2045,11 +2743,18 @@ function will be needed. struct type *type; struct value *init; int offset; - } *fields; + } *fields; // This is created when field_list is analysed. + struct fieldlist { + struct fieldlist *prev; + struct field f; + struct exec *init; + } *field_list; // This is created during parsing } structure; ###### type functions void (*print_type_decl)(struct type *type, FILE *f); + struct type *(*fieldref)(struct type *t, struct parse_context *c, + struct fieldref *f, struct value **vp); ###### value functions @@ -2061,7 +2766,7 @@ function will be needed. struct value *v; v = (void*) val->ptr + type->structure.fields[i].offset; if (type->structure.fields[i].init) - dup_value(type->structure.fields[i].type, + dup_value(type->structure.fields[i].type, type->structure.fields[i].init, v); else @@ -2080,6 +2785,15 @@ function will be needed. } } + static void free_fieldlist(struct fieldlist *f) + { + if (!f) + return; + free_fieldlist(f->prev); + free_exec(f->init); + free(f); + } + static void structure_free_type(struct type *t) { int i; @@ -2089,6 +2803,88 @@ function will be needed. t->structure.fields[i].init); } free(t->structure.fields); + free_fieldlist(t->structure.field_list); + } + + static int structure_prepare_type(struct parse_context *c, + struct type *t, int parse_time) + { + int cnt = 0; + struct fieldlist *f; + + if (!parse_time || t->structure.fields) + return 1; + + for (f = t->structure.field_list; f; f=f->prev) { + enum prop_err perr; + cnt += 1; + + if (f->f.type->size <= 0) + return 0; + if (f->f.type->prepare_type) + f->f.type->prepare_type(c, f->f.type, parse_time); + + if (f->init == NULL) + continue; + do { + perr = 0; + propagate_types(f->init, c, &perr, f->f.type, 0); + } while (perr & Eretry); + if (perr & Efail) + c->parse_error += 1; // NOTEST + } + + t->structure.nfields = cnt; + t->structure.fields = calloc(cnt, sizeof(struct field)); + f = t->structure.field_list; + while (cnt > 0) { + int a = f->f.type->align; + cnt -= 1; + t->structure.fields[cnt] = f->f; + if (t->size & (a-1)) + t->size = (t->size | (a-1)) + 1; + t->structure.fields[cnt].offset = t->size; + t->size += ((f->f.type->size - 1) | (a-1)) + 1; + if (a > t->align) + t->align = a; + + if (f->init && !c->parse_error) { + struct value vl = interp_exec(c, f->init, NULL); + t->structure.fields[cnt].init = + global_alloc(c, f->f.type, NULL, &vl); + } + + f = f->prev; + } + return 1; + } + + static int find_struct_index(struct type *type, struct text field) + { + int i; + for (i = 0; i < type->structure.nfields; i++) + if (text_cmp(type->structure.fields[i].name, field) == 0) + return i; + return IndexInvalid; + } + + static struct type *structure_fieldref(struct type *t, struct parse_context *c, + struct fieldref *f, struct value **vp) + { + if (f->index == IndexUnknown) { + f->index = find_struct_index(t, f->name); + if (f->index < 0) + type_err(c, "error: cannot find requested field in %1", + f->left, t, 0, NULL); + } + if (f->index < 0) + return NULL; + if (vp) { + struct value *v = *vp; + v = (void*)v->ptr + t->structure.fields[f->index].offset; + *vp = v; + } + return t->structure.fields[f->index].type; } static struct type structure_prototype = { @@ -2096,6 +2892,8 @@ function will be needed. .free = structure_free, .free_type = structure_free_type, .print_type_decl = structure_print_type, + .prepare_type = structure_prepare_type, + .fieldref = structure_fieldref, }; ###### exec type @@ -2108,6 +2906,7 @@ function will be needed. int index; struct text name; }; + enum { IndexUnknown = -1, IndexInvalid = -2 }; ###### free exec cases case Xfieldref: @@ -2116,15 +2915,15 @@ function will be needed. break; ###### declare terminals - $TERM struct . + $TERM struct -###### variable grammar +###### term grammar - | Variable . IDENTIFIER ${ { + | Term . IDENTIFIER ${ { struct fieldref *fr = new_pos(fieldref, $2); fr->left = $<1; fr->name = $3.txt; - fr->index = -2; + fr->index = IndexUnknown; $0 = fr; } }$ @@ -2138,41 +2937,22 @@ function will be needed. break; } -###### ast functions - static int find_struct_index(struct type *type, struct text field) - { - int i; - for (i = 0; i < type->structure.nfields; i++) - if (text_cmp(type->structure.fields[i].name, field) == 0) - return i; - return -1; - } - ###### propagate exec cases case Xfieldref: { struct fieldref *f = cast(fieldref, prog); - struct type *st = propagate_types(f->left, c, ok, NULL, 0); + struct type *st = propagate_types(f->left, c, perr, NULL, 0); - if (!st) - type_err(c, "error: unknown type for field access", f->left, // UNTESTED - NULL, 0, NULL); - else if (st->init != structure_init) - type_err(c, "error: field reference attempted on %1, not a struct", + if (!st || !st->fieldref) + type_err(c, "error: field reference on %1 is not supported", f->left, st, 0, NULL); - else if (f->index == -2) { - f->index = find_struct_index(st, f->name); - if (f->index < 0) - type_err(c, "error: cannot find requested field in %1", - f->left, st, 0, NULL); - } - if (f->index >= 0) { - struct type *ft = st->structure.fields[f->index].type; - if (!type_compat(type, ft, rules)) + else { + t = st->fieldref(st, c, f, NULL); + if (t && !type_compat(type, t, rules)) type_err(c, "error: have %1 but need %2", prog, - ft, rules, type); - return ft; + t, rules, type); + return t; } break; } @@ -2183,186 +2963,559 @@ function will be needed. struct fieldref *f = cast(fieldref, e); struct type *ltype; struct value *lleft = linterp_exec(c, f->left, <ype); - lrv = (void*)lleft->ptr + ltype->structure.fields[f->index].offset; - rvtype = ltype->structure.fields[f->index].type; + lrv = lleft; + rvtype = ltype->fieldref(ltype, c, f, &lrv); + break; + } + +###### top level grammar + $*type + StructName -> IDENTIFIER ${ { + struct type *t = find_type(c, $ID.txt); + + if (t && t->size >= 0) { + tok_err(c, "error: type already declared", &$ID); + tok_err(c, "info: this is location of declartion", &t->first_use); + t = NULL; + } + if (!t) + t = add_type(c, $ID.txt, NULL); + t->first_use = $ID; + $0 = t; + } }$ + $void + DeclareStruct -> struct StructName FieldBlock Newlines ${ { + struct type *t = $name = tmp.name; + t->next = tmp.next; + t->first_use = tmp.first_use; + + t->structure.field_list = $ { IN OptNL FieldLines OUT OptNL } ${ $0 = $ SimpleFieldList Newlines ${ $0 = $prev) + f = f->prev; + f->prev = $ Field ${ $0 = $prev = $ IDENTIFIER : Type = Expression ${ { + $0 = calloc(1, sizeof(struct fieldlist)); + $0->f.name = $ID.txt; + $0->f.type = $f.init = NULL; + $0->init = $f.name = $ID.txt; + $0->f.type = $name.len, t->name.txt); + + for (i = 0; i < t->structure.nfields; i++) { + struct field *fl = t->structure.fields + i; + fprintf(f, " %.*s : ", fl->name.len, fl->name.txt); + type_print(fl->type, f); + if (fl->type->print && fl->init) { + fprintf(f, " = "); + if (fl->type == Tstr) + fprintf(f, "\""); + print_value(fl->type, fl->init, f); + if (fl->type == Tstr) + fprintf(f, "\""); + } + fprintf(f, "\n"); + } + } + +###### print type decls + { + struct type *t; + int target = -1; + + while (target != 0) { + int i = 0; + for (t = context.typelist; t ; t=t->next) + if (!t->anon && t->print_type_decl && + !t->check_args) { + i += 1; + if (i == target) + break; + } + + if (target == -1) { + target = i; + } else { + t->print_type_decl(t, stdout); + target -= 1; + } + } + } + +#### References + +References, or pointers, are values that refer to another value. They +can only refer to a `struct`, though as a struct can embed anything they +can effectively refer to anything. + +References are potentially dangerous as they might refer to some +variable which no longer exists - either because a stack frame +containing it has been discarded or because the value was allocated on +the heap and has now been free. Ocean does not yet provide any +protection against these problems. It will in due course. + +With references comes the opportunity and the need to explicitly +allocate values on the "heap" and to free them. We currently provide +fairly basic support for this. + +Reference make use of the `@` symbol in various ways. A type that starts +with `@` is a reference to whatever follows. A reference value +followed by an `@` acts as the referred value, though the `@` is often +not needed. Finally, an expression that starts with `@` is a special +reference related expression. Some examples might help. + +##### Example: Reference examples + + struct foo + a: number + b: string + ref: @foo + bar: foo + bar.number = 23; bar.string = "hello" + baz: foo + ref = bar + baz = @ref + baz.a = ref.a * 2 + + ref = @new() + ref@ = baz + @free = ref + ref = @nil + +Obviously this is very contrived. `ref` is a reference to a `foo` which +is initially set to refer to the value stored in `bar` - no extra syntax +is needed to "Take the address of" `bar` - the fact that `ref` is a +reference means that only the address make sense. + +When `ref.a` is accessed, that is whatever value is stored in `bar.a`. +The same syntax is used for accessing fields both in structs and in +references to structs. It would be correct to use `ref@.a`, but not +necessary. + +`@new()` creates an object of whatever type is needed for the program +to by type-correct. In future iterations of Ocean, arguments a +constructor will access arguments, so the the syntax now looks like a +function call. `@free` can be assigned any reference that was returned +by `@new()`, and it will be freed. `@nil` is a value of whatever +reference type is appropriate, and is stable and never the address of +anything in the heap or on the stack. A reference can be assigned +`@nil` or compared against that value. + +###### declare terminals + $TERM @ + +###### type union fields + + struct { + struct type *referent; + } reference; + +###### value union fields + struct value *ref; + +###### value functions + + static void reference_print_type(struct type *t, FILE *f) + { + fprintf(f, "@"); + type_print(t->reference.referent, f); + } + + static int reference_cmp(struct type *tl, struct type *tr, + struct value *left, struct value *right) + { + return left->ref == right->ref ? 0 : 1; + } + + static void reference_dup(struct type *t, + struct value *vold, struct value *vnew) + { + vnew->ref = vold->ref; + } + + static void reference_free(struct type *t, struct value *v) + { + /* Nothing to do here */ + } + + static int reference_compat(struct type *require, struct type *have, + enum val_rules rules) + { + if (rules & Rrefok) + if (require->reference.referent == have) + return 1; + if (have->compat != require->compat) + return 0; + if (have->reference.referent != require->reference.referent) + return 0; + return 1; + } + + static int reference_test(struct type *type, struct value *val) + { + return val->ref != NULL; + } + + static struct type *reference_fieldref(struct type *t, struct parse_context *c, + struct fieldref *f, struct value **vp) + { + struct type *rt = t->reference.referent; + + if (rt->fieldref) { + if (vp) + *vp = (*vp)->ref; + return rt->fieldref(rt, c, f, vp); + } + type_err(c, "error: field reference on %1 is not supported", + f->left, rt, 0, NULL); + return Tnone; + } + + static struct type reference_prototype = { + .print_type = reference_print_type, + .cmp_eq = reference_cmp, + .dup = reference_dup, + .test = reference_test, + .free = reference_free, + .compat = reference_compat, + .fieldref = reference_fieldref, + .size = sizeof(void*), + .align = sizeof(void*), + }; + +###### type grammar + + | @ IDENTIFIER ${ { + struct type *t = find_type(c, $ID.txt); + if (!t) { + t = add_type(c, $ID.txt, NULL); + t->first_use = $ID; + } + $0 = find_anon_type(c, &reference_prototype, "@%.*s", + $ID.txt.len, $ID.txt.txt); + $0->reference.referent = t; + } }$ + +###### core functions + static int text_is(struct text t, char *s) + { + return (strlen(s) == t.len && + strncmp(s, t.txt, t.len) == 0); + } + +###### exec type + Xref, + +###### ast + struct ref { + struct exec; + enum ref_func { RefNew, RefFree, RefNil } action; + struct type *reftype; + struct exec *right; + }; + +###### SimpleStatement Grammar + + | @ IDENTIFIER = Expression ${ { + struct ref *r = new_pos(ref, $ID); + // Must be "free" + if (!text_is($ID.txt, "free")) + tok_err(c, "error: only \"@free\" makes sense here", + &$ID); + + $0 = r; + r->action = RefFree; + r->right = $action = RefNew; + } + }$ + | @ IDENTIFIER ${ + // Only 'nil' valid here + if (!text_is($ID.txt, "nil")) { + tok_err(c, "error: Only reference value is \"@nil\"", + &$ID); + } else { + struct ref *r = new_pos(ref,$ID); + $0 = r; + r->action = RefNil; + } + }$ + +###### print exec cases + case Xref: { + struct ref *r = cast(ref, e); + switch (r->action) { + case RefNew: + printf("@new()"); break; + case RefNil: + printf("@nil"); break; + case RefFree: + do_indent(indent, "@free = "); + print_exec(r->right, indent, bracket); + break; + } break; } -###### ast - struct fieldlist { - struct fieldlist *prev; - struct field f; - }; - -###### ast functions - static void free_fieldlist(struct fieldlist *f) - { - if (!f) - return; - free_fieldlist(f->prev); - if (f->f.init) { - free_value(f->f.type, f->f.init); // UNTESTED - free(f->f.init); // UNTESTED +###### propagate exec cases + case Xref: { + struct ref *r = cast(ref, prog); + switch (r->action) { + case RefNew: + if (type && type->free != reference_free) { + type_err(c, "error: @new() can only be used with references, not %1", + prog, type, 0, NULL); + return NULL; + } + if (type && !r->reftype) { + r->reftype = type; + *perr |= Eretry; + } + *perr |= Erval; + return type; + case RefNil: + if (type && type->free != reference_free) + type_err(c, "error: @nil can only be used with reference, not %1", + prog, type, 0, NULL); + if (type && !r->reftype) { + r->reftype = type; + *perr |= Eretry; + } + *perr |= Erval; + return type; + case RefFree: + t = propagate_types(r->right, c, perr_local, NULL, 0); + if (t && t->free != reference_free) + type_err(c, "error: @free can only be assigned a reference, not %1", + prog, t, 0, NULL); + r->reftype = Tnone; + return Tnone; } - free(f); + break; // NOTEST } -###### top level grammar - DeclareStruct -> struct IDENTIFIER FieldBlock Newlines ${ { - struct type *t = - add_type(c, $2.txt, &structure_prototype); - int cnt = 0; - struct fieldlist *f; - - for (f = $3; f; f=f->prev) - cnt += 1; - - t->structure.nfields = cnt; - t->structure.fields = calloc(cnt, sizeof(struct field)); - f = $3; - while (cnt > 0) { - int a = f->f.type->align; - cnt -= 1; - t->structure.fields[cnt] = f->f; - if (t->size & (a-1)) - t->size = (t->size | (a-1)) + 1; - t->structure.fields[cnt].offset = t->size; - t->size += ((f->f.type->size - 1) | (a-1)) + 1; - if (a > t->align) - t->align = a; - f->f.init = NULL; - f = f->prev; - } - } }$ - $*fieldlist - FieldBlock -> { IN OptNL FieldLines OUT OptNL } ${ $0 = $action) { + case RefNew: + if (r->reftype) + rv.ref = calloc(1, r->reftype->reference.referent->size); + rvtype = r->reftype; + break; + case RefNil: + rv.ref = NULL; + rvtype = r->reftype; + break; + case RefFree: + rv = interp_exec(c, r->right, &rvtype); + free_value(rvtype->reference.referent, rv.ref); + free(rv.ref); + rvtype = Tnone; + break; + } + break; + } - FieldLines -> SimpleFieldList Newlines ${ $0 = $prev = $right); + free(r); + break; + } - SimpleFieldList -> Field ${ $0 = $prev = $ IDENTIFIER : Type = Expression ${ { - int ok; // UNTESTED +###### Binode types + Deref, AddressOf, - $0 = calloc(1, sizeof(struct fieldlist)); - $0->f.name = $1.txt; - $0->f.type = $<3; - $0->f.init = NULL; - do { - ok = 1; - propagate_types($<5, c, &ok, $3, 0); - } while (ok == 2); - if (!ok) - c->parse_error = 1; // UNTESTED - else { - struct value vl = interp_exec(c, $5, NULL); - $0->f.init = global_alloc(c, $0->f.type, NULL, &vl); - } - } }$ - | IDENTIFIER : Type ${ - $0 = calloc(1, sizeof(struct fieldlist)); - $0->f.name = $1.txt; - $0->f.type = $<3; - if ($0->f.type->prepare_type) - $0->f.type->prepare_type(c, $0->f.type, 1); - }$ +###### term grammar -###### forward decls - static void structure_print_type(struct type *t, FILE *f); + | Term @ ${ { + struct binode *b = new(binode); + b->op = Deref; + b->left = $left, -1, bracket); + printf("@"); + break; + case AddressOf: + print_exec(b->left, -1, bracket); + break; - fprintf(f, "struct %.*s\n", t->name.len, t->name.txt); +###### propagate binode cases + case Deref: + /* left must be a reference, and we return what it refers to */ + /* FIXME how can I pass the expected type down? */ + t = propagate_types(b->left, c, perr, NULL, 0); + *perr &= ~Erval; + if (!t || t->free != reference_free) + type_err(c, "error: Cannot dereference %1", b, t, 0, NULL); + else + return t->reference.referent; + break; - for (i = 0; i < t->structure.nfields; i++) { - struct field *fl = t->structure.fields + i; - fprintf(f, " %.*s : ", fl->name.len, fl->name.txt); - type_print(fl->type, f); - if (fl->type->print && fl->init) { - fprintf(f, " = "); - if (fl->type == Tstr) - fprintf(f, "\""); // UNTESTED - print_value(fl->type, fl->init); - if (fl->type == Tstr) - fprintf(f, "\""); // UNTESTED - } - printf("\n"); - } - } + case AddressOf: + /* left must be lval, we create reference to it */ + if (!type || type->free != reference_free) + t = propagate_types(b->left, c, perr, type, 0); // NOTEST impossible + else + t = propagate_types(b->left, c, perr, + type->reference.referent, 0); + if (t) + t = find_anon_type(c, &reference_prototype, "@%.*s", + t->name.len, t->name.txt); + return t; -###### print type decls - { // UNTESTED - struct type *t; // UNTESTED - int target = -1; +###### interp binode cases + case Deref: + left = interp_exec(c, b->left, <ype); + lrv = left.ref; + rvtype = ltype->reference.referent; + break; - while (target != 0) { - int i = 0; - for (t = context.typelist; t ; t=t->next) - if (t->print_type_decl && !t->check_args) { - i += 1; - if (i == target) - break; - } + case AddressOf: + rv.ref = linterp_exec(c, b->left, &rvtype); + rvtype = find_anon_type(c, &reference_prototype, "@%.*s", + rvtype->name.len, rvtype->name.txt); + break; - if (target == -1) { - target = i; - } else { - t->print_type_decl(t, stdout); - target -= 1; - } - } - } #### Functions A function is a chunk of code which can be passed parameters and can -return results (though results are not yet implemented). Each function -has a type which includes the set of parameters and the return value. -As yet these types cannot be declared separately from the function -itself. +return results. Each function has a type which includes the set of +parameters and the return value. As yet these types cannot be declared +separately from the function itself. -The parameters can be specified either in parentheses as a list, such as +The parameters can be specified either in parentheses as a ';' separated +list, such as ##### Example: function 1 - func main(av:[ac::number]string) + func main(av:[ac::number]string; env:[envc::number]string) code block -or as an indented list of one parameter per line +or as an indented list of one parameter per line (though each line can +be a ';' separated list) ##### Example: function 2 func main argv:[argc::number]string + env:[envc::number]string + do + code block + +In the first case a return type can follow the parentheses after a colon, +in the second it is given on a line starting with the word `return`. + +##### Example: functions that return + + func add(a:number; b:number): number + code block + + func catenate + a: string + b: string + return string do code block -For constructing these lists we use a `List` binode, which will be +Rather than returning a type, the function can specify a set of local +variables to return as a struct. The values of these variables when the +function exits will be provided to the caller. For this the return type +is replaced with a block of result declarations, either in parentheses +or bracketed by `return` and `do`. + +##### Example: functions returning multiple variables + + func to_cartesian(rho:number; theta:number):(x:number; y:number) + x = ..... + y = ..... + + func to_polar + x:number; y:number + return + rho:number + theta:number + do + rho = .... + theta = .... + +For constructing the lists we use a `List` binode, which will be further detailed when Expression Lists are introduced. ###### type union fields struct { struct binode *params; + struct type *return_type; + struct variable *scope; + int inline_result; // return value is at start of 'local' int local_size; } function; @@ -2370,7 +3523,7 @@ further detailed when Expression Lists are introduced. struct exec *function; ###### type functions - void (*check_args)(struct parse_context *c, int *ok, + void (*check_args)(struct parse_context *c, enum prop_err *perr, struct type *require, struct exec *args); ###### value functions @@ -2381,13 +3534,22 @@ further detailed when Expression Lists are introduced. val->function = NULL; } - static int function_compat(struct type *require, struct type *have) + static int function_compat(struct type *require, struct type *have, + enum val_rules rules) { // FIXME can I do anything here yet? return 0; } - static void function_check_args(struct parse_context *c, int *ok, + static struct exec *take_addr(struct exec *e) + { + struct binode *rv = new(binode); + rv->op = AddressOf; + rv->left = e; + return rv; + } + + static void function_check_args(struct parse_context *c, enum prop_err *perr, struct type *require, struct exec *args) { /* This should be 'compat', but we don't have a 'tuple' type to @@ -2398,441 +3560,280 @@ further detailed when Expression Lists are introduced. while (param) { struct var *pv = cast(var, param->left); + struct type *t = pv->var->type, *t2; if (!arg) { type_err(c, "error: insufficient arguments to function.", args, NULL, 0, NULL); break; } - *ok = 1; - propagate_types(arg->left, c, ok, pv->var->type, 0); + *perr = 0; + t2 = propagate_types(arg->left, c, perr, t, Rrefok); + if (t->free == reference_free && + t->reference.referent == t2 && + !(*perr & Erval)) { + arg->left = take_addr(arg->left); + } else if (!(*perr & Efail) && !type_compat(t2, t, 0)) { + type_err(c, "error: cannot pass rval when reference expected", + arg->left, NULL, 0, NULL); + } param = cast(binode, param->right); arg = cast(binode, arg->right); } if (arg) type_err(c, "error: too many arguments to function.", args, NULL, 0, NULL); - } - - static void function_print(struct type *type, struct value *val) - { - print_exec(val->function, 1, 0); - } - - static void function_print_type_decl(struct type *type, FILE *f) - { - struct binode *b; - fprintf(f, "("); - for (b = type->function.params; b; b = cast(binode, b->right)) { - struct variable *v = cast(var, b->left)->var; - fprintf(f, "%.*s%s", v->name->name.len, v->name->name.txt, - v->constant ? "::" : ":"); - type_print(v->type, f); - if (b->right) - fprintf(f, "; "); - } - fprintf(f, ")\n"); - } - - static void function_free_type(struct type *t) - { - free_exec(t->function.params); - } - - static struct type function_prototype = { - .size = sizeof(void*), - .align = sizeof(void*), - .free = function_free, - .compat = function_compat, - .check_args = function_check_args, - .print = function_print, - .print_type_decl = function_print_type_decl, - .free_type = function_free_type, - }; - -###### declare terminals - - $TERM func - -###### Binode types - List, - -###### Grammar - - $*variable - FuncName -> IDENTIFIER ${ { - struct variable *v = var_decl(c, $1.txt); - struct var *e = new_pos(var, $1); - e->var = v; - if (v) { - v->where_decl = e; - $0 = v; - } else { - v = var_ref(c, $1.txt); - e->var = v; - type_err(c, "error: function '%v' redeclared", - e, NULL, 0, NULL); - type_err(c, "info: this is where '%v' was first declared", - v->where_decl, NULL, 0, NULL); - free_exec(e); - } - } }$ - - - $*binode - Args -> ${ $0 = NULL; }$ - | Varlist ${ $0 = $<1; }$ - | Varlist ; ${ $0 = $<1; }$ - | Varlist NEWLINE ${ $0 = $<1; }$ - - Varlist -> Varlist ; ArgDecl ${ // UNTESTED - $0 = new(binode); - $0->op = List; - $0->left = $right = $op = List; - $0->left = NULL; - $0->right = $ IDENTIFIER : FormalType ${ { - struct variable *v = var_decl(c, $1.txt); - $0 = new(var); - $0->var = v; - v->type = $vtype = T; - return v; - } - -###### Grammar - - $TERM True False - - $*val - Value -> True ${ - $0 = new_val(Tbool, $1); - $0->val.bool = 1; - }$ - | False ${ - $0 = new_val(Tbool, $1); - $0->val.bool = 0; - }$ - | NUMBER ${ - $0 = new_val(Tnum, $1); - { - char tail[3]; - if (number_parse($0->val.num, tail, $1.txt) == 0) - mpq_init($0->val.num); // UNTESTED - if (tail[0]) - tok_err(c, "error: unsupported number suffix", - &$1); - } - }$ - | STRING ${ - $0 = new_val(Tstr, $1); - { - char tail[3]; - string_parse(&$1, '\\', &$0->val.str, tail); - if (tail[0]) - tok_err(c, "error: unsupported string suffix", - &$1); - } - }$ - | MULTI_STRING ${ - $0 = new_val(Tstr, $1); - { - char tail[3]; - string_parse(&$1, '\\', &$0->val.str, tail); - if (tail[0]) - tok_err(c, "error: unsupported string suffix", - &$1); - } - }$ - -###### print exec cases - case Xval: - { - struct val *v = cast(val, e); - if (v->vtype == Tstr) - printf("\""); - print_value(v->vtype, &v->val); - if (v->vtype == Tstr) - printf("\""); - break; - } - -###### propagate exec cases - case Xval: - { - struct val *val = cast(val, prog); - if (!type_compat(type, val->vtype, rules)) - type_err(c, "error: expected %1%r found %2", - prog, type, rules, val->vtype); - return val->vtype; - } - -###### interp exec cases - case Xval: - rvtype = cast(val, e)->vtype; - dup_value(rvtype, &cast(val, e)->val, &rv); - break; - -###### ast functions - static void free_val(struct val *v) - { - if (v) - free_value(v->vtype, &v->val); - free(v); - } - -###### free exec cases - case Xval: free_val(cast(val, e)); break; - -###### ast functions - // Move all nodes from 'b' to 'rv', reversing their order. - // In 'b' 'left' is a list, and 'right' is the last node. - // In 'rv', left' is the first node and 'right' is a list. - static struct binode *reorder_bilist(struct binode *b) + } + + static void function_print(struct type *type, struct value *val, FILE *f) { - struct binode *rv = NULL; + fprintf(f, "\n"); + print_exec(val->function, 1, 0); + } - while (b) { - struct exec *t = b->right; - b->right = rv; - rv = b; - if (b->left) - b = cast(binode, b->left); - else - b = NULL; - rv->left = t; + static void function_print_type_decl(struct type *type, FILE *f) + { + struct binode *b; + fprintf(f, "("); + for (b = type->function.params; b; b = cast(binode, b->right)) { + struct variable *v = cast(var, b->left)->var; + fprintf(f, "%.*s%s", v->name->name.len, v->name->name.txt, + v->constant ? "::" : ":"); + type_print(v->type, f); + if (b->right) + fprintf(f, "; "); + } + fprintf(f, ")"); + if (type->function.return_type != Tnone) { + fprintf(f, ":"); + if (type->function.inline_result) { + int i; + struct type *t = type->function.return_type; + fprintf(f, " ("); + for (i = 0; i < t->structure.nfields; i++) { + struct field *fl = t->structure.fields + i; + if (i) + fprintf(f, "; "); + fprintf(f, "%.*s:", fl->name.len, fl->name.txt); + type_print(fl->type, f); + } + fprintf(f, ")"); + } else + type_print(type->function.return_type, f); } - return rv; } -### Variables + static void function_free_type(struct type *t) + { + free_exec(t->function.params); + } -Just as we used a `val` to wrap a value into an `exec`, we similarly -need a `var` to wrap a `variable` into an exec. While each `val` -contained a copy of the value, each `var` holds a link to the variable -because it really is the same variable no matter where it appears. -When a variable is used, we need to remember to follow the `->merged` -link to find the primary instance. + static struct type function_prototype = { + .size = sizeof(void*), + .align = sizeof(void*), + .free = function_free, + .compat = function_compat, + .check_args = function_check_args, + .print = function_print, + .print_type_decl = function_print_type_decl, + .free_type = function_free_type, + }; -###### exec type - Xvar, +###### declare terminals -###### ast - struct var { - struct exec; - struct variable *var; - }; + $TERM func ###### Grammar - $TERM : :: - - $*var - VariableDecl -> IDENTIFIER : ${ { - struct variable *v = var_decl(c, $1.txt); - $0 = new_pos(var, $1); - $0->var = v; - if (v) - v->where_decl = $0; - else { - v = var_ref(c, $1.txt); - $0->var = v; - type_err(c, "error: variable '%v' redeclared", - $0, NULL, 0, NULL); - type_err(c, "info: this is where '%v' was first declared", - v->where_decl, NULL, 0, NULL); - } - } }$ - | IDENTIFIER :: ${ { + $*variable + FuncName -> IDENTIFIER ${ { struct variable *v = var_decl(c, $1.txt); - $0 = new_pos(var, $1); - $0->var = v; + struct var *e = new_pos(var, $1); + e->var = v; if (v) { - v->where_decl = $0; - v->constant = 1; + v->where_decl = e; + v->where_set = e; + $0 = v; } else { v = var_ref(c, $1.txt); - $0->var = v; - type_err(c, "error: variable '%v' redeclared", - $0, NULL, 0, NULL); + e->var = v; + type_err(c, "error: function '%v' redeclared", + e, NULL, 0, NULL); type_err(c, "info: this is where '%v' was first declared", - v->where_decl, NULL, 0, NULL); + v->where_decl, NULL, 0, NULL); + free_exec(e); } } }$ - | IDENTIFIER : Type ${ { - struct variable *v = var_decl(c, $1.txt); - $0 = new_pos(var, $1); - $0->var = v; - if (v) { - v->where_decl = $0; - v->where_set = $0; - v->type = $var = v; - type_err(c, "error: variable '%v' redeclared", - $0, NULL, 0, NULL); - type_err(c, "info: this is where '%v' was first declared", - v->where_decl, NULL, 0, NULL); - } + + $*binode + Args -> ArgsLine NEWLINE ${ $0 = $left; + *bp = $ ${ $0 = NULL; }$ + | Varlist ${ $0 = $<1; }$ + | Varlist ; ${ $0 = $<1; }$ + + Varlist -> Varlist ; ArgDecl ${ + $0 = new_pos(binode, $2); + $0->op = List; + $0->left = $right = $op = List; + $0->left = NULL; + $0->right = $ IDENTIFIER : FormalType ${ { + struct variable *v = var_decl(c, $ID.txt); + $0 = new_pos(var, $ID); $0->var = v; - if (v) { - v->where_decl = $0; - v->where_set = $0; - v->type = $constant = 1; - } else { - v = var_ref(c, $1.txt); - $0->var = v; - type_err(c, "error: variable '%v' redeclared", - $0, NULL, 0, NULL); - type_err(c, "info: this is where '%v' was first declared", - v->where_decl, NULL, 0, NULL); - } + v->where_decl = $0; + v->where_set = $0; + v->type = $ IDENTIFIER ${ { - struct variable *v = var_ref(c, $1.txt); - $0 = new_pos(var, $1); - if (v == NULL) { - /* This might be a label - allocate a var just in case */ - v = var_decl(c, $1.txt); - if (v) { - v->type = Tnone; - v->where_decl = $0; - v->where_set = $0; - } - } - cast(var, $0)->var = v; +##### Function calls + +A function call can appear either as an expression or as a statement. +We use a new 'Funcall' binode type to link the function with a list of +arguments, form with the 'List' nodes. + +We have already seen the "Term" which is how a function call can appear +in an expression. To parse a function call into a statement we include +it in the "SimpleStatement Grammar" which will be described later. + +###### Binode types + Funcall, + +###### term grammar + | Term ( ExpressionList ) ${ { + struct binode *b = new(binode); + b->op = Funcall; + b->left = $right = reorder_bilist($op = Funcall; + b->left = $right = NULL; + $0 = b; } }$ - ## variable grammar -###### print exec cases - case Xvar: - { - struct var *v = cast(var, e); - if (v->var) { - struct binding *b = v->var->name; - printf("%.*s", b->name.len, b->name.txt); +###### SimpleStatement Grammar + + | Term ( ExpressionList ) ${ { + struct binode *b = new(binode); + b->op = Funcall; + b->left = $right = reorder_bilist($left, -1, bracket); + printf("("); + for (b = cast(binode, b->right); b; b = cast(binode, b->right)) { + if (b->left) { + printf(" "); + print_exec(b->left, -1, bracket); + if (b->right) + printf(","); + } } + printf(")"); + if (indent >= 0) + printf("\n"); break; - } -###### format cases - case 'v': - if (loc && loc->type == Xvar) { - struct var *v = cast(var, loc); - if (v->var) { - struct binding *b = v->var->name; - fprintf(stderr, "%.*s", b->name.len, b->name.txt); - } else - fputs("???", stderr); // NOTEST - } else - fputs("NOTVAR", stderr); - break; +###### propagate binode cases -###### propagate exec cases + case Funcall: { + /* Every arg must match formal parameter, and result + * is return type of function + */ + struct binode *args = cast(binode, b->right); + struct var *v = cast(var, b->left); - case Xvar: - { - struct var *var = cast(var, prog); - struct variable *v = var->var; - if (!v) { - type_err(c, "%d:BUG: no variable!!", prog, NULL, 0, NULL); // NOTEST - return Tnone; // NOTEST - } - v = v->merged; - if (v->constant && (rules & Rnoconstant)) { - type_err(c, "error: Cannot assign to a constant: %v", - prog, NULL, 0, NULL); - type_err(c, "info: name was defined as a constant here", - v->where_decl, NULL, 0, NULL); - return v->type; - } - if (v->type == Tnone && v->where_decl == prog) - type_err(c, "error: variable used but not declared: %v", - prog, NULL, 0, NULL); - if (v->type == NULL) { - if (type && *ok != 0) { - v->type = type; - v->where_set = prog; - *ok = 2; - } - return type; + if (!v->var->type || v->var->type->check_args == NULL) { + type_err(c, "error: attempt to call a non-function.", + prog, NULL, 0, NULL); + return NULL; } - if (!type_compat(type, v->type, rules)) { - type_err(c, "error: expected %1%r but variable '%v' is %2", prog, - type, rules, v->type); - type_err(c, "info: this is where '%v' was set to %1", v->where_set, - v->type, rules, NULL); + *perr |= Eruntime; + v->var->type->check_args(c, perr_local, v->var->type, args); + if (v->var->type->function.inline_result) + *perr |= Emaycopy; + *perr |= Erval; + return v->var->type->function.return_type; + } + +###### interp binode cases + + case Funcall: { + struct var *v = cast(var, b->left); + struct type *t = v->var->type; + void *oldlocal = c->local; + int old_size = c->local_size; + void *local = calloc(1, t->function.local_size); + struct value *fbody = var_value(c, v->var); + struct binode *arg = cast(binode, b->right); + struct binode *param = t->function.params; + + while (param) { + struct var *pv = cast(var, param->left); + struct type *vtype = NULL; + struct value val = interp_exec(c, arg->left, &vtype); + struct value *lval; + c->local = local; c->local_size = t->function.local_size; + lval = var_value(c, pv->var); + c->local = oldlocal; c->local_size = old_size; + memcpy(lval, &val, vtype->size); + param = cast(binode, param->right); + arg = cast(binode, arg->right); } - if (!type) - return v->type; - return type; + c->local = local; c->local_size = t->function.local_size; + if (t->function.inline_result && dtype) { + _interp_exec(c, fbody->function, NULL, NULL); + memcpy(dest, local, dtype->size); + rvtype = ret.type = NULL; + } else + rv = interp_exec(c, fbody->function, &rvtype); + c->local = oldlocal; c->local_size = old_size; + free(local); + break; } -###### interp exec cases - case Xvar: - { - struct var *var = cast(var, e); - struct variable *v = var->var; +## Complex executables: statements and expressions - v = v->merged; - lrv = var_value(c, v); - rvtype = v->type; - break; - } +Now that we have types and values and variables and most of the basic +Terms which provide access to these, we can explore the more complex +code that combine all of these to get useful work done. Specifically +statements and expressions. -###### ast functions +Expressions are various combinations of Terms. We will use operator +precedence to ensure correct parsing. The simplest Expression is just a +Term - others will follow. - static void free_var(struct var *v) - { - free(v); - } +###### Grammar -###### free exec cases - case Xvar: free_var(cast(var, e)); break; + $*exec + Expression -> Term ${ $0 = $ Expression if Expression else Expression $$ifelse ${ { - struct binode *b1 = new(binode); - struct binode *b2 = new(binode); - b1->op = CondExpr; - b1->left = $<3; - b1->right = b2; - b2->op = CondExpr; - b2->left = $<1; - b2->right = $<5; - $0 = b1; - } }$ - ## expression grammar +###### expression grammar + + | Expression if Expression else Expression $$ifelse ${ { + struct binode *b1 = new(binode); + struct binode *b2 = new(binode); + b1->op = CondExpr; + b1->left = $<3; + b1->right = b2; + b2->op = CondExpr; + b2->left = $<1; + b2->right = $<5; + $0 = b1; + } }$ ###### print binode cases @@ -2891,9 +3891,9 @@ there. struct binode *b2 = cast(binode, b->right); struct type *t2; - propagate_types(b->left, c, ok, Tbool, 0); - t = propagate_types(b2->left, c, ok, type, Rnolabel); - t2 = propagate_types(b2->right, c, ok, type ?: t, Rnolabel); + propagate_types(b->left, c, perr_local, Tbool, 0); + t = propagate_types(b2->left, c, perr, type, 0); + t2 = propagate_types(b2->right, c, perr, type ?: t, 0); return t ?: t2; } @@ -2903,119 +3903,49 @@ there. struct binode *b2 = cast(binode, b->right); left = interp_exec(c, b->left, <ype); if (left.bool) - rv = interp_exec(c, b2->left, &rvtype); // UNTESTED + rv = interp_exec(c, b2->left, &rvtype); else rv = interp_exec(c, b2->right, &rvtype); } break; -### Expression list - -We take a brief detour, now that we have expressions, to describe lists -of expressions. These will be needed for function parameters and -possibly other situations. They seem generic enough to introduce here -to be used elsewhere. - -And ExpressionList will use the `List` type of `binode`, building up at -the end. And place where they are used will probably call -`reorder_bilist()` to get a more normal first/next arrangement. - -###### declare terminals - $TERM , - -`List` execs have no implicit semantics, so they are never propagated or -interpreted. The can be printed as a comma separate list, which is how -they are parsed. Note they are also used for function formal parameter -lists. In that case a separate function is used to print them. - -###### print binode cases - case List: - while (b) { - printf(" "); - print_exec(b->left, -1, bracket); - if (b->right) - printf(","); - b = cast(binode, b->right); - } - break; - -###### propagate binode cases - case List: abort(); // NOTEST -###### interp binode cases - case List: abort(); // NOTEST - -###### Grammar - - $*binode - ExpressionList -> ExpressionList , Expression ${ - $0 = new(binode); - $0->op = List; - $0->left = $<1; - $0->right = $<3; - }$ - | Expression ${ - $0 = new(binode); - $0->op = List; - $0->left = NULL; - $0->right = $<1; - }$ - ### Expressions: Boolean The next class of expressions to use the `binode` will be Boolean -expressions. "`and then`" and "`or else`" are similar to `and` and `or` -have same corresponding precendence. The difference is that they don't +expressions. `and` and `or` are short-circuit operators that don't evaluate the second expression if not necessary. ###### Binode types And, - AndThen, Or, - OrElse, Not, -###### expr precedence +###### declare terminals $LEFT or $LEFT and $LEFT not ###### expression grammar - | Expression or Expression ${ { - struct binode *b = new(binode); - b->op = Or; - b->left = $<1; - b->right = $<3; - $0 = b; - } }$ - | Expression or else Expression ${ { - struct binode *b = new(binode); - b->op = OrElse; - b->left = $<1; - b->right = $<4; - $0 = b; - } }$ - - | Expression and Expression ${ { - struct binode *b = new(binode); - b->op = And; - b->left = $<1; - b->right = $<3; - $0 = b; - } }$ - | Expression and then Expression ${ { - struct binode *b = new(binode); - b->op = AndThen; - b->left = $<1; - b->right = $<4; - $0 = b; - } }$ - - | not Expression ${ { - struct binode *b = new(binode); - b->op = Not; - b->right = $<2; - $0 = b; - } }$ + | Expression or Expression ${ { + struct binode *b = new(binode); + b->op = Or; + b->left = $<1; + b->right = $<3; + $0 = b; + } }$ + | Expression and Expression ${ { + struct binode *b = new(binode); + b->op = And; + b->left = $<1; + b->right = $<3; + $0 = b; + } }$ + | not Expression ${ { + struct binode *b = new(binode); + b->op = Not; + b->right = $<2; + $0 = b; + } }$ ###### print binode cases case And: @@ -3025,13 +3955,6 @@ evaluate the second expression if not necessary. print_exec(b->right, -1, bracket); if (bracket) printf(")"); break; - case AndThen: - if (bracket) printf("("); - print_exec(b->left, -1, bracket); - printf(" and then "); - print_exec(b->right, -1, bracket); - if (bracket) printf(")"); - break; case Or: if (bracket) printf("("); print_exec(b->left, -1, bracket); @@ -3039,13 +3962,6 @@ evaluate the second expression if not necessary. print_exec(b->right, -1, bracket); if (bracket) printf(")"); break; - case OrElse: - if (bracket) printf("("); - print_exec(b->left, -1, bracket); - printf(" or else "); - print_exec(b->right, -1, bracket); - if (bracket) printf(")"); - break; case Not: if (bracket) printf("("); printf("not "); @@ -3055,35 +3971,24 @@ evaluate the second expression if not necessary. ###### propagate binode cases case And: - case AndThen: case Or: - case OrElse: case Not: /* both must be Tbool, result is Tbool */ - propagate_types(b->left, c, ok, Tbool, 0); - propagate_types(b->right, c, ok, Tbool, 0); + propagate_types(b->left, c, perr, Tbool, 0); + propagate_types(b->right, c, perr, Tbool, 0); if (type && type != Tbool) type_err(c, "error: %1 operation found where %2 expected", prog, Tbool, 0, type); + *perr |= Erval; return Tbool; ###### interp binode cases case And: - rv = interp_exec(c, b->left, &rvtype); - right = interp_exec(c, b->right, &rtype); - rv.bool = rv.bool && right.bool; - break; - case AndThen: rv = interp_exec(c, b->left, &rvtype); if (rv.bool) rv = interp_exec(c, b->right, NULL); break; case Or: - rv = interp_exec(c, b->left, &rvtype); - right = interp_exec(c, b->right, &rtype); - rv.bool = rv.bool || right.bool; - break; - case OrElse: rv = interp_exec(c, b->left, &rvtype); if (!rv.bool) rv = interp_exec(c, b->right, NULL); @@ -3122,7 +4027,7 @@ expression operator, and the `CMPop` non-terminal will match one of them. Eql, NEql, -###### expr precedence +###### declare terminals $LEFT < > <= >= == != CMPop ###### expression grammar @@ -3137,12 +4042,12 @@ expression operator, and the `CMPop` non-terminal will match one of them. ###### Grammar $eop - CMPop -> < ${ $0.op = Less; }$ - | > ${ $0.op = Gtr; }$ - | <= ${ $0.op = LessEq; }$ - | >= ${ $0.op = GtrEq; }$ - | == ${ $0.op = Eql; }$ - | != ${ $0.op = NEql; }$ + CMPop -> < ${ $0.op = Less; }$ + | > ${ $0.op = Gtr; }$ + | <= ${ $0.op = LessEq; }$ + | >= ${ $0.op = GtrEq; }$ + | == ${ $0.op = Eql; }$ + | != ${ $0.op = NEql; }$ ###### print binode cases @@ -3175,17 +4080,18 @@ expression operator, and the `CMPop` non-terminal will match one of them. case Eql: case NEql: /* Both must match but not be labels, result is Tbool */ - t = propagate_types(b->left, c, ok, NULL, Rnolabel); + t = propagate_types(b->left, c, perr, NULL, 0); if (t) - propagate_types(b->right, c, ok, t, 0); + propagate_types(b->right, c, perr, t, 0); else { - t = propagate_types(b->right, c, ok, NULL, Rnolabel); // UNTESTED - if (t) // UNTESTED - t = propagate_types(b->left, c, ok, t, 0); // UNTESTED + t = propagate_types(b->right, c, perr, NULL, 0); // NOTEST + if (t) // NOTEST + t = propagate_types(b->left, c, perr, t, 0); // NOTEST } if (!type_compat(type, Tbool, 0)) type_err(c, "error: Comparison returns %1 but %2 expected", prog, Tbool, rules, type); + *perr |= Erval; return Tbool; ###### interp binode cases @@ -3216,10 +4122,17 @@ expression operator, and the `CMPop` non-terminal will match one of them. ### Expressions: Arithmetic etc. The remaining expressions with the highest precedence are arithmetic, -string concatenation, and string conversion. String concatenation +string concatenation, string conversion, and testing. String concatenation (`++`) has the same precedence as multiplication and division, but lower than the uniary. +Testing comes in two forms. A single question mark (`?`) is a uniary +operator which converts come types into Boolean. The general meaning is +"is this a value value" and there will be more uses as the language +develops. A double questionmark (`??`) is a binary operator (Choose), +with same precedence as multiplication, which returns the LHS if it +tests successfully, else returns the RHS. + String conversion is a temporary feature until I get a better type system. `$` is a prefix operator which expects a string and returns a number. @@ -3229,68 +4142,72 @@ absolute value and negation). These have different operator names. We also have a 'Bracket' operator which records where parentheses were found. This makes it easy to reproduce these when printing. Possibly I -should only insert brackets were needed for precedence. +should only insert brackets were needed for precedence. Putting +parentheses around an expression converts it into a Term, ###### Binode types Plus, Minus, Times, Divide, Rem, - Concat, - Absolute, Negate, + Concat, Choose, + Absolute, Negate, Test, StringConv, Bracket, -###### expr precedence +###### declare terminals $LEFT + - Eop - $LEFT * / % ++ Top - $LEFT Uop $ + $LEFT * / % ++ ?? Top + $LEFT Uop $ ? $TERM ( ) ###### expression grammar - | Expression Eop Expression ${ { - struct binode *b = new(binode); - b->op = $2.op; - b->left = $<1; - b->right = $<3; - $0 = b; - } }$ + | Expression Eop Expression ${ { + struct binode *b = new(binode); + b->op = $2.op; + b->left = $<1; + b->right = $<3; + $0 = b; + } }$ - | Expression Top Expression ${ { - struct binode *b = new(binode); - b->op = $2.op; - b->left = $<1; - b->right = $<3; - $0 = b; - } }$ - - | ( Expression ) ${ { - struct binode *b = new_pos(binode, $1); - b->op = Bracket; - b->right = $<2; - $0 = b; - } }$ - | Uop Expression ${ { - struct binode *b = new(binode); - b->op = $1.op; - b->right = $<2; - $0 = b; - } }$ - | Value ${ $0 = $<1; }$ - | Variable ${ $0 = $<1; }$ + | Expression Top Expression ${ { + struct binode *b = new(binode); + b->op = $2.op; + b->left = $<1; + b->right = $<3; + $0 = b; + } }$ + + | Uop Expression ${ { + struct binode *b = new(binode); + b->op = $1.op; + b->right = $<2; + $0 = b; + } }$ + +###### term grammar + + | ( Expression ) ${ { + struct binode *b = new_pos(binode, $1); + b->op = Bracket; + b->right = $<2; + $0 = b; + } }$ ###### Grammar $eop - Eop -> + ${ $0.op = Plus; }$ - | - ${ $0.op = Minus; }$ + Eop -> + ${ $0.op = Plus; }$ + | - ${ $0.op = Minus; }$ - Uop -> + ${ $0.op = Absolute; }$ - | - ${ $0.op = Negate; }$ - | $ ${ $0.op = StringConv; }$ + Uop -> + ${ $0.op = Absolute; }$ + | - ${ $0.op = Negate; }$ + | $ ${ $0.op = StringConv; }$ + | ? ${ $0.op = Test; }$ - Top -> * ${ $0.op = Times; }$ - | / ${ $0.op = Divide; }$ - | % ${ $0.op = Rem; }$ - | ++ ${ $0.op = Concat; }$ + Top -> * ${ $0.op = Times; }$ + | / ${ $0.op = Divide; }$ + | % ${ $0.op = Rem; }$ + | ++ ${ $0.op = Concat; }$ + | ?? ${ $0.op = Choose; }$ ###### print binode cases case Plus: @@ -3299,6 +4216,7 @@ should only insert brackets were needed for precedence. case Divide: case Concat: case Rem: + case Choose: if (bracket) printf("("); print_exec(b->left, indent, bracket); switch(b->op) { @@ -3308,6 +4226,7 @@ should only insert brackets were needed for precedence. case Divide: fputs(" / ", stdout); break; case Rem: fputs(" % ", stdout); break; case Concat: fputs(" ++ ", stdout); break; + case Choose: fputs(" ?? ", stdout); break; default: abort(); // NOTEST } // NOTEST print_exec(b->right, indent, bracket); @@ -3316,20 +4235,23 @@ should only insert brackets were needed for precedence. case Absolute: case Negate: case StringConv: + case Test: if (bracket) printf("("); switch (b->op) { case Absolute: fputs("+", stdout); break; case Negate: fputs("-", stdout); break; case StringConv: fputs("$", stdout); break; + case Test: fputs("?", stdout); break; default: abort(); // NOTEST } // NOTEST print_exec(b->right, indent, bracket); if (bracket) printf(")"); break; case Bracket: - printf("("); + /* Avoid double brackets... */ + if (!bracket) printf("("); print_exec(b->right, indent, bracket); - printf(")"); + if (!bracket) printf(")"); break; ###### propagate binode cases @@ -3343,33 +4265,57 @@ should only insert brackets were needed for precedence. case Negate: /* as propagate_types ignores a NULL, * unary ops fit here too */ - propagate_types(b->left, c, ok, Tnum, 0); - propagate_types(b->right, c, ok, Tnum, 0); + propagate_types(b->left, c, perr, Tnum, 0); + propagate_types(b->right, c, perr, Tnum, 0); if (!type_compat(type, Tnum, 0)) type_err(c, "error: Arithmetic returns %1 but %2 expected", prog, Tnum, rules, type); + *perr |= Erval; return Tnum; case Concat: /* both must be Tstr, result is Tstr */ - propagate_types(b->left, c, ok, Tstr, 0); - propagate_types(b->right, c, ok, Tstr, 0); + propagate_types(b->left, c, perr, Tstr, 0); + propagate_types(b->right, c, perr, Tstr, 0); if (!type_compat(type, Tstr, 0)) type_err(c, "error: Concat returns %1 but %2 expected", prog, Tstr, rules, type); + *perr |= Erval; return Tstr; case StringConv: /* op must be string, result is number */ - propagate_types(b->left, c, ok, Tstr, 0); + propagate_types(b->left, c, perr, Tstr, 0); if (!type_compat(type, Tnum, 0)) - type_err(c, // UNTESTED + type_err(c, "error: Can only convert string to number, not %1", prog, type, 0, NULL); + *perr |= Erval; return Tnum; + case Test: + /* LHS must support ->test, result is Tbool */ + t = propagate_types(b->right, c, perr, NULL, 0); + if (!t || !t->test) + type_err(c, "error: '?' requires a testable value, not %1", + prog, t, 0, NULL); + *perr |= Erval; + return Tbool; + + case Choose: + /* LHS and RHS must match and are returned. Must support + * ->test + */ + t = propagate_types(b->left, c, perr, type, rules); + t = propagate_types(b->right, c, perr, t, rules); + if (t && t->test == NULL) + type_err(c, "error: \"??\" requires a testable value, not %1", + prog, t, 0, NULL); + *perr |= Erval; + return t; + case Bracket: - return propagate_types(b->right, c, ok, type, 0); + return propagate_types(b->right, c, perr, type, rules); ###### interp binode cases @@ -3424,143 +4370,53 @@ should only insert brackets were needed for precedence. right = interp_exec(c, b->right, &rtype); rvtype = Tstr; rv.str = text_join(left.str, right.str); - break; - case StringConv: - right = interp_exec(c, b->right, &rvtype); - rtype = Tstr; - rvtype = Tnum; - - struct text tx = right.str; - char tail[3]; - int neg = 0; - if (tx.txt[0] == '-') { - neg = 1; // UNTESTED - tx.txt++; // UNTESTED - tx.len--; // UNTESTED - } - if (number_parse(rv.num, tail, tx) == 0) - mpq_init(rv.num); // UNTESTED - else if (neg) - mpq_neg(rv.num, rv.num); // UNTESTED - if (tail[0]) - printf("Unsupported suffix: %.*s\n", tx.len, tx.txt); // UNTESTED - - break; - -###### value functions - - static struct text text_join(struct text a, struct text b) - { - struct text rv; - rv.len = a.len + b.len; - rv.txt = malloc(rv.len); - memcpy(rv.txt, a.txt, a.len); - memcpy(rv.txt+a.len, b.txt, b.len); - return rv; - } - -### Function calls - -A function call can appear either as an expression or as a statement. -As functions cannot yet return values, only the statement version will work. -We use a new 'Funcall' binode type to link the function with a list of -arguments, form with the 'List' nodes. - -###### Binode types - Funcall, - -###### expression grammar - | Variable ( ExpressionList ) ${ { - struct binode *b = new(binode); - b->op = Funcall; - b->left = $right = reorder_bilist($op = Funcall; - b->left = $right = NULL; - $0 = b; - } }$ - -###### SimpleStatement Grammar - - | Variable ( ExpressionList ) ${ { - struct binode *b = new(binode); - b->op = Funcall; - b->left = $right = reorder_bilist($left, -1, bracket); - printf("("); - for (b = cast(binode, b->right); b; b = cast(binode, b->right)) { - if (b->left) { - printf(" "); - print_exec(b->left, -1, bracket); - if (b->right) - printf(","); - } - } - printf(")"); - if (indent >= 0) - printf("\n"); - break; - -###### propagate binode cases - - case Funcall: { - /* Every arg must match formal parameter, and result - * is return type of function (currently Tnone). - */ - struct binode *args = cast(binode, b->right); - struct var *v = cast(var, b->left); - - if (!v->var->type || v->var->type->check_args == NULL) { - type_err(c, "error: attempt to call a non-function.", - prog, NULL, 0, NULL); - return NULL; - } - v->var->type->check_args(c, ok, v->var->type, args); - return Tnone; - } - -###### interp binode cases - - case Funcall: { - struct var *v = cast(var, b->left); - struct type *t = v->var->type; - void *oldlocal = c->local; - int old_size = c->local_size; - void *local = calloc(1, t->function.local_size); - struct value *fbody = var_value(c, v->var); - struct binode *arg = cast(binode, b->right); - struct binode *param = t->function.params; - - while (param) { - struct var *pv = cast(var, param->left); - struct type *vtype = NULL; - struct value val = interp_exec(c, arg->left, &vtype); - struct value *lval; - c->local = local; c->local_size = t->function.local_size; - lval = var_value(c, pv->var); - c->local = oldlocal; c->local_size = old_size; - memcpy(lval, &val, vtype->size); - param = cast(binode, param->right); - arg = cast(binode, arg->right); + break; + case StringConv: + right = interp_exec(c, b->right, &rvtype); + rtype = Tstr; + rvtype = Tnum; + + struct text tx = right.str; + char tail[3] = ""; + int neg = 0; + if (tx.txt[0] == '-') { + neg = 1; + tx.txt++; + tx.len--; } - c->local = local; c->local_size = t->function.local_size; - right = interp_exec(c, fbody->function, &rtype); - c->local = oldlocal; c->local_size = old_size; - free(local); + if (number_parse(rv.num, tail, tx) == 0) + mpq_init(rv.num); + else if (neg) + mpq_neg(rv.num, rv.num); + if (tail[0]) + printf("Unsupported suffix: %.*s\n", tx.len, tx.txt); + break; + case Test: + right = interp_exec(c, b->right, &rtype); + rvtype = Tbool; + rv.bool = !!rtype->test(rtype, &right); + break; + case Choose: + left = interp_exec(c, b->left, <ype); + if (ltype->test(ltype, &left)) { + rv = left; + rvtype = ltype; + ltype = NULL; + } else + rv = interp_exec(c, b->right, &rvtype); + break; + +###### value functions + + static struct text text_join(struct text a, struct text b) + { + struct text rv; + rv.len = a.len + b.len; + rv.txt = malloc(rv.len); + memcpy(rv.txt, a.txt, a.len); + memcpy(rv.txt+a.len, b.txt, b.len); + return rv; } ### Blocks, Statements, and Statement lists. @@ -3616,6 +4472,12 @@ which does nothing and is represented as a `NULL` pointer in a `Block` list. Other stand-alone statements will follow once the infrastructure is in-place. +As many statements will use binodes, we declare a binode pointer 'b' in +the common header for all reductions to use. + +###### Parser: reduce + struct binode *b; + ###### Binode types Block, @@ -3625,99 +4487,89 @@ is in-place. $*binode Block -> { IN OptNL Statementlist OUT OptNL } ${ $0 = $ OpenScope { IN OptNL Statementlist OUT OptNL } ${ $0 = $ { OpenScope IN OptNL Statementlist OUT OptNL } ${ $0 = $ { IN OpenScope OptNL Statementlist OUT OptNL } ${ $0 = $ { IN OptNL Statementlist OUT OptNL } ${ $0 = $ ComplexStatements ${ $0 = reorder_bilist($ ComplexStatements ComplexStatement ${ - if ($2 == NULL) { - $0 = $<1; - } else { - $0 = new(binode); - $0->op = Block; - $0->left = $<1; - $0->right = $<2; - } - }$ - | ComplexStatement ${ - if ($1 == NULL) { - $0 = NULL; - } else { - $0 = new(binode); - $0->op = Block; - $0->left = NULL; - $0->right = $<1; - } - }$ - - $*exec - ComplexStatement -> SimpleStatements Newlines ${ - $0 = reorder_bilist($ SimpleStatements ; SimpleStatement ${ + if ($2 == NULL) { + $0 = $<1; // NOTEST - impossible + } else { $0 = new(binode); $0->op = Block; $0->left = $<1; - $0->right = $<3; - }$ - | SimpleStatement ${ + $0->right = $<2; + } + }$ + | ComplexStatement ${ + if ($1 == NULL) { + $0 = NULL; // NOTEST - impossible + } else { $0 = new(binode); $0->op = Block; $0->left = NULL; $0->right = $<1; - }$ + } + }$ + + $*exec + ComplexStatement -> SimpleStatements Newlines ${ + $0 = reorder_bilist($ SimpleStatements ; SimpleStatement ${ + $0 = new(binode); + $0->op = Block; + $0->left = $<1; + $0->right = $<3; + }$ + | SimpleStatement ${ + $0 = new(binode); + $0->op = Block; + $0->left = NULL; + $0->right = $<1; + }$ $TERM pass + $*exec SimpleStatement -> pass ${ $0 = NULL; }$ - | ERROR ${ tok_err(c, "Syntax error in statement", &$1); }$ - ## SimpleStatement Grammar + | ERROR ${ tok_err(c, "Syntax error in statement", &$1); }$ + ## SimpleStatement Grammar ###### print binode cases case Block: - if (indent < 0) { - // simple statement - if (b->left == NULL) // UNTESTED - printf("pass"); // UNTESTED - else - print_exec(b->left, indent, bracket); // UNTESTED - if (b->right) { // UNTESTED - printf("; "); // UNTESTED - print_exec(b->right, indent, bracket); // UNTESTED - } - } else { - // block, one per line - if (b->left == NULL) - do_indent(indent, "pass\n"); - else - print_exec(b->left, indent, bracket); - if (b->right) - print_exec(b->right, indent, bracket); - } + // block, one per line + if (b->left == NULL) + do_indent(indent, "pass\n"); + else + print_exec(b->left, indent, bracket); + if (b->right) + print_exec(b->right, indent, bracket); break; ###### propagate binode cases @@ -3733,14 +4585,21 @@ is in-place. struct binode *e; for (e = b; e; e = cast(binode, e->right)) { - t = propagate_types(e->left, c, ok, NULL, rules); - if ((rules & Rboolok) && t == Tbool) + *perr |= *perr_local; + *perr_local = 0; + t = propagate_types(e->left, c, perr_local, NULL, rules); + if ((rules & Rboolok) && (t == Tbool || t == Tnone)) + t = NULL; + if (t == Tnone && e->right) + /* Only the final statement *must* return a value + * when not Rboolok + */ t = NULL; - if (t && t != Tnone && t != Tbool) { + if (t) { if (!type) type = t; else if (t != type) - type_err(c, "error: expected %1%r, found %2", + type_err(c, "error: expected %1, found %2", e->left, type, rules, t); } } @@ -3771,39 +4630,44 @@ printed. ###### Binode types Print, -##### expr precedence +##### declare terminals $TERM print ###### SimpleStatement Grammar | print ExpressionList ${ - $0 = new(binode); - $0->op = Print; - $0->right = NULL; - $0->left = reorder_bilist($op = Print; + b->right = NULL; + b->left = reorder_bilist($op = Print; - $0->right = reorder_bilist($left = NULL; + $0 = b = new_pos(binode, $1); + b->op = Print; + b->right = reorder_bilist($left = NULL; } }$ | print ${ - $0 = new(binode); - $0->op = Print; - $0->left = NULL; - $0->right = NULL; + $0 = b = new_pos(binode, $1); + b->op = Print; + b->left = NULL; + b->right = NULL; }$ ###### print binode cases case Print: do_indent(indent, "print"); - if (b->right) { - print_exec(b->right, -1, bracket); + b2 = cast(binode, b->left ?: b->right); + while (b2) { + printf(" "); + print_exec(b2->left, -1, bracket); + if (b2->right) + printf(","); + b2 = cast(binode, b2->right); + } + if (b->right) printf(","); - } else - print_exec(b->left, -1, bracket); if (indent >= 0) printf("\n"); break; @@ -3817,7 +4681,7 @@ printed. else b = cast(binode, b->right); while (b) { - propagate_types(b->left, c, ok, NULL, Rnolabel); + propagate_types(b->left, c, perr_local, NULL, 0); b = cast(binode, b->right); } break; @@ -3831,7 +4695,7 @@ printed. b2 = cast(binode, b->right); for (; b2; b2 = cast(binode, b2->right)) { left = interp_exec(c, b2->left, <ype); - print_value(ltype, &left); + print_value(ltype, &left, stdout); free_value(ltype, &left); if (b2->right) putchar(' '); @@ -3848,9 +4712,9 @@ An assignment will assign a value to a variable, providing it hasn't been declared as a constant. The analysis phase ensures that the type will be correct so the interpreter just needs to perform the calculation. There is a form of assignment which declares a new -variable as well as assigning a value. If a name is assigned before -it is declared, and error will be raised as the name is created as -`Tlabel` and it is illegal to assign to such names. +variable as well as assigning a value. If a name is used before +it is declared, it is assumed to be a global constant which are allowed to +be declared at any time. ###### Binode types Assign, @@ -3860,39 +4724,40 @@ it is declared, and error will be raised as the name is created as $TERM = ###### SimpleStatement Grammar - | Variable = Expression ${ - $0 = new(binode); - $0->op = Assign; - $0->left = $<1; - $0->right = $<3; - }$ + | Term = Expression ${ + $0 = b= new(binode); + b->op = Assign; + b->left = $<1; + b->right = $<3; + }$ | VariableDecl = Expression ${ - $0 = new(binode); - $0->op = Declare; - $0->left = $<1; - $0->right =$<3; - }$ + $0 = b= new(binode); + b->op = Declare; + b->left = $<1; + b->right =$<3; + }$ | VariableDecl ${ - if ($1->var->where_set == NULL) { - type_err(c, - "Variable declared with no type or value: %v", - $1, NULL, 0, NULL); - } else { - $0 = new(binode); - $0->op = Declare; - $0->left = $<1; - $0->right = NULL; - } - }$ + if ($1->var->where_set == NULL) { + type_err(c, + "Variable declared with no type or value: %v", + $1, NULL, 0, NULL); + free_var($1); + } else { + $0 = b = new(binode); + b->op = Declare; + b->left = $<1; + b->right = NULL; + } + }$ ###### print binode cases case Assign: do_indent(indent, ""); - print_exec(b->left, indent, bracket); + print_exec(b->left, -1, bracket); printf(" = "); - print_exec(b->right, indent, bracket); + print_exec(b->right, -1, bracket); if (indent >= 0) printf("\n"); break; @@ -3901,23 +4766,23 @@ it is declared, and error will be raised as the name is created as { struct variable *v = cast(var, b->left)->var; do_indent(indent, ""); - print_exec(b->left, indent, bracket); + print_exec(b->left, -1, bracket); if (cast(var, b->left)->var->constant) { printf("::"); - if (v->where_decl == v->where_set) { + if (v->explicit_type) { type_print(v->type, stdout); printf(" "); } } else { printf(":"); - if (v->where_decl == v->where_set) { + if (v->explicit_type) { type_print(v->type, stdout); printf(" "); } } if (b->right) { printf("= "); - print_exec(b->right, indent, bracket); + print_exec(b->right, -1, bracket); } if (indent >= 0) printf("\n"); @@ -3928,29 +4793,53 @@ it is declared, and error will be raised as the name is created as case Assign: case Declare: - /* Both must match and not be labels, + /* Both must match, or left may be ref and right an lval * Type must support 'dup', * For Assign, left must not be constant. * result is Tnone */ - t = propagate_types(b->left, c, ok, NULL, - Rnolabel | (b->op == Assign ? Rnoconstant : 0)); + *perr &= ~(Erval | Econst); + t = propagate_types(b->left, c, perr, NULL, 0); if (!b->right) return Tnone; if (t) { - if (propagate_types(b->right, c, ok, t, 0) != t) - if (b->left->type == Xvar) - type_err(c, "info: variable '%v' was set as %1 here.", - cast(var, b->left)->var->where_set, t, rules, NULL); + struct type *t2 = propagate_types(b->right, c, perr_local, + t, Rrefok); + if (!t2 || t2 == t || (*perr_local & Efail)) + ; // No more effort needed + else if (t->free == reference_free && + t->reference.referent == t2 && + !(*perr_local & Erval)) + b->right = take_addr(b->right); + else if (t->free == reference_free && + t->reference.referent == t2 && + (*perr_local & Erval)) + type_err(c, "error: Cannot assign an rval to a reference.", + b, NULL, 0, NULL); } else { - t = propagate_types(b->right, c, ok, NULL, Rnolabel); + t = propagate_types(b->right, c, perr_local, NULL, 0); if (t) - propagate_types(b->left, c, ok, t, - (b->op == Assign ? Rnoconstant : 0)); + propagate_types(b->left, c, perr, t, 0); + } + if (*perr & Erval) + type_err(c, "error: cannot assign to an rval", b, + NULL, 0, NULL); + else if (b->op == Assign && (*perr & Econst)) { + type_err(c, "error: Cannot assign to a constant: %v", + b->left, NULL, 0, NULL); + if (b->left->type == Xvar) { + struct var *var = cast(var, b->left); + struct variable *v = var->var; + type_err(c, "info: name was defined as a constant here", + v->where_decl, NULL, 0, NULL); + } } - if (t && t->dup == NULL) + if (t && t->dup == NULL && !(*perr_local & Emaycopy)) type_err(c, "error: cannot assign value of type %1", b, t, 0, NULL); + if (b->left->type == Xvar && (*perr_local & Efail)) + type_err(c, "info: variable '%v' was set as %1 here.", + cast(var, b->left)->var->where_set, t, rules, NULL); return Tnone; break; @@ -3959,12 +4848,9 @@ it is declared, and error will be raised as the name is created as case Assign: lleft = linterp_exec(c, b->left, <ype); - right = interp_exec(c, b->right, &rtype); - if (lleft) { - free_value(ltype, lleft); - dup_value(ltype, &right, lleft); - ltype = NULL; - } + if (lleft) + dinterp_exec(c, b->right, lleft, ltype, 1); + ltype = Tnone; break; case Declare: @@ -3975,45 +4861,31 @@ it is declared, and error will be raised as the name is created as val = var_value(c, v); if (v->type->prepare_type) v->type->prepare_type(c, v->type, 0); - if (b->right) { - right = interp_exec(c, b->right, &rtype); - memcpy(val, &right, rtype->size); - rtype = Tnone; - } else { + if (!b->right) val_init(v->type, val); - } + else + dinterp_exec(c, b->right, val, v->type, 0); break; } ### The `use` statement -The `use` statement is the last "simple" statement. It is needed when -the condition in a conditional statement is a block. `use` works much -like `return` in C, but only completes the `condition`, not the whole -function. +The `use` statement is the last "simple" statement. It is needed when a +statement block can return a value. This includes the body of a +function which has a return type, and the "condition" code blocks in +`if`, `while`, and `switch` statements. ###### Binode types Use, -###### expr precedence - $TERM use +###### declare terminals + $TERM use ###### SimpleStatement Grammar | use Expression ${ - $0 = new_pos(binode, $1); - $0->op = Use; - $0->right = $<2; - if ($0->right->type == Xvar) { - struct var *v = cast(var, $0->right); - if (v->var->type == Tnone) { - /* Convert this to a label */ - struct value *val; - - v->var->type = Tlabel; - val = global_alloc(c, Tlabel, v->var, NULL); - val->label = val; - } - } + $0 = b = new_pos(binode, $1); + b->op = Use; + b->right = $<2; }$ ###### print binode cases @@ -4029,7 +4901,7 @@ function. case Use: /* result matches value */ - return propagate_types(b->right, c, ok, type, 0); + return propagate_types(b->right, c, perr, type, 0); ###### interp binode cases @@ -4158,7 +5030,7 @@ casepart` to track a list of case parts. ###### ComplexStatement Grammar | CondStatement ${ $0 = $<1; }$ -###### expr precedence +###### declare terminals $TERM for then while do $TERM else $TERM switch case @@ -4175,136 +5047,136 @@ casepart` to track a list of case parts. // ForPart, SwitchPart, and IfPart open scopes, o we have to close // them. WhilePart opens and closes its own scope. CondStatement -> ForPart OptNL ThenPart OptNL WhilePart CondSuffix ${ - $0 = $forpart = $thenpart = $looppart = $forpart = $looppart = $looppart = $condpart = $next = $0->casepart; - $0->casepart = $condpart = $next = $0->casepart; - $0->casepart = $condpart = $IP.condpart; $IP.condpart = NULL; - $0->thenpart = $IP.thenpart; $IP.thenpart = NULL; - // This is where we close an "if" statement - var_block_close(c, CloseSequential, $0); - }$ + $0 = $forpart = $thenpart = $looppart = $forpart = $looppart = $looppart = $condpart = $next = $0->casepart; + $0->casepart = $condpart = $next = $0->casepart; + $0->casepart = $condpart = $IP.condpart; $IP.condpart = NULL; + $0->thenpart = $IP.thenpart; $IP.thenpart = NULL; + // This is where we close an "if" statement + var_block_close(c, CloseSequential, $0); + }$ CondSuffix -> IfSuffix ${ - $0 = $<1; - }$ - | Newlines CasePart CondSuffix ${ - $0 = $next = $0->casepart; - $0->casepart = $next = $0->casepart; - $0->casepart = $next = $0->casepart; + $0->casepart = $next = $0->casepart; + $0->casepart = $ Newlines ${ $0 = new(cond_statement); }$ - | Newlines ElsePart ${ $0 = $ else OpenBlock Newlines ${ - $0 = new(cond_statement); - $0->elsepart = $elsepart); - }$ - | else OpenScope CondStatement ${ - $0 = new(cond_statement); - $0->elsepart = $elsepart); - }$ + $0 = new(cond_statement); + $0->elsepart = $elsepart); + }$ + | else OpenScope CondStatement ${ + $0 = new(cond_statement); + $0->elsepart = $elsepart); + }$ $*casepart CasePart -> case Expression OpenScope ColonBlock ${ - $0 = calloc(1,sizeof(struct casepart)); - $0->value = $action = $action); - }$ + $0 = calloc(1,sizeof(struct casepart)); + $0->value = $action = $action); + }$ $*exec // These scopes are closed in CondStatement ForPart -> for OpenBlock ${ - $0 = $ then OpenBlock ${ - $0 = $ while UseBlock OptNL do OpenBlock ${ - $0 = new(binode); - $0->op = Loop; - $0->left = $right = $right); - var_block_close(c, CloseSequential, $0); - }$ - | while OpenScope Expression OpenScope ColonBlock ${ - $0 = new(binode); - $0->op = Loop; - $0->left = $right = $right); - var_block_close(c, CloseSequential, $0); - }$ + $0 = new(binode); + $0->op = Loop; + $0->left = $right = $right); + var_block_close(c, CloseSequential, $0); + }$ + | while OpenScope Expression OpenScope ColonBlock ${ + $0 = new(binode); + $0->op = Loop; + $0->left = $right = $right); + var_block_close(c, CloseSequential, $0); + }$ $cond_statement IfPart -> if UseBlock OptNL then OpenBlock ${ - $0.condpart = $ switch OpenScope Expression ${ - $0 = $right, c, ok, Tnone, 0); - if (!type_compat(Tnone, t, 0)) - *ok = 0; // UNTESTED - return propagate_types(b->left, c, ok, type, rules); + propagate_types(b->right, c, perr_local, Tnone, 0); + return propagate_types(b->left, c, perr, type, rules); ###### propagate exec cases case Xcond_statement: @@ -4437,51 +5307,47 @@ casepart` to track a list of case parts. struct cond_statement *cs = cast(cond_statement, prog); struct casepart *cp; - t = propagate_types(cs->forpart, c, ok, Tnone, 0); - if (!type_compat(Tnone, t, 0)) - *ok = 0; // UNTESTED + t = propagate_types(cs->forpart, c, perr, Tnone, 0); if (cs->looppart) { - t = propagate_types(cs->thenpart, c, ok, Tnone, 0); - if (!type_compat(Tnone, t, 0)) - *ok = 0; // UNTESTED + t = propagate_types(cs->thenpart, c, perr, Tnone, 0); } if (cs->casepart == NULL) { - propagate_types(cs->condpart, c, ok, Tbool, 0); - propagate_types(cs->looppart, c, ok, Tbool, 0); + propagate_types(cs->condpart, c, perr, Tbool, 0); + propagate_types(cs->looppart, c, perr, Tbool, 0); } else { /* Condpart must match case values, with bool permitted */ t = NULL; for (cp = cs->casepart; cp && !t; cp = cp->next) - t = propagate_types(cp->value, c, ok, NULL, 0); + t = propagate_types(cp->value, c, perr, NULL, 0); if (!t && cs->condpart) - t = propagate_types(cs->condpart, c, ok, NULL, Rboolok); // UNTESTED + t = propagate_types(cs->condpart, c, perr, NULL, Rboolok); // NOTEST if (!t && cs->looppart) - t = propagate_types(cs->looppart, c, ok, NULL, Rboolok); // UNTESTED + t = propagate_types(cs->looppart, c, perr, NULL, Rboolok); // NOTEST // Now we have a type (I hope) push it down if (t) { for (cp = cs->casepart; cp; cp = cp->next) - propagate_types(cp->value, c, ok, t, 0); - propagate_types(cs->condpart, c, ok, t, Rboolok); - propagate_types(cs->looppart, c, ok, t, Rboolok); + propagate_types(cp->value, c, perr, t, 0); + propagate_types(cs->condpart, c, perr, t, Rboolok); + propagate_types(cs->looppart, c, perr, t, Rboolok); } } // (if)then, else, and case parts must return expected type. if (!cs->looppart && !type) - type = propagate_types(cs->thenpart, c, ok, NULL, rules); + type = propagate_types(cs->thenpart, c, perr, NULL, rules); if (!type) - type = propagate_types(cs->elsepart, c, ok, NULL, rules); + type = propagate_types(cs->elsepart, c, perr, NULL, rules); for (cp = cs->casepart; cp && !type; - cp = cp->next) // UNTESTED - type = propagate_types(cp->action, c, ok, NULL, rules); // UNTESTED + cp = cp->next) // NOTEST + type = propagate_types(cp->action, c, perr, NULL, rules); // NOTEST if (type) { if (!cs->looppart) - propagate_types(cs->thenpart, c, ok, type, rules); - propagate_types(cs->elsepart, c, ok, type, rules); + propagate_types(cs->thenpart, c, perr, type, rules); + propagate_types(cs->elsepart, c, perr, type, rules); for (cp = cs->casepart; cp ; cp = cp->next) - propagate_types(cp->action, c, ok, type, rules); + propagate_types(cp->action, c, perr, type, rules); return type; } else return NULL; @@ -4493,7 +5359,7 @@ casepart` to track a list of case parts. rv = interp_exec(c, b->left, &rvtype); if (rvtype == Tnone || (rvtype == Tbool && rv.bool != 0)) - // cnd is Tnone or Tbool, doesn't need to be freed + // rvtype is Tnone or Tbool, doesn't need to be freed interp_exec(c, b->right, NULL); break; @@ -4564,20 +5430,21 @@ various declarations in the parse context. ## declare terminals OptNL -> - | OptNL NEWLINE + | OptNL NEWLINE + Newlines -> NEWLINE - | Newlines NEWLINE + | Newlines NEWLINE DeclarationList -> Declaration - | DeclarationList Declaration + | DeclarationList Declaration Declaration -> ERROR Newlines ${ - tok_err(c, // UNTESTED - "error: unhandled parse error", &$1); - }$ - | DeclareConstant - | DeclareFunction - | DeclareStruct + tok_err(c, // NOTEST + "error: unhandled parse error", &$1); + }$ + | DeclareConstant + | DeclareFunction + | DeclareStruct ## top level grammar @@ -4585,13 +5452,15 @@ various declarations in the parse context. ### The `const` section -As well as being defined in with the code that uses them, constants -can be declared at the top level. These have full-file scope, so they -are always `InScope`. The value of a top level constant can be given -as an expression, and this is evaluated immediately rather than in the -later interpretation stage. Once we add functions to the language, we -will need rules concern which, if any, can be used to define a top -level constant. +As well as being defined in with the code that uses them, constants can +be declared at the top level. These have full-file scope, so they are +always `InScope`, even before(!) they have been declared. The value of +a top level constant can be given as an expression, and this is +evaluated after parsing and before execution. + +A function call can be used to evaluate a constant, but it will not have +access to any program state, once such statement becomes meaningful. +e.g. arguments and filesystem will not be visible. Constants are defined in a section that starts with the reserved word `const` and then has a block with a list of assignment statements. @@ -4600,140 +5469,261 @@ make it clear that they are constants. Type can also be given: if not, the type will be determined during analysis, as with other constants. -As the types constants are inserted at the head of a list, printing -them in the same order that they were read is not straight forward. -We take a quadratic approach here and count the number of constants -(variables of depth 0), then count down from there, each time -searching through for the Nth constant for decreasing N. +###### parse context + struct binode *constlist; ###### top level grammar $TERM const DeclareConstant -> const { IN OptNL ConstList OUT OptNL } Newlines - | const { SimpleConstList } Newlines - | const IN OptNL ConstList OUT Newlines - | const SimpleConstList Newlines + | const { SimpleConstList } Newlines + | const IN OptNL ConstList OUT Newlines + | const SimpleConstList Newlines ConstList -> ConstList SimpleConstLine - | SimpleConstLine + | SimpleConstLine + SimpleConstList -> SimpleConstList ; Const - | Const - | SimpleConstList ; + | Const + | SimpleConstList ; + SimpleConstLine -> SimpleConstList Newlines - | ERROR Newlines ${ tok_err(c, "Syntax error in constant", &$1); }$ + | ERROR Newlines ${ tok_err(c, "Syntax error in constant", &$1); }$ $*type CType -> Type ${ $0 = $<1; }$ - | ${ $0 = NULL; }$ + | ${ $0 = NULL; }$ + $void Const -> IDENTIFIER :: CType = Expression ${ { - int ok; struct variable *v; + struct binode *bl, *bv; + struct var *var = new_pos(var, $ID); - v = var_decl(c, $1.txt); + v = var_decl(c, $ID.txt); if (v) { - struct var *var = new_pos(var, $1); v->where_decl = var; v->where_set = var; - var->var = v; + v->type = $constant = 1; + v->global = 1; } else { v = var_ref(c, $1.txt); - tok_err(c, "error: name already declared", &$1); - type_err(c, "info: this is where '%v' was first declared", - v->where_decl, NULL, 0, NULL); - } - do { - ok = 1; - propagate_types($5, c, &ok, $3, 0); - } while (ok == 2); - if (!ok) - c->parse_error = 1; - else if (v) { - struct value res = interp_exec(c, $5, &v->type); - global_alloc(c, v->type, v, &res); + if (v->type == Tnone) { + v->where_decl = var; + v->where_set = var; + v->type = $constant = 1; + v->global = 1; + } else { + tok_err(c, "error: name already declared", &$1); + type_err(c, "info: this is where '%v' was first declared", + v->where_decl, NULL, 0, NULL); + } } + var->var = v; + + bv = new(binode); + bv->op = Declare; + bv->left = var; + bv->right= $op = List; + bl->left = c->constlist; + bl->right = bv; + c->constlist = bl; } }$ -###### print const decls +###### core functions + static void resolve_consts(struct parse_context *c) { - struct variable *v; - int target = -1; - - while (target != 0) { - int i = 0; - for (v = context.in_scope; v; v=v->in_scope) - if (v->depth == 0 && v->constant) { - i += 1; - if (i == target) - break; + struct binode *b; + int retry = 1; + enum { none, some, cannot } progress = none; + + c->constlist = reorder_bilist(c->constlist); + while (retry) { + retry = 0; + for (b = cast(binode, c->constlist); b; + b = cast(binode, b->right)) { + enum prop_err perr; + struct binode *vb = cast(binode, b->left); + struct var *v = cast(var, vb->left); + if (v->var->frame_pos >= 0) + continue; + do { + perr = 0; + propagate_types(vb->right, c, &perr, + v->var->type, 0); + } while (perr & Eretry); + if (perr & Efail) + c->parse_error += 1; + else if (!(perr & Eruntime)) { + progress = some; + struct value res = interp_exec( + c, vb->right, &v->var->type); + global_alloc(c, v->var->type, v->var, &res); + } else { + if (progress == cannot) + type_err(c, "error: const %v cannot be resolved.", + v, NULL, 0, NULL); + else + retry = 1; } - - if (target == -1) { - if (i) - printf("const\n"); - target = i; - } else { - struct value *val = var_value(&context, v); - printf(" %.*s :: ", v->name->name.len, v->name->name.txt); - type_print(v->type, stdout); - printf(" = "); - if (v->type == Tstr) - printf("\""); - print_value(v->type, val); - if (v->type == Tstr) - printf("\""); - printf("\n"); - target -= 1; } + switch (progress) { + case cannot: + retry = 0; break; + case none: + progress = cannot; break; + case some: + progress = none; break; + } + } + } + +###### print const decls + { + struct binode *b; + int first = 1; + + for (b = cast(binode, context.constlist); b; + b = cast(binode, b->right)) { + struct binode *vb = cast(binode, b->left); + struct var *vr = cast(var, vb->left); + struct variable *v = vr->var; + + if (first) + printf("const\n"); + first = 0; + + printf(" %.*s :: ", v->name->name.len, v->name->name.txt); + type_print(v->type, stdout); + printf(" = "); + print_exec(vb->right, -1, 0); + printf("\n"); } } +###### free const decls + free_binode(context.constlist); + ### Function declarations The code in an Ocean program is all stored in function declarations. One of the functions must be named `main` and it must accept an array of strings as a parameter - the command line arguments. - -As this is the top level, several things are handled a bit -differently. -The function is not interpreted by `interp_exec` as that isn't -passed the argument list which the program requires. Similarly type -analysis is a bit more interesting at this level. +As this is the top level, several things are handled a bit differently. +The function is not interpreted by `interp_exec` as that isn't passed +the argument list which the program requires. Similarly type analysis +is a bit more interesting at this level. ###### ast functions + static struct type *handle_results(struct parse_context *c, + struct binode *results) + { + /* Create a 'struct' type from the results list, which + * is a list for 'struct var' + */ + struct type *t = add_anon_type(c, &structure_prototype, + "function result"); + int cnt = 0; + struct binode *b; + + for (b = results; b; b = cast(binode, b->right)) + cnt += 1; + t->structure.nfields = cnt; + t->structure.fields = calloc(cnt, sizeof(struct field)); + cnt = 0; + for (b = results; b; b = cast(binode, b->right)) { + struct var *v = cast(var, b->left); + struct field *f = &t->structure.fields[cnt++]; + int a = v->var->type->align; + f->name = v->var->name->name; + f->type = v->var->type; + f->init = NULL; + f->offset = t->size; + v->var->frame_pos = f->offset; + t->size += ((f->type->size - 1) | (a-1)) + 1; + if (a > t->align) + t->align = a; + variable_unlink_exec(v->var); + } + free_binode(results); + return t; + } + static struct variable *declare_function(struct parse_context *c, struct variable *name, struct binode *args, + struct type *ret, + struct binode *results, struct exec *code) { - struct text funcname = {" func", 5}; if (name) { struct value fn = {.function = code}; - name->type = add_type(c, funcname, &function_prototype); - name->type->function.params = reorder_bilist(args); - global_alloc(c, name->type, name, &fn); - var_block_close(c, CloseSequential, code); - } else - var_block_close(c, CloseSequential, NULL); + struct type *t; + var_block_close(c, CloseFunction, code); + t = add_anon_type(c, &function_prototype, + "func %.*s", name->name->name.len, + name->name->name.txt); + name->type = t; + t->function.params = reorder_bilist(args); + if (!ret) { + ret = handle_results(c, reorder_bilist(results)); + t->function.inline_result = 1; + t->function.local_size = ret->size; + } + t->function.return_type = ret; + global_alloc(c, t, name, &fn); + name->type->function.scope = c->out_scope; + } else { + free_binode(args); + free_type(ret); + free_exec(code); + var_block_close(c, CloseFunction, NULL); + } + c->out_scope = NULL; return name; } +###### declare terminals + $TERM return + ###### top level grammar $*variable - DeclareFunction -> func FuncName ( OpenScope Args ) Block Newlines ${ - $0 = declare_function(c, $ func FuncName ( OpenScope ArgsLine ) Block Newlines ${ + $0 = declare_function(c, $name->name.len, v->name->name.txt); v->type->print_type_decl(v->type, stdout); - if (brackets) - print_exec(val->function, 0, brackets); - else - print_value(v->type, val); + if (brackets) { + printf(" {\n"); + print_exec(val->function, 1, brackets); + printf("}\n"); + } else { + print_value(v->type, val, stdout); + } printf("/* frame size %d */\n", v->type->function.local_size); target -= 1; } @@ -4770,48 +5763,59 @@ analysis is a bit more interesting at this level. static int analyse_funcs(struct parse_context *c) { struct variable *v; - int ok = 1; - for (v = c->in_scope; ok && v; v = v->in_scope) { + int all_ok = 1; + for (v = c->in_scope; v; v = v->in_scope) { struct value *val; + struct type *ret; + enum prop_err perr; if (v->depth != 0 || !v->type || !v->type->check_args) continue; + ret = v->type->function.inline_result ? + Tnone : v->type->function.return_type; val = var_value(c, v); do { - ok = 1; - propagate_types(val->function, c, &ok, Tnone, 0); - } while (ok == 2); - if (ok) + perr = 0; + propagate_types(val->function, c, &perr, ret, 0); + } while (!(perr & Efail) && (perr & Eretry)); + if (!(perr & Efail)) /* Make sure everything is still consistent */ - propagate_types(val->function, c, &ok, Tnone, 0); - v->type->function.local_size = scope_finalize(c); + propagate_types(val->function, c, &perr, ret, 0); + if (perr & Efail) + all_ok = 0; + if (!v->type->function.inline_result && + !v->type->function.return_type->dup) { + type_err(c, "error: function cannot return value of type %1", + v->where_decl, v->type->function.return_type, 0, NULL); + } + + scope_finalize(c, v->type); } - return ok; + return all_ok; } static int analyse_main(struct type *type, struct parse_context *c) { struct binode *bp = type->function.params; struct binode *b; - int ok = 1; + enum prop_err perr; int arg = 0; struct type *argv_type; - struct text argv_type_name = { " argv", 5 }; - argv_type = add_type(c, argv_type_name, &array_prototype); + argv_type = add_anon_type(c, &array_prototype, "argv"); argv_type->array.member = Tstr; argv_type->array.unspec = 1; for (b = bp; b; b = cast(binode, b->right)) { - ok = 1; + perr = 0; switch (arg++) { case 0: /* argv */ - propagate_types(b->left, c, &ok, argv_type, 0); + propagate_types(b->left, c, &perr, argv_type, 0); break; default: /* invalid */ // NOTEST - propagate_types(b->left, c, &ok, Tnone, 0); // NOTEST + propagate_types(b->left, c, &perr, Tnone, 0); // NOTEST } - if (!ok) - c->parse_error = 1; + if (perr & Efail) + c->parse_error += 1; } return !c->parse_error; @@ -4832,12 +5836,12 @@ analysis is a bit more interesting at this level. progp = var_value(c, mainv); if (!progp || !progp->function) { fprintf(stderr, "oceani: no main function found.\n"); - c->parse_error = 1; + c->parse_error += 1; return; } if (!analyse_main(mainv->type, c)) { fprintf(stderr, "oceani: main has wrong type.\n"); - c->parse_error = 1; + c->parse_error += 1; return; } al = mainv->type->function.params; @@ -4849,20 +5853,16 @@ analysis is a bit more interesting at this level. struct value *vl = var_value(c, v->var); struct value arg; struct type *t; - mpq_t argcq; int i; switch (anum++) { case 0: /* argv */ t = v->var->type; - mpq_init(argcq); - mpq_set_ui(argcq, argc, 1); - memcpy(var_value(c, t->array.vsize), &argcq, sizeof(argcq)); + t->array.size = argc; t->prepare_type(c, t, 0); array_init(v->var->type, vl); for (i = 0; i < argc; i++) { struct value *vl2 = vl->array + i * v->var->type->array.member->size; - arg.str.txt = argv[i]; arg.str.len = strlen(argv[i]); @@ -4912,7 +5912,7 @@ things which will likely grow as the languages grows. name:string alive:Boolean - func main(argv:[argc::]string) + func main(argv:[]string) print "Hello World, what lovely oceans you have!" print "Are there", five, "?" print pi, pie, "but", cake @@ -4940,7 +5940,7 @@ things which will likely grow as the languages grows. a : number a = A; b:number = B - if a > 0 and then b > 0: + if a > 0 and b > 0: while a != b: if a < b: b = b - a @@ -4971,19 +5971,19 @@ things which will likely grow as the languages grows. while mid := (lo + hi) / 2 if mid == target: - use Found + use .Found if mid < target: lo = mid else hi = mid if hi - lo < 1: lo = mid - use GiveUp + use .GiveUp use True do pass - case Found: + case .Found: print "Yay, I found", target - case GiveUp: + case .GiveUp: print "Closest I found was", lo size::= 10