X-Git-Url: https://ocean-lang.org/code/?p=ocean;a=blobdiff_plain;f=csrc%2Foceani.mdc;h=c2a19ce0c7744190c2fedba910f7205c851e83db;hp=beb4fc9830f41e25f4fbe65e54bb4195a3abde18;hb=234ae7c044ea183c719799a5369b2cfcbc69fd27;hpb=bc126aadf03e4e64710f2ad7d76d1247090219d5 diff --git a/csrc/oceani.mdc b/csrc/oceani.mdc index beb4fc9..c2a19ce 100644 --- a/csrc/oceani.mdc +++ b/csrc/oceani.mdc @@ -176,7 +176,7 @@ structures can be used. int fd; int len; char *file; - struct section *s, *ss; + struct section *s = NULL, *ss; char *section = NULL; struct parse_context context = { .config = { @@ -231,13 +231,13 @@ structures can be used. if (!ss) { fprintf(stderr, "oceani: cannot find section %s\n", section); - exit(1); + goto cleanup; } } else ss = s; // NOTEST if (!ss->code) { fprintf(stderr, "oceani: no code found in requested section\n"); // NOTEST - exit(1); // NOTEST + goto cleanup; // NOTEST } parse_oceani(ss->code, &context.config, dotrace ? stderr : NULL); @@ -254,16 +254,17 @@ structures can be used. } if (doexec && !context.parse_error) interp_main(&context, argc - optind, argv + optind); - + cleanup: while (s) { struct section *t = s->next; code_free(s->code); free(s); s = t; } - if (!context.parse_error) { - ## free global vars - } + // FIXME parser should pop scope even on error + while (context.scope_depth > 0) + scope_pop(&context); + ## free global vars ## free context types ## free context storage exit(context.parse_error ? 1 : 0); @@ -934,6 +935,12 @@ is used to distinguish between the first of a set of parallel scopes, in which declared variables must not be in scope, and subsequent branches, whether they may already be conditionally scoped. +We need a total ordering of scopes so we can easily compare to variables +to see if they are concurrently in scope. To achieve this we record a +`scope_count` which is actually a count of both beginnings and endings +of scopes. Then each variable has a record of the scope count where it +enters scope, and where it leaves. 
+ To push a new frame *before* any code in the frame is parsed, we need a grammar reduction. This is most easily achieved with a grammar element which derives the empty string, and creates the new scope when @@ -948,8 +955,12 @@ like "if" and the code following it. ###### parse context int scope_depth; + int scope_count; struct scope *scope_stack; +###### variable fields + int scope_start, scope_end; + ###### ast functions static void scope_pop(struct parse_context *c) { @@ -958,6 +969,7 @@ like "if" and the code following it. c->scope_stack = s->parent; free(s); c->scope_depth -= 1; + c->scope_count += 1; } static void scope_push(struct parse_context *c) @@ -968,6 +980,7 @@ like "if" and the code following it. s->parent = c->scope_stack; c->scope_stack = s; c->scope_depth += 1; + c->scope_count += 1; } ###### Grammar @@ -1004,7 +1017,10 @@ Each variable records a scope depth and is in one of four states: - "out of scope". The variable is neither in scope nor conditionally in scope. It is permanently out of scope now and can be removed from - the "in scope" stack. + the "in scope" stack. When a variable becomes out-of-scope it is + moved to a separate list (`out_scope`) of variables which have fully + known scope. This will be used at the end of each function to assign + each variable a place in the stack frame. ###### variable fields int depth, min_depth; @@ -1014,6 +1030,7 @@ Each variable records a scope depth and is in one of four states: ###### parse context struct variable *in_scope; + struct variable *out_scope; All variables with the same name are linked together using the 'previous' link. Those variable that have been affirmatively merged all @@ -1051,6 +1068,10 @@ need to be freed. 
For this we need to be able to find it, so assume that v->merged == secondary->merged) { v->scope = OutScope; v->merged = primary; + if (v->scope_start < primary->scope_start) + primary->scope_start = v->scope_start; + if (v->scope_end > primary->scope_end) + primary->scope_end = v->scope_end; // NOTEST variable_unlink_exec(v); } } @@ -1081,13 +1102,15 @@ need to be freed. For this we need to be able to find it, so assume that #### Manipulating Bindings -When a name is conditionally visible, a new declaration discards the -old binding - the condition lapses. Conversely a usage of the name -affirms the visibility and extends it to the end of the containing -block - i.e. the block that contains both the original declaration and -the latest usage. This is determined from `min_depth`. When a -conditionally visible variable gets affirmed like this, it is also -merged with other conditionally visible variables with the same name. +When a name is conditionally visible, a new declaration discards the old +binding - the condition lapses. Similarly when we reach the end of a +function (outermost non-global scope) any conditional scope must lapse. +Conversely a usage of the name affirms the visibility and extends it to +the end of the containing block - i.e. the block that contains both the +original declaration and the latest usage. This is determined from +`min_depth`. When a conditionally visible variable gets affirmed like +this, it is also merged with other conditionally visible variables with +the same name. When we parse a variable declaration we either report an error if the name is currently bound, or create a new variable at the current nest @@ -1122,7 +1145,7 @@ we need to mark all pending-scope variable as out-of-scope. Otherwise all pending-scope variables become conditionally scoped. 
###### ast - enum closetype { CloseSequential, CloseParallel, CloseElse }; + enum closetype { CloseSequential, CloseFunction, CloseParallel, CloseElse }; ###### ast functions @@ -1151,6 +1174,7 @@ all pending-scope variables become conditionally scoped. v->min_depth = v->depth = c->scope_depth; v->scope = InScope; v->in_scope = c->in_scope; + v->scope_start = c->scope_count; c->in_scope = v; ## variable init return v; @@ -1184,6 +1208,19 @@ all pending-scope variables become conditionally scoped. return v; } + static int var_refile(struct parse_context *c, struct variable *v) + { + /* Variable just went out of scope. Add it to the out_scope + * list, sorted by ->scope_start + */ + struct variable **vp = &c->out_scope; + while ((*vp) && (*vp)->scope_start < v->scope_start) + vp = &(*vp)->in_scope; + v->in_scope = *vp; + *vp = v; + return 0; + } + static void var_block_close(struct parse_context *c, enum closetype ct, struct exec *e) { @@ -1201,7 +1238,7 @@ all pending-scope variables become conditionally scoped. for (vp = &c->in_scope; (v = *vp) && v->min_depth > c->scope_depth; (v->scope == OutScope || v->name->var != v) - ? (*vp = v->in_scope, 0) + ? (*vp = v->in_scope, var_refile(c, v)) : ( vp = &v->in_scope, 0)) { v->min_depth = c->scope_depth; if (v->name->var != v) @@ -1210,7 +1247,9 @@ all pending-scope variables become conditionally scoped. */ continue; v->min_depth = c->scope_depth; - if (v->scope == InScope && e) { + if (v->scope == InScope) + v->scope_end = c->scope_count; + if (v->scope == InScope && e && !v->global) { /* This variable gets cleaned up when 'e' finishes */ variable_unlink_exec(v); v->cleanup_exec = e; @@ -1257,6 +1296,11 @@ all pending-scope variables become conditionally scoped. 
abort(); // NOTEST } break; + case CloseFunction: + if (v->scope == CondScope) + /* Condition cannot continue past end of function */ + v->scope = InScope; + /* fallthrough */ case CloseSequential: if (v->type == Tlabel) v->scope = PendingScope; @@ -1366,35 +1410,50 @@ tell if it was set or not later. As global values are found -- struct field initializers, labels etc -- `global_alloc()` is called to record the value in the global frame. -When the program is fully parsed, we need to walk the list of variables -to find any that weren't merged away and that aren't global, and to -calculate the frame size and assign a frame position for each -variable. For this we have `scope_finalize()`. +When the program is fully parsed, each function is analysed: we need to +walk the list of variables local to that function and assign them an +offset in the stack frame. For this we have `scope_finalize()`. + +We keep the stack frame dense by re-using space between variables +that are not in scope at the same time. The `out_scope` list is sorted +by `scope_start` and as we process a variable, we move it to a LIFO +stack. For each variable we consider, we first discard from the +stack anything that went out of scope before the new variable came in. +Then we place the new variable just after the one at the top of the +stack. 
###### ast functions - static int scope_finalize(struct parse_context *c) + static void scope_finalize(struct parse_context *c, struct type *ft) { - struct binding *b; int size = 0; - - for (b = c->varlist; b; b = b->next) { - struct variable *v; - for (v = b->var; v; v = v->previous) { - struct type *t = v->type; - if (v->merged != v) - continue; - if (v->global) - continue; - if (!t) - continue; - if (size & (t->align - 1)) - size = (size + t->align) & ~(t->align-1); - v->frame_pos = size; - size += v->type->size; - } + struct variable *next = ft->function.scope; + struct variable *done = NULL; + while (next) { + struct variable *v = next; + struct type *t = v->type; + int pos; + next = v->in_scope; + if (v->merged != v) + continue; + if (!t) + continue; + while (done && done->scope_end < v->scope_start) + done = done->in_scope; + if (done) + pos = done->frame_pos + done->type->size; + else + pos = 0; + if (pos & (t->align - 1)) + pos = (pos + t->align) & ~(t->align-1); + v->frame_pos = pos; + if (size < pos + v->type->size) + size = pos + v->type->size; + v->in_scope = done; + done = v; } - return size; + c->out_scope = NULL; + ft->function.local_size = size; } ###### free context storage @@ -1548,6 +1607,7 @@ also want to know what sort of bracketing to use. do_indent(indent, "/* FREE"); for (v = e->to_free; v; v = v->next_free) { printf(" %.*s", v->name->name.len, v->name->name.txt); + printf("[%d,%d]", v->scope_start, v->scope_end); if (v->frame_pos >= 0) printf("(%d+%d)", v->frame_pos, v->type ? v->type->size:0); @@ -1641,12 +1701,16 @@ in `rval`. struct value rval, *lval; }; - static struct lrval _interp_exec(struct parse_context *c, struct exec *e); + /* If dest is passed, dtype must give the expected type, and + * result can go there, in which case type is returned as NULL. 
+ */ + static struct lrval _interp_exec(struct parse_context *c, struct exec *e, + struct value *dest, struct type *dtype); static struct value interp_exec(struct parse_context *c, struct exec *e, struct type **typeret) { - struct lrval ret = _interp_exec(c, e); + struct lrval ret = _interp_exec(c, e, NULL, NULL); if (!ret.type) abort(); if (typeret) @@ -1659,8 +1723,9 @@ in `rval`. static struct value *linterp_exec(struct parse_context *c, struct exec *e, struct type **typeret) { - struct lrval ret = _interp_exec(c, e); + struct lrval ret = _interp_exec(c, e, NULL, NULL); + if (!ret.type) abort(); if (ret.lval) *typeret = ret.type; else @@ -1668,8 +1733,28 @@ in `rval`. return ret.lval; } - static struct lrval _interp_exec(struct parse_context *c, struct exec *e) + /* dinterp_exec is used when the destination type is certain and + * the value has a place to go. + */ + static void dinterp_exec(struct parse_context *c, struct exec *e, + struct value *dest, struct type *dtype, + int need_free) + { + struct lrval ret = _interp_exec(c, e, dest, dtype); + if (!ret.type) + return; // NOTEST + if (need_free) + free_value(dtype, dest); + if (ret.lval) + dup_value(dtype, ret.lval, dest); + else + memcpy(dest, &ret.rval, dtype->size); + } + + static struct lrval _interp_exec(struct parse_context *c, struct exec *e, + struct value *dest, struct type *dtype) { + /* If the result is copied to dest, ret.type is set to NULL */ struct lrval ret; struct value rv = {}, *lrv = NULL; struct type *rvtype; @@ -1697,9 +1782,11 @@ in `rval`. } ## interp exec cases } - ret.lval = lrv; - ret.rval = rv; - ret.type = rvtype; + if (rvtype) { + ret.lval = lrv; + ret.rval = rv; + ret.type = rvtype; + } ## interp exec cleanup return ret; } @@ -1875,9 +1962,10 @@ with a const size by whether they are prepared at parse time or not. 
t->array.vsize = NULL; if (number_parse(num, tail, $2.txt) == 0) tok_err(c, "error: unrecognised number", &$2); - else if (tail[0]) + else if (tail[0]) { tok_err(c, "error: unsupported number suffix", &$2); - else { + mpq_clear(num); + } else { t->array.size = mpz_get_ui(mpq_numref(num)); if (mpz_cmp_ui(mpq_denref(num), 1) != 0) { tok_err(c, "error: array size must be an integer", @@ -1990,7 +2078,7 @@ with a const size by whether they are prepared at parse time or not. if (i >= 0 && i < ltype->array.size) lrv = ptr + i * rvtype->size; else - val_init(ltype->array.member, &rv); + val_init(ltype->array.member, &rv); // UNSAFE ltype = NULL; break; } @@ -2335,10 +2423,9 @@ function will be needed. #### Functions A function is a chunk of code which can be passed parameters and can -return results (though results are not yet implemented). Each function -has a type which includes the set of parameters and the return value. -As yet these types cannot be declared separately from the function -itself. +return results. Each function has a type which includes the set of +parameters and the return value. As yet these types cannot be declared +separately from the function itself. The parameters can be specified either in parentheses as a ';' separated list, such as @@ -2359,6 +2446,22 @@ be a ';' separated list) do code block +In the first case a return type can follow the parentheses after a colon, +in the second it is given on a line starting with the word `return`. + +##### Example: functions that return + + func add(a:number; b:number): number + code block + + func catenate + a: string + b: string + return string + do + code block + + For constructing these lists we use a `List` binode, which will be further detailed when Expression Lists are introduced. @@ -2366,6 +2469,8 @@ further detailed when Expression Lists are introduced. 
struct { struct binode *params; + struct type *return_type; + struct variable *scope; int local_size; } function; @@ -2433,7 +2538,12 @@ further detailed when Expression Lists are introduced. if (b->right) fprintf(f, "; "); } - fprintf(f, ")\n"); + fprintf(f, ")"); + if (type->function.return_type != Tnone) { + fprintf(f, ":"); + type_print(type->function.return_type, f); + } + fprintf(f, "\n"); } static void function_free_type(struct type *t) @@ -3529,7 +3639,7 @@ arguments, form with the 'List' nodes. case Funcall: { /* Every arg must match formal parameter, and result - * is return type of function (currently Tnone). + * is return type of function */ struct binode *args = cast(binode, b->right); struct var *v = cast(var, b->left); @@ -3540,7 +3650,7 @@ arguments, form with the 'List' nodes. return NULL; } v->var->type->check_args(c, ok, v->var->type, args); - return Tnone; + return v->var->type->function.return_type; } ###### interp binode cases @@ -3568,7 +3678,7 @@ arguments, form with the 'List' nodes. 
arg = cast(binode, arg->right); } c->local = local; c->local_size = t->function.local_size; - right = interp_exec(c, fbody->function, &rtype); + rv = interp_exec(c, fbody->function, &rvtype); c->local = oldlocal; c->local_size = old_size; free(local); break; @@ -3894,6 +4004,7 @@ it is declared, and error will be raised as the name is created as type_err(c, "Variable declared with no type or value: %v", $1, NULL, 0, NULL); + free_var($1); } else { $0 = new(binode); $0->op = Declare; @@ -3975,12 +4086,9 @@ it is declared, and error will be raised as the name is created as case Assign: lleft = linterp_exec(c, b->left, <ype); - right = interp_exec(c, b->right, &rtype); - if (lleft) { - free_value(ltype, lleft); - dup_value(ltype, &right, lleft); - ltype = NULL; - } + if (lleft) + dinterp_exec(c, b->right, lleft, ltype, 1); + ltype = Tnone; break; case Declare: @@ -3991,22 +4099,19 @@ it is declared, and error will be raised as the name is created as val = var_value(c, v); if (v->type->prepare_type) v->type->prepare_type(c, v->type, 0); - if (b->right) { - right = interp_exec(c, b->right, &rtype); - memcpy(val, &right, rtype->size); - rtype = Tnone; - } else { + if (b->right) + dinterp_exec(c, b->right, val, v->type, 0); + else val_init(v->type, val); - } break; } ### The `use` statement -The `use` statement is the last "simple" statement. It is needed when -the condition in a conditional statement is a block. `use` works much -like `return` in C, but only completes the `condition`, not the whole -function. +The `use` statement is the last "simple" statement. It is needed when a +statement block can return a value. This includes the body of a +function which has a return type, and the "condition" code blocks in +`if`, `while`, and `switch` statements. ###### Binode types Use, @@ -4509,7 +4614,7 @@ casepart` to track a list of case parts. 
rv = interp_exec(c, b->left, &rvtype); if (rvtype == Tnone || (rvtype == Tbool && rv.bool != 0)) - // cnd is Tnone or Tbool, doesn't need to be freed + // rvtype is Tnone or Tbool, doesn't need to be freed interp_exec(c, b->right, NULL); break; @@ -4654,11 +4759,12 @@ searching through for the Nth constant for decreasing N. v->where_set = var; var->var = v; v->constant = 1; + v->global = 1; } else { - v = var_ref(c, $1.txt); + struct variable *vorig = var_ref(c, $1.txt); tok_err(c, "error: name already declared", &$1); type_err(c, "info: this is where '%v' was first declared", - v->where_decl, NULL, 0, NULL); + vorig->where_decl, NULL, 0, NULL); } do { ok = 1; @@ -4712,17 +4818,17 @@ The code in an Ocean program is all stored in function declarations. One of the functions must be named `main` and it must accept an array of strings as a parameter - the command line arguments. -As this is the top level, several things are handled a bit -differently. -The function is not interpreted by `interp_exec` as that isn't -passed the argument list which the program requires. Similarly type -analysis is a bit more interesting at this level. +As this is the top level, several things are handled a bit differently. +The function is not interpreted by `interp_exec` as that isn't passed +the argument list which the program requires. Similarly type analysis +is a bit more interesting at this level. ###### ast functions static struct variable *declare_function(struct parse_context *c, struct variable *name, struct binode *args, + struct type *ret, struct exec *code) { struct text funcname = {" func", 5}; @@ -4730,24 +4836,43 @@ analysis is a bit more interesting at this level. 
struct value fn = {.function = code}; name->type = add_type(c, funcname, &function_prototype); name->type->function.params = reorder_bilist(args); + name->type->function.return_type = ret; global_alloc(c, name->type, name, &fn); - var_block_close(c, CloseSequential, code); - } else - var_block_close(c, CloseSequential, NULL); + var_block_close(c, CloseFunction, code); + name->type->function.scope = c->out_scope; + } else { + free_binode(args); + free_type(ret); + free_exec(code); + var_block_close(c, CloseFunction, NULL); + } + c->out_scope = NULL; return name; } +###### declare terminals + $TERM return + ###### top level grammar $*variable DeclareFunction -> func FuncName ( OpenScope ArgsLine ) Block Newlines ${ - $0 = declare_function(c, $function, c, &ok, Tnone, 0); + propagate_types(val->function, c, &ok, + v->type->function.return_type, 0); } while (ok == 2); if (ok) /* Make sure everything is still consistent */ - propagate_types(val->function, c, &ok, Tnone, 0); + propagate_types(val->function, c, &ok, + v->type->function.return_type, 0); if (!ok) all_ok = 0; - v->type->function.local_size = scope_finalize(c); + if (!v->type->function.return_type->dup) { + type_err(c, "error: function cannot return value of type %1", + v->where_decl, v->type->function.return_type, 0, NULL); + } + + scope_finalize(c, v->type); } return all_ok; }