X-Git-Url: https://ocean-lang.org/code/?p=ocean;a=blobdiff_plain;f=csrc%2Foceani.mdc;h=dca205570c9af2c1ae704e0a58c04f01bdd96782;hp=2398fb3bb87b4f8c04c1ddb49f67baa64ce64071;hb=63e4b945b7fcf3c2ccbf7fee64bc08ae9f0aecf0;hpb=9c3c0e628883128ce89c34c177c1b5bb534ea69d diff --git a/csrc/oceani.mdc b/csrc/oceani.mdc index 2398fb3..dca2055 100644 --- a/csrc/oceani.mdc +++ b/csrc/oceani.mdc @@ -110,6 +110,7 @@ structures can be used. ## macros struct parse_context; ## ast + ## ast late struct parse_context { struct token_config config; char *file_name; @@ -588,23 +589,17 @@ expected to return, and returns the type that it does return, either of which can be `NULL` signifying "unknown". A `prop_err` flag set is passed by reference. It has `Efail` set when an error is found, and `Eretry` when the type for some element is set via propagation. If -any expression cannot be evaluated immediately, `Enoconst` is set. +any expression cannot be evaluated a compile time, `Eruntime` is set. If the expression can be copied, `Emaycopy` is set. If it remains unchanged at `0`, then no more propagation is needed. ###### ast - enum val_rules {Rnolabel = 1<<0, Rboolok = 1<<1, Rnoconstant = 1<<2}; - enum prop_err {Efail = 1<<0, Eretry = 1<<1, Enoconst = 1<<2, + enum val_rules {Rboolok = 1<<1, Rnoconstant = 1<<2}; + enum prop_err {Efail = 1<<0, Eretry = 1<<1, Eruntime = 1<<2, Emaycopy = 1<<3}; -###### format cases - case 'r': - if (rules & Rnolabel) - fputs(" (labels not permitted)", stderr); - break; - ###### forward decls static struct type *propagate_types(struct exec *prog, struct parse_context *c, enum prop_err *perr, struct type *type, int rules); @@ -789,6 +784,7 @@ which might be reported in error messages. }; }; +###### ast late struct type { struct text name; struct type *next; @@ -871,6 +867,28 @@ which might be reported in error messages. return _add_type(c, t, proto, 1); } + static struct type *find_anon_type(struct parse_context *c, + struct type *proto, char *name, ...) 
+ { + struct type *t = c->typelist; + struct text nm; + va_list ap; + + va_start(ap, name); + vasprintf(&nm.txt, name, ap); + va_end(ap); + nm.len = strlen(name); + + while (t && (!t->anon || + text_cmp(t->name, nm) != 0)) + t = t->next; + if (t) { + free(nm.txt); + return t; + } + return _add_type(c, nm, proto, 1); + } + static void free_type(struct type *t) { /* The type is always a reference to something in the @@ -918,8 +936,8 @@ which might be reported in error messages. { if (tl && tl->cmp_order) return tl->cmp_order(tl, tr, left, right); - if (tl && tl->cmp_eq) // NOTEST - return tl->cmp_eq(tl, tr, left, right); // NOTEST + if (tl && tl->cmp_eq) + return tl->cmp_eq(tl, tr, left, right); return -1; // NOTEST } @@ -1041,8 +1059,6 @@ A separate function encoding these cases will simplify some code later. { if ((rules & Rboolok) && have == Tbool) return 1; // NOTEST - if ((rules & Rnolabel) && have == Tlabel) - return 0; // NOTEST if (!require || !have) return 1; @@ -1068,7 +1084,7 @@ A separate function encoding these cases will simplify some code later. struct text str; mpq_t num; unsigned char bool; - void *label; + int label; ###### ast functions static void _free_value(struct type *type, struct value *v) @@ -1101,8 +1117,8 @@ A separate function encoding these cases will simplify some code later. val->bool = 0; break; case Vlabel: - val->label = NULL; - break; + val->label = 0; // NOTEST + break; // NOTEST } } @@ -1113,8 +1129,8 @@ A separate function encoding these cases will simplify some code later. case Vnone: // NOTEST break; // NOTEST case Vlabel: - vnew->label = vold->label; - break; + vnew->label = vold->label; // NOTEST + break; // NOTEST case Vbool: vnew->bool = vold->bool; break; @@ -1152,7 +1168,7 @@ A separate function encoding these cases will simplify some code later. 
case Vnone: // NOTEST fprintf(f, "*no-value*"); break; // NOTEST case Vlabel: // NOTEST - fprintf(f, "*label-%p*", v->label); break; // NOTEST + fprintf(f, "*label-%d*", v->label); break; // NOTEST case Vstr: fprintf(f, "%.*s", v->str.len, v->str.txt); break; case Vbool: @@ -1237,10 +1253,11 @@ executable. return v; } -###### Grammar - +###### declare terminals $TERM True False +###### Grammar + $*val Value -> True ${ $0 = new_val(Tbool, $1); @@ -1294,7 +1311,7 @@ executable. { struct val *val = cast(val, prog); if (!type_compat(type, val->vtype, rules)) - type_err(c, "error: expected %1%r found %2", + type_err(c, "error: expected %1 found %2", prog, type, rules, val->vtype); return val->vtype; } @@ -1337,6 +1354,98 @@ executable. return rv; } +#### Labels + +Labels are a temporary concept until I implement enums. They are an +anonymous enum which is declared by usage. They are only allowed in +`use` statements and corresponding `case` entries. They appear as a +period followed by an identifier. All identifiers that are "used" must +have a "case". + +For now, we have a global list of labels, and don't check that all "use" +match "case". 
+ +###### exec type + Xlabel, + +###### ast + struct label { + struct exec; + struct text name; + int value; + }; +###### free exec cases + case Xlabel: + free(e); + break; +###### print exec cases + case Xlabel: { + struct label *l = cast(label, e); + printf(".%.*s", l->name.len, l->name.txt); + break; + } + +###### ast + struct labels { + struct labels *next; + struct text name; + int value; + }; +###### parse context + struct labels *labels; + int next_label; +###### ast functions + static int label_lookup(struct parse_context *c, struct text name) + { + struct labels *l, **lp = &c->labels; + while (*lp && text_cmp((*lp)->name, name) < 0) + lp = &(*lp)->next; + if (*lp && text_cmp((*lp)->name, name) == 0) + return (*lp)->value; + l = calloc(1, sizeof(*l)); + l->next = *lp; + l->name = name; + if (c->next_label == 0) + c->next_label = 2; + l->value = c->next_label; + c->next_label += 1; + *lp = l; + return l->value; + } + +###### free context storage + while (context.labels) { + struct labels *l = context.labels; + context.labels = l->next; + free(l); + } + +###### declare terminals + $TERM . +###### term grammar + | . IDENTIFIER ${ { + struct label *l = new_pos(label, $ID); + l->name = $ID.txt; + $0 = l; + } }$ +###### propagate exec cases + case Xlabel: { + struct label *l = cast(label, prog); + l->value = label_lookup(c, l->name); + if (!type_compat(type, Tlabel, rules)) + type_err(c, "error: expected %1 found %2", + prog, type, rules, Tlabel); + return Tlabel; + } +###### interp exec cases + case Xlabel : { + struct label *l = cast(label, e); + rv.label = l->value; + rvtype = Tlabel; + break; + } + + ### Variables Variables are scoped named values. We store the names in a linked list @@ -1814,9 +1923,6 @@ all pending-scope variables become conditionally scoped. 
v->previous->scope == PendingScope) /* all previous branches used name */ v->scope = PendingScope; - else if (v->type == Tlabel) - /* Labels remain pending even when not used */ - v->scope = PendingScope; // UNTESTED else v->scope = OutScope; if (ct == CloseElse) { @@ -1847,8 +1953,6 @@ all pending-scope variables become conditionally scoped. v->scope = InScope; /* fallthrough */ case CloseSequential: - if (v->type == Tlabel) - v->scope = PendingScope; switch (v->scope) { case InScope: v->scope = OutScope; @@ -1862,10 +1966,7 @@ all pending-scope variables become conditionally scoped. for (v2 = v; v2 && v2->scope == PendingScope; v2 = v2->previous) - if (v2->type == Tlabel) - v2->scope = CondScope; - else - v2->scope = OutScope; + v2->scope = OutScope; break; case CondScope: case OutScope: break; @@ -1952,7 +2053,7 @@ tell if it was set or not later. if (init) memcpy(ret, init, t->size); else - val_init(t, ret); + val_init(t, ret); // NOTEST return ret; } @@ -2182,13 +2283,13 @@ correctly. *perr |= Eretry; } } else if (!type_compat(type, v->type, rules)) { - type_err(c, "error: expected %1%r but variable '%v' is %2", prog, + type_err(c, "error: expected %1 but variable '%v' is %2", prog, type, rules, v->type); type_err(c, "info: this is where '%v' was set to %1", v->where_set, v->type, rules, NULL); } if (!v->global || v->frame_pos < 0) - *perr |= Enoconst; + *perr |= Eruntime; if (!type) return v->type; return type; @@ -2598,6 +2699,8 @@ function will be needed. ###### type functions void (*print_type_decl)(struct type *type, FILE *f); + struct type *(*fieldref)(struct type *t, struct parse_context *c, + struct fieldref *f, struct value **vp); ###### value functions @@ -2702,12 +2805,41 @@ function will be needed. 
return 1; } + static int find_struct_index(struct type *type, struct text field) + { + int i; + for (i = 0; i < type->structure.nfields; i++) + if (text_cmp(type->structure.fields[i].name, field) == 0) + return i; + return IndexInvalid; + } + + static struct type *structure_fieldref(struct type *t, struct parse_context *c, + struct fieldref *f, struct value **vp) + { + if (f->index == IndexUnknown) { + f->index = find_struct_index(t, f->name); + if (f->index < 0) + type_err(c, "error: cannot find requested field in %1", + f->left, t, 0, NULL); + } + if (f->index < 0) + return NULL; + if (vp) { + struct value *v = *vp; + v = (void*)v->ptr + t->structure.fields[f->index].offset; + *vp = v; + } + return t->structure.fields[f->index].type; + } + static struct type structure_prototype = { .init = structure_init, .free = structure_free, .free_type = structure_free_type, .print_type_decl = structure_print_type, .prepare_type = structure_prepare_type, + .fieldref = structure_fieldref, }; ###### exec type @@ -2720,6 +2852,7 @@ function will be needed. int index; struct text name; }; + enum { IndexUnknown = -1, IndexInvalid = -2 }; ###### free exec cases case Xfieldref: @@ -2728,7 +2861,7 @@ function will be needed. break; ###### declare terminals - $TERM struct . + $TERM struct ###### term grammar @@ -2736,7 +2869,7 @@ function will be needed. struct fieldref *fr = new_pos(fieldref, $2); fr->left = $<1; fr->name = $3.txt; - fr->index = -2; + fr->index = IndexUnknown; $0 = fr; } }$ @@ -2750,16 +2883,6 @@ function will be needed. break; } -###### ast functions - static int find_struct_index(struct type *type, struct text field) - { - int i; - for (i = 0; i < type->structure.nfields; i++) - if (text_cmp(type->structure.fields[i].name, field) == 0) - return i; - return -1; - } - ###### propagate exec cases case Xfieldref: @@ -2767,24 +2890,15 @@ function will be needed. 
struct fieldref *f = cast(fieldref, prog); struct type *st = propagate_types(f->left, c, perr, NULL, 0); - if (!st) - type_err(c, "error: unknown type for field access", f->left, // UNTESTED - NULL, 0, NULL); - else if (st->init != structure_init) - type_err(c, "error: field reference attempted on %1, not a struct", + if (!st || !st->fieldref) + type_err(c, "error: field reference on %1 is not supported", f->left, st, 0, NULL); - else if (f->index == -2) { - f->index = find_struct_index(st, f->name); - if (f->index < 0) - type_err(c, "error: cannot find requested field in %1", - f->left, st, 0, NULL); - } - if (f->index >= 0) { - struct type *ft = st->structure.fields[f->index].type; - if (!type_compat(type, ft, rules)) + else { + t = st->fieldref(st, c, f, NULL); + if (t && !type_compat(type, t, rules)) type_err(c, "error: have %1 but need %2", prog, - ft, rules, type); - return ft; + t, rules, type); + return t; } break; } @@ -2795,8 +2909,8 @@ function will be needed. struct fieldref *f = cast(fieldref, e); struct type *ltype; struct value *lleft = linterp_exec(c, f->left, <ype); - lrv = (void*)lleft->ptr + ltype->structure.fields[f->index].offset; - rvtype = ltype->structure.fields[f->index].type; + lrv = lleft; + rvtype = ltype->fieldref(ltype, c, f, &lrv); break; } @@ -2906,6 +3020,336 @@ function will be needed. } } +#### References + +References, or pointers, are values that refer to another value. They +can only refer to a `struct`, though as a struct can embed anything they +can effectively refer to anything. + +References are potentially dangerous as they might refer to some +variable which no longer exists - either because a stack frame +containing it has been discarded or because the value was allocated on +the heap and has now been free. Ocean does not yet provide any +protection against these problems. It will in due course. + +With references comes the opportunity and the need to explicitly +allocate values on the "heap" and to free them. 
We currently provide +fairly basic support for this. + +References make use of the `@` symbol in various ways. A type that starts +with `@` is a reference to whatever follows. A reference value +followed by an `@` acts as the referred value, though the `@` is often +not needed. Finally, an expression that starts with `@` is a special +reference-related expression. Some examples might help. + +##### Example: Reference examples + + struct foo + a: number + b: string + ref: @foo + bar: foo + bar.a = 23; bar.b = "hello" + baz: foo + ref = bar + baz = @ref + baz.a = ref.a * 2 + + ref = @new() + ref@ = baz + @free = ref + ref = @nil + +Obviously this is very contrived. `ref` is a reference to a `foo` which +is initially set to refer to the value stored in `bar` - no extra syntax +is needed to "Take the address of" `bar` - the fact that `ref` is a +reference means that only the address makes sense. + +When `ref.a` is accessed, that is whatever value is stored in `bar.a`. +The same syntax is used for accessing fields both in structs and in +references to structs. It would be correct to use `ref@.a`, but not +necessary. + +`@new()` creates an object of whatever type is needed for the program +to be type-correct. In future iterations of Ocean, a +constructor will accept arguments, so the syntax now looks like a +function call. `@free` can be assigned any reference that was returned +by `@new()`, and it will be freed. `@nil` is a value of whatever +reference type is appropriate, and is stable and never the address of +anything in the heap or on the stack. A reference can be assigned +`@nil` or compared against that value. 
+ +###### declare terminals + $TERM @ + +###### type union fields + + struct { + struct type *referent; + } reference; + +###### value union fields + struct value *ref; + +###### value functions + + static void reference_print_type(struct type *t, FILE *f) + { + fprintf(f, "@"); + type_print(t->reference.referent, f); + } + + static int reference_cmp(struct type *tl, struct type *tr, + struct value *left, struct value *right) + { + return left->ref == right->ref ? 0 : 1; + } + + static void reference_dup(struct type *t, + struct value *vold, struct value *vnew) + { + vnew->ref = vold->ref; + } + + static void reference_free(struct type *t, struct value *v) + { + /* Nothing to do here */ + } + + static int reference_compat(struct type *require, struct type *have) + { + if (have->compat != require->compat) + return 0; + if (have->reference.referent != require->reference.referent) + return 0; + return 1; + } + + static int reference_test(struct type *type, struct value *val) + { + return val->ref != NULL; + } + + static struct type *reference_fieldref(struct type *t, struct parse_context *c, + struct fieldref *f, struct value **vp) + { + struct type *rt = t->reference.referent; + + if (rt->fieldref) { + if (vp) + *vp = (*vp)->ref; + return rt->fieldref(rt, c, f, vp); + } + type_err(c, "error: field reference on %1 is not supported", + f->left, rt, 0, NULL); + return Tnone; + } + + + static struct type reference_prototype = { + .print_type = reference_print_type, + .cmp_eq = reference_cmp, + .dup = reference_dup, + .test = reference_test, + .free = reference_free, + .compat = reference_compat, + .fieldref = reference_fieldref, + .size = sizeof(void*), + .align = sizeof(void*), + }; + +###### type grammar + + | @ IDENTIFIER ${ { + struct type *t = find_type(c, $ID.txt); + if (!t) { + t = add_type(c, $ID.txt, NULL); + t->first_use = $ID; + } + $0 = find_anon_type(c, &reference_prototype, "@%.*s", + $ID.txt.len, $ID.txt.txt); + $0->reference.referent = t; + } }$ + +###### 
core functions + static int text_is(struct text t, char *s) + { + return (strlen(s) == t.len && + strncmp(s, t.txt, t.len) == 0); + } + +###### exec type + Xref, + +###### ast + struct ref { + struct exec; + enum ref_func { RefNew, RefFree, RefNil } action; + struct type *reftype; + struct exec *right; + }; + +###### SimpleStatement Grammar + + | @ IDENTIFIER = Expression ${ { + struct ref *r = new_pos(ref, $ID); + // Must be "free" + if (!text_is($ID.txt, "free")) + tok_err(c, "error: only \"@free\" makes sense here", + &$ID); + + $0 = r; + r->action = RefFree; + r->right = $action = RefNew; + } + }$ + | @ IDENTIFIER ${ + // Only 'nil' valid here + if (!text_is($ID.txt, "nil")) { + tok_err(c, "error: Only reference value is \"@nil\"", + &$ID); + } else { + struct ref *r = new_pos(ref,$ID); + $0 = r; + r->action = RefNil; + } + }$ + +###### print exec cases + case Xref: { + struct ref *r = cast(ref, e); + switch (r->action) { + case RefNew: + printf("@new()"); break; + case RefNil: + printf("@nil"); break; + case RefFree: + do_indent(indent, "@free = "); + print_exec(r->right, indent, bracket); + break; + } + break; + } + +###### propagate exec cases + case Xref: { + struct ref *r = cast(ref, prog); + switch (r->action) { + case RefNew: + if (type && type->free != reference_free) { + type_err(c, "error: @new() can only be used with references, not %1", + prog, type, 0, NULL); + return NULL; + } + if (type && !r->reftype) { + r->reftype = type; + *perr |= Eretry; + } + return type; + case RefNil: + if (type && type->free != reference_free) + type_err(c, "error: @nil can only be used with reference, not %1", + prog, type, 0, NULL); + if (type && !r->reftype) { + r->reftype = type; + *perr |= Eretry; + } + return type; + case RefFree: + t = propagate_types(r->right, c, perr, NULL, 0); + if (t && t->free != reference_free) + type_err(c, "error: @free can only be assigned a reference, not %1", + prog, t, 0, NULL); + r->reftype = Tnone; + return Tnone; + } + break; // 
NOTEST + } + + +###### interp exec cases + case Xref: { + struct ref *r = cast(ref, e); + switch (r->action) { + case RefNew: + if (r->reftype) + rv.ref = calloc(1, r->reftype->reference.referent->size); + rvtype = r->reftype; + break; + case RefNil: + rv.ref = NULL; + rvtype = r->reftype; + break; + case RefFree: + rv = interp_exec(c, r->right, &rvtype); + free_value(rvtype->reference.referent, rv.ref); + free(rv.ref); + rvtype = Tnone; + break; + } + break; + } + +###### free exec cases + case Xref: { + struct ref *r = cast(ref, e); + free_exec(r->right); + free(r); + break; + } + +###### Expressions: dereference + +###### Binode types + Deref, + +###### term grammar + + | Term @ ${ { + struct binode *b = new(binode); + b->op = Deref; + b->left = $left, -1, bracket); + printf("@"); + break; + +###### propagate binode cases + case Deref: + /* left must be a reference, and we return what it refers to */ + /* FIXME how can I pass the expected type down? */ + t = propagate_types(b->left, c, perr, NULL, 0); + if (!t || t->free != reference_free) + type_err(c, "error: Cannot dereference %1", b, t, 0, NULL); + else + return t->reference.referent; + break; + +###### interp binode cases + case Deref: { + left = interp_exec(c, b->left, <ype); + lrv = left.ref; + rvtype = ltype->reference.referent; + break; + } + + #### Functions A function is a chunk of code which can be passed parameters and can @@ -3221,7 +3665,7 @@ it in the "SimpleStatement Grammar" which will be described later. prog, NULL, 0, NULL); return NULL; } - *perr |= Enoconst; + *perr |= Eruntime; v->var->type->check_args(c, perr, v->var->type, args); if (v->var->type->function.inline_result) *perr |= Emaycopy; @@ -3338,8 +3782,8 @@ there. 
struct type *t2; propagate_types(b->left, c, perr, Tbool, 0); - t = propagate_types(b2->left, c, perr, type, Rnolabel); - t2 = propagate_types(b2->right, c, perr, type ?: t, Rnolabel); + t = propagate_types(b2->left, c, perr, type, 0); + t2 = propagate_types(b2->right, c, perr, type ?: t, 0); return t ?: t2; } @@ -3621,11 +4065,11 @@ expression operator, and the `CMPop` non-terminal will match one of them. case Eql: case NEql: /* Both must match but not be labels, result is Tbool */ - t = propagate_types(b->left, c, perr, NULL, Rnolabel); + t = propagate_types(b->left, c, perr, NULL, 0); if (t) propagate_types(b->right, c, perr, t, 0); else { - t = propagate_types(b->right, c, perr, NULL, Rnolabel); // UNTESTED + t = propagate_types(b->right, c, perr, NULL, 0); // UNTESTED if (t) // UNTESTED t = propagate_types(b->left, c, perr, t, 0); // UNTESTED } @@ -4142,7 +4586,7 @@ the common header for all reductions to use. if (!type) type = t; else if (t != type) - type_err(c, "error: expected %1%r, found %2", + type_err(c, "error: expected %1, found %2", e->left, type, rules, t); } } @@ -4219,7 +4663,7 @@ printed. else b = cast(binode, b->right); while (b) { - propagate_types(b->left, c, perr, NULL, Rnolabel); + propagate_types(b->left, c, perr, NULL, 0); b = cast(binode, b->right); } break; @@ -4250,9 +4694,9 @@ An assignment will assign a value to a variable, providing it hasn't been declared as a constant. The analysis phase ensures that the type will be correct so the interpreter just needs to perform the calculation. There is a form of assignment which declares a new -variable as well as assigning a value. If a name is assigned before -it is declared, and error will be raised as the name is created as -`Tlabel` and it is illegal to assign to such names. +variable as well as assigning a value. If a name is used before +it is declared, it is assumed to be a global constant, which is allowed to +be declared at any time. 
###### Binode types Assign, @@ -4337,7 +4781,7 @@ it is declared, and error will be raised as the name is created as * result is Tnone */ t = propagate_types(b->left, c, perr, NULL, - Rnolabel | (b->op == Assign ? Rnoconstant : 0)); + (b->op == Assign ? Rnoconstant : 0)); if (!b->right) return Tnone; @@ -4347,7 +4791,7 @@ it is declared, and error will be raised as the name is created as type_err(c, "info: variable '%v' was set as %1 here.", cast(var, b->left)->var->where_set, t, rules, NULL); } else { - t = propagate_types(b->right, c, perr, NULL, Rnolabel); + t = propagate_types(b->right, c, perr, NULL, 0); if (t) propagate_types(b->left, c, perr, t, (b->op == Assign ? Rnoconstant : 0)); @@ -4400,17 +4844,6 @@ function which has a return type, and the "condition" code blocks in $0 = b = new_pos(binode, $1); b->op = Use; b->right = $<2; - if (b->right->type == Xvar) { - struct var *v = cast(var, b->right); - if (v->var->type == Tnone) { - /* Convert this to a label */ - struct value *val; - - v->var->type = Tlabel; - val = global_alloc(c, Tlabel, v->var, NULL); - val->label = val; - } - } }$ ###### print binode cases @@ -5091,7 +5524,7 @@ constants. } while (perr & Eretry); if (perr & Efail) c->parse_error += 1; - else if (!(perr & Enoconst)) { + else if (!(perr & Eruntime)) { progress = some; struct value res = interp_exec( c, vb->right, &v->var->type); @@ -5502,19 +5935,19 @@ things which will likely grow as the languages grows. while mid := (lo + hi) / 2 if mid == target: - use Found + use .Found if mid < target: lo = mid else hi = mid if hi - lo < 1: lo = mid - use GiveUp + use .GiveUp use True do pass - case Found: + case .Found: print "Yay, I found", target - case GiveUp: + case .GiveUp: print "Closest I found was", lo size::= 10