X-Git-Url: https://ocean-lang.org/code/?a=blobdiff_plain;f=csrc%2Foceani.mdc;h=1cf84edc20206976ad5d171afe8274ff45727f91;hb=dcdc6aaac6d7f0882caa1f07a0bf91ce869ef7bf;hp=f3bf6a1456981f5ed491d884326aa8829aa27803;hpb=db571f9737d8d3f14921ae53f41d13897cadd1f5;p=ocean diff --git a/csrc/oceani.mdc b/csrc/oceani.mdc index f3bf6a1..1cf84ed 100644 --- a/csrc/oceani.mdc +++ b/csrc/oceani.mdc @@ -47,7 +47,7 @@ Elements which are present to make a usable language are: - "blocks" of multiple statements. - `pass`: a statement which does nothing. - - expressions: `+`, `-`, `*`, `/` can apply to numbers and `++` can + - expressions: `+`, `-`, `*`, `/`, `%` can apply to numbers and `++` can catenate strings. `and`, `or`, `not` manipulate Booleans, and normal comparison operators can work on all three types. - `print`: will print the values in a list of expressions. @@ -114,6 +114,7 @@ structures can be used. struct token_config config; char *file_name; int parse_error; + struct exec *prog; ## parse context }; @@ -179,7 +180,6 @@ structures can be used. }, }; int doprint=0, dotrace=0, doexec=1, brackets=0; - struct exec **prog; int opt; while ((opt = getopt_long(argc, argv, options, long_options, NULL)) != -1) { @@ -223,32 +223,33 @@ structures can be used. break; } if (ss) - prog = parse_oceani(ss->code, &context.config, - dotrace ? stderr : NULL); + parse_oceani(ss->code, &context.config, + dotrace ? stderr : NULL); else { fprintf(stderr, "oceani: cannot find section %s\n", section); exit(1); } } else - prog = parse_oceani(s->code, &context.config, - dotrace ? stderr : NULL); - if (!prog) { - fprintf(stderr, "oceani: fatal parser error.\n"); + parse_oceani(s->code, &context.config, + dotrace ? stderr : NULL); + if (!context.prog) { + fprintf(stderr, "oceani: no program found.\n"); context.parse_error = 1; } - if (prog && doprint) - print_exec(*prog, 0, brackets); - if (prog && doexec && !context.parse_error) { - if (!analyse_prog(*prog, &context)) { + if (context.prog && doprint) { + ## print decls + print_exec(context.prog, 0, brackets); + } + if (context.prog && doexec && !context.parse_error) { + if (!analyse_prog(context.prog, &context)) { fprintf(stderr, "oceani: type error in program - not running.\n"); exit(1); } - interp_prog(*prog, argv+optind+1); + interp_prog(context.prog, argv+optind+1); } - if (prog) { - free_exec(*prog); - free(prog); + if (context.prog) { + free_exec(context.prog); } while (s) { struct section *t = s->next; @@ -375,20 +376,13 @@ context so indicate that parsing failed. } fmt++; switch (*fmt) { - case '%': fputc(*fmt, stderr); break; - default: fputc('?', stderr); break; + case '%': fputc(*fmt, stderr); break; // NOTEST + default: fputc('?', stderr); break; // NOTEST case '1': - if (t1) - fprintf(stderr, "%.*s", t1->name.len, t1->name.txt); - else - fputs("*unknown*", stderr); + type_print(t1, stderr); break; case '2': - if (t2) - fprintf(stderr, "%.*s", t2->name.len, t2->name.txt); - else - fputs("*unknown*", stderr); - break; + type_print(t2, stderr); break; ## format cases } @@ -447,13 +441,15 @@ which are often passed around by value. struct text name; struct type *next; struct value (*init)(struct type *type); + struct value (*prepare)(struct type *type); struct value (*parse)(struct type *type, char *str); void (*print)(struct value val); + void (*print_type)(struct type *type, FILE *f); int (*cmp_order)(struct value v1, struct value v2); int (*cmp_eq)(struct value v1, struct value v2); struct value (*dup)(struct value val); void (*free)(struct value val); - struct type *(*compat)(struct type *this, struct type *other); + int (*compat)(struct type *this, struct type *other); long long (*to_int)(struct value *v); double (*to_float)(struct value *v); int (*to_mpq)(mpq_t *q, struct value *v); @@ -504,6 +500,43 @@ which are often passed around by value. v.type->free(v); } + static int type_compat(struct type *require, struct type *have, int rules) + { + if ((rules & Rboolok) && have == Tbool) + return 1; + if ((rules & Rnolabel) && have == Tlabel) + return 0; + if (!require || !have) + return 1; + + if (require->compat) + return require->compat(require, have); + + return require == have; + } + + static void type_print(struct type *type, FILE *f) + { + if (!type) + fputs("*unknown*type*", f); + else if (type->name.len) + fprintf(f, "%.*s", type->name.len, type->name.txt); + else if (type->print_type) + type->print_type(type, f); + else + fputs("*invalid*type*", f); // NOTEST + } + + static struct value val_prepare(struct type *type) + { + struct value rv; + + if (type) + return type->prepare(type); + rv.type = type; + return rv; + } + static struct value val_init(struct type *type) { struct value rv; @@ -535,7 +568,7 @@ which are often passed around by value. if (v.type && v.type->print) v.type->print(v); else - printf("*Unknown*"); + printf("*Unknown*"); // NOTEST } static struct value parse_value(struct type *type, char *arg) @@ -544,10 +577,21 @@ which are often passed around by value. if (type && type->parse) return type->parse(type, arg); - rv.type = NULL; - return rv; + rv.type = NULL; // NOTEST + return rv; // NOTEST } +###### forward decls + + static void free_value(struct value v); + static int type_compat(struct type *require, struct type *have, int rules); + static void type_print(struct type *type, FILE *f); + static struct value val_init(struct type *type); + static struct value dup_value(struct value v); + static int value_cmp(struct value left, struct value right); + static void print_value(struct value v); + static struct value parse_value(struct type *type, char *arg); + ###### free context types while (context.typelist) { @@ -557,7 +601,7 @@ which are often passed around by value. free(t); } -### Base Types +#### Base Types Values of the base types can be numbers, which we represent as multi-precision fractions, strings, Booleans and labels. When @@ -609,21 +653,9 @@ to parse each type from a string. } } - static int vtype_compat(struct type *require, struct type *have, int rules) - { - if ((rules & Rboolok) && have == Tbool) - return 1; - if ((rules & Rnolabel) && have == Tlabel) - return 0; - if (!require || !have) - return 1; - - return require == have; - } - ###### value functions - static struct value _val_init(struct type *type) + static struct value _val_prepare(struct type *type) { struct value rv; @@ -632,9 +664,10 @@ to parse each type from a string. case Vnone: break; case Vnum: - mpq_init(rv.num); break; + memset(&rv.num, 0, sizeof(rv.num)); + break; case Vstr: - rv.str.txt = malloc(1); + rv.str.txt = NULL; rv.str.len = 0; break; case Vbool: @@ -647,13 +680,37 @@ to parse each type from a string. return rv; } + static struct value _val_init(struct type *type) + { + struct value rv; + + rv.type = type; + switch(type->vtype) { + case Vnone: // NOTEST + break; // NOTEST + case Vnum: + mpq_init(rv.num); break; + case Vstr: + rv.str.txt = malloc(1); + rv.str.len = 0; + break; + case Vbool: + rv.bool = 0; + break; + case Vlabel: // NOTEST + rv.label = NULL; // NOTEST + break; // NOTEST + } + return rv; + } + static struct value _dup_value(struct value v) { struct value rv; rv.type = v.type; switch (rv.type->vtype) { - case Vnone: - break; + case Vnone: // NOTEST + break; // NOTEST case Vlabel: rv.label = v.label; break; @@ -677,13 +734,13 @@ to parse each type from a string. { int cmp; if (left.type != right.type) - return left.type - right.type; + return left.type - right.type; // NOTEST switch (left.type->vtype) { case Vlabel: cmp = left.label == right.label ? 0 : 1; break; case Vnum: cmp = mpq_cmp(left.num, right.num); break; case Vstr: cmp = text_cmp(left.str, right.str); break; case Vbool: cmp = left.bool - right.bool; break; - case Vnone: cmp = 0; + case Vnone: cmp = 0; // NOTEST } return cmp; } @@ -691,10 +748,10 @@ to parse each type from a string. static void _print_value(struct value v) { switch (v.type->vtype) { - case Vnone: - printf("*no-value*"); break; - case Vlabel: - printf("*label-%p*", v.label); break; + case Vnone: // NOTEST + printf("*no-value*"); break; // NOTEST + case Vlabel: // NOTEST + printf("*label-%p*", v.label); break; // NOTEST case Vstr: printf("%.*s", v.str.len, v.str.txt); break; case Vbool: @@ -720,10 +777,10 @@ to parse each type from a string. val.type = type; switch(type->vtype) { - case Vlabel: - case Vnone: - val.type = NULL; - break; + case Vlabel: // NOTEST + case Vnone: // NOTEST + val.type = NULL; // NOTEST + break; // NOTEST case Vstr: val.str.len = strlen(arg); val.str.txt = malloc(val.str.len); @@ -764,6 +821,7 @@ to parse each type from a string. static struct type base_prototype = { .init = _val_init, + .prepare = _val_prepare, .parse = _parse_value, .print = _print_value, .cmp_order = _value_cmp, @@ -1025,6 +1083,8 @@ no longer be primary. v = t->previous; free_value(t->val); + if (t->min_depth == 0) + free_exec(t->where_decl); free(t); } } @@ -1099,7 +1159,7 @@ all pending-scope variables become conditionally scoped. v->scope = InScope; v->in_scope = c->in_scope; c->in_scope = v; - v->val = val_init(NULL); + v->val = val_prepare(NULL); return v; } @@ -1259,6 +1319,8 @@ subclasses, and to access these we need to be able to `cast` the static int __fput_loc(struct exec *loc, FILE *f) { + if (!loc) + return 0; // NOTEST if (loc->line >= 0) { fprintf(f, "%d:%d: ", loc->line, loc->column); return 1; @@ -1271,7 +1333,7 @@ subclasses, and to access these we need to be able to `cast` the static void fput_loc(struct exec *loc, FILE *f) { if (!__fput_loc(loc, f)) - fprintf(f, "??:??: "); + fprintf(f, "??:??: "); // NOTEST } Each different type of `exec` node needs a number of functions @@ -1342,7 +1404,7 @@ also want to know what sort of bracketing to use. static void print_exec(struct exec *e, int indent, int bracket) { if (!e) - return; + return; // NOTEST switch (e->type) { case Xbinode: print_binode(cast(binode, e), indent, bracket); break; @@ -1369,7 +1431,7 @@ propagation is needed. ###### ast - enum val_rules {Rnolabel = 1<<0, Rboolok = 1<<1}; + enum val_rules {Rnolabel = 1<<0, Rboolok = 1<<1, Rnoconstant = 2<<1}; ###### format cases case 'r': @@ -1413,18 +1475,45 @@ Each `exec` can return a value, which may be `Tnone` but must be non-NULL; ###### core functions + struct lrval { + struct value val, *lval; + }; + + static struct lrval _interp_exec(struct exec *e); + static struct value interp_exec(struct exec *e) { - struct value rv; + struct lrval ret = _interp_exec(e); + + if (ret.lval) + return dup_value(*ret.lval); + else + return ret.val; + } + + static struct value *linterp_exec(struct exec *e) + { + struct lrval ret = _interp_exec(e); + + return ret.lval; + } + + static struct lrval _interp_exec(struct exec *e) + { + struct lrval ret; + struct value rv, *lrv = NULL; rv.type = Tnone; - if (!e) - return rv; + if (!e) { + ret.lval = lrv; + ret.val = rv; + return ret; + } switch(e->type) { case Xbinode: { struct binode *b = cast(binode, e); - struct value left, right; + struct value left, right, *lleft; left.type = right.type = Tnone; switch (b->op) { ## interp binode cases @@ -1434,7 +1523,240 @@ Each `exec` can return a value, which may be `Tnone` but must be non-NULL; } ## interp exec cases } - return rv; + ret.lval = lrv; + ret.val = rv; + return ret; + } + +### Complex types + +Now that we have the shape of the interpreter in place we can add some +complex types and connected them in to the data structures and the +different phases of parse, analyse, print, interpret. + +For now, just arrays. + +#### Arrays + +Arrays can be declared by giving a size and a type, as `[size]type' so +`freq:[26]number` declares `freq` to be an array of 26 numbers. The +size can be an arbitrary expression which is evaluated when the name +comes into scope. + +Arrays cannot be assigned. When pointers are introduced we will also +introduce array slices which can refer to part or all of an array - +the assignment syntax will create a slice. For now, an array can only +ever be referenced by the name it is declared with. It is likely that +a "`copy`" primitive will eventually be define which can be used to +make a copy of an array with controllable depth. + +###### type union fields + + struct { + int size; + struct variable *vsize; + struct type *member; + } array; + +###### value union fields + struct { + struct value *elmnts; + } array; + +###### value functions + + static struct value array_prepare(struct type *type) + { + struct value ret; + + ret.type = type; + ret.array.elmnts = NULL; + return ret; + } + + static struct value array_init(struct type *type) + { + struct value ret; + int i; + + ret.type = type; + if (type->array.vsize) { + mpz_t q; + mpz_init(q); + mpz_tdiv_q(q, mpq_numref(type->array.vsize->val.num), + mpq_denref(type->array.vsize->val.num)); + type->array.size = mpz_get_si(q); + mpz_clear(q); + } + ret.array.elmnts = calloc(type->array.size, + sizeof(ret.array.elmnts[0])); + for (i = 0; ret.array.elmnts && i < type->array.size; i++) + ret.array.elmnts[i] = val_init(type->array.member); + return ret; + } + + static void array_free(struct value val) + { + int i; + + if (val.array.elmnts) + for (i = 0; i < val.type->array.size; i++) + free_value(val.array.elmnts[i]); + free(val.array.elmnts); + } + + static int array_compat(struct type *require, struct type *have) + { + if (have->compat != require->compat) + return 0; + /* Both are arrays, so we can look at details */ + if (!type_compat(require->array.member, have->array.member, 0)) + return 0; + if (require->array.vsize == NULL && have->array.vsize == NULL) + return require->array.size == have->array.size; + + return require->array.vsize == have->array.vsize; + } + + static void array_print_type(struct type *type, FILE *f) + { + fputs("[", f); + if (type->array.vsize) { + struct binding *b = type->array.vsize->name; + fprintf(f, "%.*s]", b->name.len, b->name.txt); + } else + fprintf(f, "%d]", type->array.size); + type_print(type->array.member, f); + } + + static struct type array_prototype = { + .prepare = array_prepare, + .init = array_init, + .print_type = array_print_type, + .compat = array_compat, + .free = array_free, + }; + +###### type grammar + + | [ NUMBER ] Type ${ + $0 = calloc(1, sizeof(struct type)); + *($0) = array_prototype; + $0->array.member = $<4; + $0->array.vsize = NULL; + { + struct parse_context *c = config2context(config); + char tail[3]; + mpq_t num; + if (number_parse(num, tail, $2.txt) == 0) + tok_err(c, "error: unrecognised number", &$2); + else if (tail[0]) + tok_err(c, "error: unsupported number suffix", &$2); + else { + $0->array.size = mpz_get_ui(mpq_numref(num)); + if (mpz_cmp_ui(mpq_denref(num), 1) != 0) { + tok_err(c, "error: array size must be an integer", + &$2); + } else if (mpz_cmp_ui(mpq_numref(num), 1UL << 30) >= 0) + tok_err(c, "error: array size is too large", + &$2); + mpq_clear(num); + } + $0->next= c->anon_typelist; + c->anon_typelist = $0; + } + }$ + + | [ IDENTIFIER ] Type ${ { + struct parse_context *c = config2context(config); + struct variable *v = var_ref(c, $2.txt); + + if (!v) + tok_err(config2context(config), "error: name undeclared", &$2); + else if (!v->constant) + tok_err(config2context(config), "error: array size must be a constant", &$2); + + $0 = calloc(1, sizeof(struct type)); + *($0) = array_prototype; + $0->array.member = $<4; + $0->array.size = 0; + $0->array.vsize = v; + $0->next= c->anon_typelist; + c->anon_typelist = $0; + } }$ + +###### parse context + + struct type *anon_typelist; + +###### free context types + + while (context.anon_typelist) { + struct type *t = context.anon_typelist; + + context.anon_typelist = t->next; + free(t); + } + +###### Binode types + Index, + +###### variable grammar + + | Variable [ Expression ] ${ { + struct binode *b = new(binode); + b->op = Index; + b->left = $<1; + b->right = $<3; + $0 = b; + } }$ + +###### print binode cases + case Index: + print_exec(b->left, -1, 0); + printf("["); + print_exec(b->right, -1, 0); + printf("]"); + break; + +###### propagate binode cases + case Index: + /* left must be an array, right must be a number, + * result is the member type of the array + */ + propagate_types(b->right, c, ok, Tnum, 0); + t = propagate_types(b->left, c, ok, NULL, rules & Rnoconstant); + if (!t || t->compat != array_compat) { + type_err(c, "error: %1 cannot be indexed", prog, t, 0, NULL); + *ok = 0; + return NULL; + } else { + if (!type_compat(type, t->array.member, rules)) { + type_err(c, "error: have %1 but need %2", prog, + t->array.member, rules, type); + *ok = 0; + } + return t->array.member; + } + break; + +###### interp binode cases + case Index: { + mpz_t q; + long i; + + lleft = linterp_exec(b->left); + right = interp_exec(b->right); + mpz_init(q); + mpz_tdiv_q(q, mpq_numref(right.num), mpq_denref(right.num)); + i = mpz_get_si(q); + mpz_clear(q); + + if (i >= 0 && i < lleft->type->array.size) + lrv = &lleft->array.elmnts[i]; + else + rv = val_init(lleft->type->array.member); + break; } ## Language elements @@ -1520,20 +1842,21 @@ an executable. } ###### propagate exec cases - case Xval: - { - struct val *val = cast(val, prog); - if (!vtype_compat(type, val->val.type, rules)) { - type_err(c, "error: expected %1%r found %2", - prog, type, rules, val->val.type); - *ok = 0; - } - return val->val.type; + case Xval: + { + struct val *val = cast(val, prog); + if (!type_compat(type, val->val.type, rules)) { + type_err(c, "error: expected %1%r found %2", + prog, type, rules, val->val.type); + *ok = 0; } + return val->val.type; + } ###### interp exec cases case Xval: - return dup_value(cast(val, e)->val); + rv = dup_value(cast(val, e)->val); + break; ###### ast functions static void free_val(struct val *v) @@ -1589,7 +1912,7 @@ link to find the primary instance. ###### Grammar $*var - VariableDecl -> IDENTIFIER := ${ { + VariableDecl -> IDENTIFIER : ${ { struct variable *v = var_decl(config2context(config), $1.txt); $0 = new_pos(var, $1); $0->var = v; @@ -1604,7 +1927,7 @@ link to find the primary instance. v->where_decl, Tnone, 0, Tnone); } } }$ - | IDENTIFIER ::= ${ { + | IDENTIFIER :: ${ { struct variable *v = var_decl(config2context(config), $1.txt); $0 = new_pos(var, $1); $0->var = v; @@ -1620,14 +1943,14 @@ link to find the primary instance. v->where_decl, Tnone, 0, Tnone); } } }$ - | IDENTIFIER : Type = ${ { + | IDENTIFIER : Type ${ { struct variable *v = var_decl(config2context(config), $1.txt); $0 = new_pos(var, $1); $0->var = v; if (v) { v->where_decl = $0; v->where_set = $0; - v->val = val_init($<3); + v->val = val_prepare($<3); } else { v = var_ref(config2context(config), $1.txt); $0->var = v; @@ -1637,14 +1960,14 @@ link to find the primary instance. v->where_decl, Tnone, 0, Tnone); } } }$ - | IDENTIFIER :: Type = ${ { + | IDENTIFIER :: Type ${ { struct variable *v = var_decl(config2context(config), $1.txt); $0 = new_pos(var, $1); $0->var = v; if (v) { v->where_decl = $0; v->where_set = $0; - v->val = val_init($<3); + v->val = val_prepare($<3); v->constant = 1; } else { v = var_ref(config2context(config), $1.txt); @@ -1656,6 +1979,7 @@ link to find the primary instance. } } }$ + $*exec Variable -> IDENTIFIER ${ { struct variable *v = var_ref(config2context(config), $1.txt); $0 = new_pos(var, $1); @@ -1663,13 +1987,14 @@ link to find the primary instance. /* This might be a label - allocate a var just in case */ v = var_decl(config2context(config), $1.txt); if (v) { - v->val = val_init(Tlabel); + v->val = val_prepare(Tlabel); v->val.label = &v->val; v->where_set = $0; } } - $0->var = v; + cast(var, $0)->var = v; } }$ + ## variable grammar $*type Type -> IDENTIFIER ${ @@ -1681,6 +2006,7 @@ link to find the primary instance. $0 = Tnone; } }$ + ## type grammar ###### print exec cases case Xvar: @@ -1701,9 +2027,9 @@ link to find the primary instance. struct binding *b = v->var->name; fprintf(stderr, "%.*s", b->name.len, b->name.txt); } else - fputs("???", stderr); + fputs("???", stderr); // NOTEST } else - fputs("NOTVAR", stderr); + fputs("NOTVAR", stderr); // NOTEST break; ###### propagate exec cases @@ -1713,21 +2039,29 @@ link to find the primary instance. struct var *var = cast(var, prog); struct variable *v = var->var; if (!v) { - type_err(c, "%d:BUG: no variable!!", prog, Tnone, 0, Tnone); - *ok = 0; - return Tnone; + type_err(c, "%d:BUG: no variable!!", prog, Tnone, 0, Tnone); // NOTEST + *ok = 0; // NOTEST + return Tnone; // NOTEST } if (v->merged) v = v->merged; + if (v->constant && (rules & Rnoconstant)) { + type_err(c, "error: Cannot assign to a constant: %v", + prog, NULL, 0, NULL); + type_err(c, "info: name was defined as a constant here", + v->where_decl, NULL, 0, NULL); + *ok = 0; + return v->val.type; + } if (v->val.type == NULL) { if (type && *ok != 0) { - v->val = val_init(type); + v->val = val_prepare(type); v->where_set = prog; *ok = 2; } return type; } - if (!vtype_compat(type, v->val.type, rules)) { + if (!type_compat(type, v->val.type, rules)) { type_err(c, "error: expected %1%r but variable '%v' is %2", prog, type, rules, v->val.type); type_err(c, "info: this is where '%v' was set to %1", v->where_set, @@ -1747,7 +2081,8 @@ link to find the primary instance. if (v->merged) v = v->merged; - return dup_value(v->val); + lrv = &v->val; + break; } ###### ast functions @@ -1760,30 +2095,111 @@ link to find the primary instance. ###### free exec cases case Xvar: free_var(cast(var, e)); break; +### Expressions: Conditional + +Our first user of the `binode` will be conditional expressions, which +is a bit odd as they actually have three components. That will be +handled by having 2 binodes for each expression. The conditional +expression is the lowest precedence operatior, so it gets to define +what an "Expression" is. The next level up is "BoolExpr", which +comes next. + +Conditional expressions are of the form "value `if` condition `else` +other_value". There is no associativite with this operator: the +values and conditions can only be other conditional expressions if +they are enclosed in parentheses. Allowing nesting without +parentheses would be too confusing. + +###### Binode types + CondExpr, + +###### Grammar + + $*exec + Expression -> BoolExpr if BoolExpr else BoolExpr ${ { + struct binode *b1 = new(binode); + struct binode *b2 = new(binode); + b1->op = CondExpr; + b1->left = $<3; + b1->right = b2; + b2->op = CondExpr; + b2->left = $<1; + b2->right = $<5; + $0 = b1; + } }$ + | BoolExpr ${ $0 = $<1; }$ + +###### print binode cases + + case CondExpr: + b2 = cast(binode, b->right); + print_exec(b2->left, -1, 0); + printf(" if "); + print_exec(b->left, -1, 0); + printf(" else "); + print_exec(b2->right, -1, 0); + break; + +###### propagate binode cases + + case CondExpr: { + /* cond must be Tbool, others must match */ + struct binode *b2 = cast(binode, b->right); + struct type *t2; + + propagate_types(b->left, c, ok, Tbool, 0); + t = propagate_types(b2->left, c, ok, type, Rnolabel); + t2 = propagate_types(b2->right, c, ok, type ?: t, Rnolabel); + return t ?: t2; + } + +###### interp binode cases + + case CondExpr: { + struct binode *b2 = cast(binode, b->right); + left = interp_exec(b->left); + if (left.bool) + rv = interp_exec(b2->left); + else + rv = interp_exec(b2->right); + } + break; + ### Expressions: Boolean -Our first user of the `binode` will be expressions, and particularly -Boolean expressions. As I haven't implemented precedence in the -parser generator yet, we need different names for each precedence -level used by expressions. The outer most or lowest level precedence -are Boolean `or` `and`, and `not` which form an `Expression` out of `BTerm`s -and `BFact`s. +The next class of expressions to use the `binode` will be Boolean +expressions. As I haven't implemented precedence in the parser +generator yet, we need different names for each precedence level used +by expressions. The outer most or lowest level precedence are +conditional expressions are Boolean operators which form an `BoolExpr` +out of `BTerm`s and `BFact`s. As well as `or` `and`, and `not` we +have `and then` and `or else` which only evaluate the second operand +if the result would make a difference. ###### Binode types And, + AndThen, Or, + OrElse, Not, ###### Grammar $*exec - Expression -> Expression or BTerm ${ { + BoolExpr -> BoolExpr or BTerm ${ { struct binode *b = new(binode); b->op = Or; b->left = $<1; b->right = $<3; $0 = b; } }$ + | BoolExpr or else BTerm ${ { + struct binode *b = new(binode); + b->op = OrElse; + b->left = $<1; + b->right = $<4; + $0 = b; + } }$ | BTerm ${ $0 = $<1; }$ BTerm -> BTerm and BFact ${ { @@ -1793,6 +2209,13 @@ and `BFact`s. b->right = $<3; $0 = b; } }$ + | BTerm and then BFact ${ { + struct binode *b = new(binode); + b->op = AndThen; + b->left = $<1; + b->right = $<4; + $0 = b; + } }$ | BFact ${ $0 = $<1; }$ BFact -> not BFact ${ { @@ -1809,11 +2232,21 @@ and `BFact`s. printf(" and "); print_exec(b->right, -1, 0); break; + case AndThen: + print_exec(b->left, -1, 0); + printf(" and then "); + print_exec(b->right, -1, 0); + break; case Or: print_exec(b->left, -1, 0); printf(" or "); print_exec(b->right, -1, 0); break; + case OrElse: + print_exec(b->left, -1, 0); + printf(" or else "); + print_exec(b->right, -1, 0); + break; case Not: printf("not "); print_exec(b->right, -1, 0); @@ -1821,7 +2254,9 @@ and `BFact`s. ###### propagate binode cases case And: + case AndThen: case Or: + case OrElse: case Not: /* both must be Tbool, result is Tbool */ propagate_types(b->left, c, ok, Tbool, 0); @@ -1839,11 +2274,21 @@ and `BFact`s. right = interp_exec(b->right); rv.bool = rv.bool && right.bool; break; + case AndThen: + rv = interp_exec(b->left); + if (rv.bool) + rv = interp_exec(b->right); + break; case Or: rv = interp_exec(b->left); right = interp_exec(b->right); rv.bool = rv.bool || right.bool; break; + case OrElse: + rv = interp_exec(b->left); + if (!rv.bool) + rv = interp_exec(b->right); + break; case Not: rv = interp_exec(b->right); rv.bool = !rv.bool; @@ -1915,7 +2360,7 @@ expression operator. case GtrEq: printf(" >= "); break; case Eql: printf(" == "); break; case NEql: printf(" != "); break; - default: abort(); + default: abort(); // NOTEST } print_exec(b->right, -1, 0); break; @@ -1936,7 +2381,7 @@ expression operator. if (t) t = propagate_types(b->left, c, ok, t, 0); } - if (!vtype_compat(type, Tbool, 0)) { + if (!type_compat(type, Tbool, 0)) { type_err(c, "error: Comparison returns %1 but %2 expected", prog, Tbool, rules, type); *ok = 0; @@ -1963,7 +2408,7 @@ expression operator. case GtrEq: rv.bool = cmp >= 0; break; case Eql: rv.bool = cmp == 0; break; case NEql: rv.bool = cmp != 0; break; - default: rv.bool = 0; break; + default: rv.bool = 0; break; // NOTEST } break; } @@ -1984,7 +2429,7 @@ precedence is handled better I might be able to discard this. ###### Binode types Plus, Minus, - Times, Divide, + Times, Divide, Rem, Concat, Absolute, Negate, Bracket, @@ -2034,6 +2479,7 @@ precedence is handled better I might be able to discard this. Top -> * ${ $0.op = Times; }$ | / ${ $0.op = Divide; }$ + | % ${ $0.op = Rem; }$ | ++ ${ $0.op = Concat; }$ ###### print binode cases @@ -2042,15 +2488,17 @@ precedence is handled better I might be able to discard this. case Times: case Divide: case Concat: + case Rem: print_exec(b->left, indent, 0); switch(b->op) { - case Plus: printf(" + "); break; - case Minus: printf(" - "); break; - case Times: printf(" * "); break; - case Divide: printf(" / "); break; - case Concat: printf(" ++ "); break; - default: abort(); - } + case Plus: fputs(" + ", stdout); break; + case Minus: fputs(" - ", stdout); break; + case Times: fputs(" * ", stdout); break; + case Divide: fputs(" / ", stdout); break; + case Rem: fputs(" % ", stdout); break; + case Concat: fputs(" ++ ", stdout); break; + default: abort(); // NOTEST + } // NOTEST print_exec(b->right, indent, 0); break; case Absolute: @@ -2071,6 +2519,7 @@ precedence is handled better I might be able to discard this. case Plus: case Minus: case Times: + case Rem: case Divide: /* both must be numbers, result is Tnum */ case Absolute: @@ -2079,7 +2528,7 @@ precedence is handled better I might be able to discard this. * unary ops fit here too */ propagate_types(b->left, c, ok, Tnum, 0); propagate_types(b->right, c, ok, Tnum, 0); - if (!vtype_compat(type, Tnum, 0)) { + if (!type_compat(type, Tnum, 0)) { type_err(c, "error: Arithmetic returns %1 but %2 expected", prog, Tnum, rules, type); *ok = 0; @@ -2090,7 +2539,7 @@ precedence is handled better I might be able to discard this. /* both must be Tstr, result is Tstr */ propagate_types(b->left, c, ok, Tstr, 0); propagate_types(b->right, c, ok, Tstr, 0); - if (!vtype_compat(type, Tstr, 0)) { + if (!type_compat(type, Tstr, 0)) { type_err(c, "error: Concat returns %1 but %2 expected", prog, Tstr, rules, type); *ok = 0; @@ -2122,6 +2571,20 @@ precedence is handled better I might be able to discard this. right = interp_exec(b->right); mpq_div(rv.num, rv.num, right.num); break; + case Rem: { + mpz_t l, r, rem; + + left = interp_exec(b->left); + right = interp_exec(b->right); + mpz_init(l); mpz_init(r); mpz_init(rem); + mpz_tdiv_q(l, mpq_numref(left.num), mpq_denref(left.num)); + mpz_tdiv_q(r, mpq_numref(right.num), mpq_denref(right.num)); + mpz_tdiv_r(rem, l, r); + rv = val_init(Tnum); + mpq_set_z(rv.num, rem); + mpz_clear(r); mpz_clear(l); mpz_clear(rem); + break; + } case Negate: rv = interp_exec(b->right); mpq_neg(rv.num, rv.num); @@ -2442,25 +2905,29 @@ it is declared, and error will be raised as the name is created as Declare, ###### SimpleStatement Grammar - | Variable = Expression ${ { - struct var *v = cast(var, $1); - + | Variable = Expression ${ $0 = new(binode); $0->op = Assign; $0->left = $<1; $0->right = $<3; - if (v->var && v->var->constant) { - type_err(config2context(config), "Cannot assign to a constant: %v", - $0->left, NULL, 0, NULL); - type_err(config2context(config), "name was defined as a constant here", - v->var->where_decl, NULL, 0, NULL); - } - } }$ - | VariableDecl Expression ${ + }$ + | VariableDecl = Expression ${ $0 = new(binode); $0->op = Declare; $0->left = $<1; - $0->right =$<2; + $0->right =$<3; + }$ + + | VariableDecl ${ + if ($1->var->where_set == NULL) { + type_err(config2context(config), "Variable declared with no type or value: %v", + $1, NULL, 0, NULL); + } else { + $0 = new(binode); + $0->op = Declare; + $0->left = $<1; + $0->right = NULL; + } }$ ###### print binode cases @@ -2480,19 +2947,24 @@ it is declared, and error will be raised as the name is created as do_indent(indent, ""); print_exec(b->left, indent, 0); if (cast(var, b->left)->var->constant) { - if (v->where_decl == v->where_set) - printf("::%.*s = ", v->val.type->name.len, - v->val.type->name.txt); - else - printf(" ::= "); + if (v->where_decl == v->where_set) { + printf("::"); + type_print(v->val.type, stdout); + printf(" "); + } else + printf(" ::"); } else { - if (v->where_decl == v->where_set) - printf(":%.*s = ", v->val.type->name.len, - v->val.type->name.txt); - else - printf(" := "); + if (v->where_decl == v->where_set) { + printf(":"); + type_print(v->val.type, stdout); + printf(" "); + } else + printf(" :"); + } + if (b->right) { + printf("= "); + print_exec(b->right, indent, 0); } - print_exec(b->right, indent, 0); if (indent >= 0) printf("\n"); } @@ -2502,8 +2974,16 @@ it is declared, and error will be raised as the name is created as case Assign: case Declare: - /* Both must match and not be labels, result is Tnone */ - t = propagate_types(b->left, c, ok, NULL, Rnolabel); + /* Both must match and not be labels, + * Type must support 'dup', + * For Assign, left must not be constant. + * result is Tnone + */ + t = propagate_types(b->left, c, ok, NULL, + Rnolabel | (b->op == Assign ? Rnoconstant : 0)); + if (!b->right) + return Tnone; + if (t) { if (propagate_types(b->right, c, ok, t, 0) != t) if (b->left->type == Xvar) @@ -2512,7 +2992,12 @@ it is declared, and error will be raised as the name is created as } else { t = propagate_types(b->right, c, ok, NULL, Rnolabel); if (t) - propagate_types(b->left, c, ok, t, 0); + propagate_types(b->left, c, ok, t, + (b->op == Assign ? Rnoconstant : 0)); + } + if (t && t->dup == NULL) { + type_err(c, "error: cannot assign value of type %1", b, t, 0, NULL); + *ok = 0; } return Tnone; @@ -2521,12 +3006,25 @@ it is declared, and error will be raised as the name is created as ###### interp binode cases case Assign: + lleft = linterp_exec(b->left); + right = interp_exec(b->right); + if (lleft) { + free_value(*lleft); + *lleft = right; + } else + free_value(right); // NOTEST + right.type = NULL; + break; + case Declare: { struct variable *v = cast(var, b->left)->var; if (v->merged) v = v->merged; - right = interp_exec(b->right); + if (b->right) + right = interp_exec(b->right); + else + right = val_init(v->val.type); free_value(v->val); v->val = right; right.type = NULL; @@ -2954,14 +3452,14 @@ defined. struct casepart *cp; t = propagate_types(cs->forpart, c, ok, Tnone, 0); - if (!vtype_compat(Tnone, t, 0)) + if (!type_compat(Tnone, t, 0)) *ok = 0; t = propagate_types(cs->dopart, c, ok, Tnone, 0); - if (!vtype_compat(Tnone, t, 0)) + if (!type_compat(Tnone, t, 0)) *ok = 0; if (cs->dopart) { t = propagate_types(cs->thenpart, c, ok, Tnone, 0); - if (!vtype_compat(Tnone, t, 0)) + if (!type_compat(Tnone, t, 0)) *ok = 0; } if (cs->casepart == NULL) @@ -3023,10 +3521,10 @@ defined. interp_exec(c->dopart); if (c->thenpart) { - v = interp_exec(c->thenpart); - if (v.type != Tnone || !c->dopart) - return v; - free_value(v); + rv = interp_exec(c->thenpart); + if (rv.type != Tnone || !c->dopart) + goto Xcond_done; + free_value(rv); } } while (c->dopart); @@ -3035,15 +3533,154 @@ defined. if (value_cmp(v, cnd) == 0) { free_value(v); free_value(cnd); - return interp_exec(cp->action); + rv = interp_exec(cp->action); + goto Xcond_done; } free_value(v); } free_value(cnd); if (c->elsepart) - return interp_exec(c->elsepart); - v.type = Tnone; - return v; + rv = interp_exec(c->elsepart); + else + rv.type = Tnone; + Xcond_done: + break; + } + +### Top level structure + +All the language elements so far can be used in various places. Now +it is time to clarify what those places are. + +At the top level of a file there will be a number of declarations. +Many of the things that can be declared haven't been described yet, +such as functions, procedures, imports, named types, and probably +more. +For now there are two sorts of things that can appear at the top +level. They are predefined constants and the main program. While the +syntax will allow the main program to appear multiple times, that will +trigger an error if it is actually attempted. + +The various declarations do not return anything. They store the +various declarations in the parse context. + +###### Parser: grammar + + $void + Ocean -> DeclarationList + + DeclarationList -> Declaration + | DeclarationList Declaration + + Declaration -> DeclareConstant + | DeclareProgram + | NEWLINE + + ## top level grammar + +### The `const` section + +As well as being defined in with the code that uses them, constants +can be declared at the top level. These have full-file scope, so they +are always `InScope`. The value of a top level constant can be given +as an expression, and this is evaluated immediately rather than in the +later interpretation stage. Once we add functions to the language, we +will need rules concern which, if any, can be used to define a top +level constant. + +Constants are defined in a sectiont that starts with the reserved word +`const` and then has a block with a list of assignment statements. +For syntactic consistency, these must use the double-colon syntax to +make it clear that they are constants. Type can also be given: if +not, the type will be determined during analysis, as with other +constants. + +As the types constants are inserted at the head of a list, printing +them in the same order that they were read is not straight forward. +We take a quadratic approach here and count the number of constants +(variables of depth 0), then count down from there, each time +searching through for the Nth constant for decreasing N. + +###### top level grammar + + DeclareConstant -> const Open ConstList Close + | const Open Newlines ConstList Close + | const Open SimpleConstList } + | const Open Newlines SimpleConstList } + | const : ConstList + | const SimpleConstList + + ConstList -> ComplexConsts + ComplexConsts -> ComplexConst ComplexConsts + | ComplexConst + ComplexConst -> SimpleConstList NEWLINE + SimpleConstList -> Const ; SimpleConstList + | Const + | Const ; SimpleConstList ; + + $*type + CType -> Type ${ $0 = $<1; }$ + | ${ $0 = NULL; }$ + $void + Const -> IDENTIFIER :: CType = Expression ${ { + int ok; + struct variable *v; + + v = var_decl(config2context(config), $1.txt); + if (v) { + struct var *var = new_pos(var, $1); + v->where_decl = var; + v->where_set = var; + var->var = v; + v->constant = 1; + } else { + v = var_ref(config2context(config), $1.txt); + tok_err(config2context(config), "error: name already declared", &$1); + type_err(config2context(config), "info: this is where '%v' was first declared", + v->where_decl, NULL, 0, NULL); + } + do { + ok = 1; + propagate_types($5, config2context(config), &ok, $3, 0); + } while (ok == 2); + if (!ok) + config2context(config)->parse_error = 1; + else if (v) { + v->val = interp_exec($5); + } + } }$ + +###### print decls + { + struct variable *v; + int target = -1; + + while (target != 0) { + int i = 0; + for (v = context.in_scope; v; v=v->in_scope) + if (v->depth == 0) { + i += 1; + if (i == target) + break; + } + + if (target == -1) { + if (i) + printf("const:\n"); + target = i; + } else { + printf(" %.*s :: ", v->name->name.len, v->name->name.txt); + type_print(v->val.type, stdout); + printf(" = "); + if (v->val.type == Tstr) + printf("\""); + print_value(v->val); + if (v->val.type == Tstr) + printf("\""); + printf("\n"); + target -= 1; + } + } } ### Finally the whole program. @@ -3051,7 +3688,8 @@ defined. Somewhat reminiscent of Pascal a (current) Ocean program starts with the keyword "program" and a list of variable names which are assigned values from command line arguments. Following this is a `block` which -is the code to execute. +is the code to execute. Unlike Pascal, constants and other +declarations come *before* the program. As this is the top level, several things are handled a bit differently. @@ -3062,7 +3700,17 @@ analysis is a bit more interesting at this level. ###### Binode types Program, -###### Parser: grammar +###### top level grammar + + DeclareProgram -> Program ${ { + struct parse_context *c = config2context(config); + if (c->prog) + type_err(c, "Program defined a second time", + $1, NULL, 0, NULL); + else + c->prog = $<1; + } }$ + $*binode Program -> program OpenScope Varlist Block OptNL ${ @@ -3112,7 +3760,7 @@ analysis is a bit more interesting at this level. break; ###### propagate binode cases - case Program: abort(); + case Program: abort(); // NOTEST ###### core functions @@ -3122,7 +3770,7 @@ analysis is a bit more interesting at this level. int ok = 1; if (!b) - return 0; + return 0; // NOTEST do { ok = 1; propagate_types(b->right, c, &ok, Tnone, 0); @@ -3134,7 +3782,7 @@ analysis is a bit more interesting at this level. struct var *v = cast(var, b->left); if (!v->var->val.type) { v->var->where_set = b; - v->var->val = val_init(Tstr); + v->var->val = val_prepare(Tstr); } } b = cast(binode, prog); @@ -3157,7 +3805,7 @@ analysis is a bit more interesting at this level. struct value v; if (!prog) - return; + return; // NOTEST al = cast(binode, p->left); while (al) { struct var *v = cast(var, al->left); @@ -3179,7 +3827,7 @@ analysis is a bit more interesting at this level. } ###### interp binode cases - case Program: abort(); + case Program: abort(); // NOTEST ## And now to test it out. @@ -3196,8 +3844,18 @@ Fibonacci, and performs a binary search for a number. ###### test: hello + const: + pi ::= 3.1415926 + four ::= 2 + 2 ; five ::= 10/2 + const pie ::= "I like Pie"; + cake ::= "The cake is" + ++ " a lie" + program A B: print "Hello World, what lovely oceans you have!" + print "are there", five, "?" + print pi, pie, "but", cake + /* When a variable is defined in both branches of an 'if', * and used afterwards, the variables are merged. */ @@ -3209,16 +3867,17 @@ Fibonacci, and performs a binary search for a number. /* If a variable is not used after the 'if', no * merge happens, so types can be different */ - if A * 2 > B: + if A > B * 2: double:string = "yes" print A, "is more than twice", B, "?", double else: - double := A*2 - print "double", A, "is only", double + double := B*2 + print "double", B, "is", double - a := A; + a : number + a = A; b:number = B - if a > 0 and b > 0: + if a > 0 and then b > 0: while a != b: if a < b: b = b - a @@ -3262,3 +3921,24 @@ Fibonacci, and performs a binary search for a number. print "Yay, I found", target case GiveUp: print "Closest I found was", mid + + size::=55 + list:[size]number + list[0] = 1234 + for i:=1; then i = i + 1; while i < size: + n := list[i-1] * list[i-1] + list[i] = (n / 100) % 10000 + + print "Before sort:" + for i:=0; then i = i + 1; while i < size: + print "list[",i,"]=",list[i] + + for i := 1; then i=i+1; while i < size: + for j:=i-1; then j=j-1; while j >= 0: + if list[j] > list[j+1]: + t:= list[j] + list[j] = list[j+1] + list[j+1] = t + print "After sort:" + for i:=0; then i = i + 1; while i < size: + print "list[",i,"]=",list[i]