# Ocean Interpreter - Stoney Creek version
-Ocean is intended to be an compiled language, so this interpreter is
+Ocean is intended to be a compiled language, so this interpreter is
not targeted at being the final product. It is, rather, an intermediate
-stage, and fills that role in two distinct ways.
+stage and fills that role in two distinct ways.
Firstly, it exists as a platform to experiment with the early language
design. An interpreter is easy to write and easy to get working, so
- "blocks" of multiple statements.
- `pass`: a statement which does nothing.
- - expressions: `+`, `-`, `*`, `/` can apply to numbers and `++` can
+ - expressions: `+`, `-`, `*`, `/`, `%` can apply to numbers and `++` can
catenate strings. `and`, `or`, `not` manipulate Booleans, and
normal comparison operators can work on all three types.
- `print`: will print the values in a list of expressions.
for validating the parsing.
So the main requirements of the interpreter are:
-- Parse the program, possibly with tracing
-- Analyse the parsed program to ensure consistency
-- print the program
-- execute the program
+- Parse the program, possibly with tracing,
+- Analyse the parsed program to ensure consistency,
+- Print the program,
+- Execute the program.
This is all performed by a single C program extracted with
`parsergen`.
can contain multiple programs This is effected with the `--section`
option.
+This code must be compiled with `-fplan9-extensions` so that anonymous
+structures can be used.
+
###### File: oceani.mk
myCFLAGS := -Wall -g -fplan9-extensions
argv[optind]);
exit(1);
}
+
+ ## context initialization
+
if (section) {
struct section *ss;
for (ss = s; ss; ss = ss->next) {
### Analysis
-These four requirements of parse, analyse, print, interpret apply to
+The four requirements of parse, analyse, print, interpret apply to
each language element individually so that is how most of the code
will be structured.
Three of the four are fairly self explanatory. The one that requires
a little explanation is the analysis step.
-The current language design does not require (or even allow) the types
-of variables to be declared, but they must still have a single type.
-Different operations impose different requirements on the variables,
-for example addition requires both arguments to be numeric, and
-assignment requires the variable on the left to have the same type as
-the expression on the right.
+The current language design does not require the types of variables to
+be declared, but they must still have a single type. Different
+operations impose different requirements on the variables, for example
+addition requires both arguments to be numeric, and assignment
+requires the variable on the left to have the same type as the
+expression on the right.
Analysis involves propagating these type requirements around and
consequently setting the type of each variable. If any requirements
type matching and they might affect error messages, we need to pass those
in too.
+As well as type errors, we sometimes need to report problems with
+tokens, which might be unexpected or might name a type that has not
+been defined. For these we have `tok_err()` which reports an error
+with a given token. Each of the error functions sets the flag in the
+context so indicate that parsing failed.
+
###### forward decls
static void fput_loc(struct exec *loc, FILE *f);
case '%': fputc(*fmt, stderr); break;
default: fputc('?', stderr); break;
case '1':
- if (t1)
- fprintf(stderr, "%.*s", t1->name.len, t1->name.txt);
- else
- fputs("*unknown*", stderr);
+ type_print(t1, stderr);
break;
case '2':
- if (t2)
- fprintf(stderr, "%.*s", t2->name.len, t2->name.txt);
- else
- fputs("*unknown*", stderr);
- break;
+ type_print(t2, stderr);
break;
## format cases
}
static void tok_err(struct parse_context *c, char *fmt, struct token *t)
{
- fprintf(stderr, "%s:%d:%d: %s\n", c->file_name, t->line, t->col, fmt);
+ fprintf(stderr, "%s:%d:%d: %s: %.*s\n", c->file_name, t->line, t->col, fmt,
+ t->txt.len, t->txt.txt);
c->parse_error = 1;
}
existance of these conversion functions enable types to determine if
they are compatible with other types.
-Named type are stored in a simple linked list.
+Named type are stored in a simple linked list. Objects of each type are "values"
+which are often passed around by value.
###### ast
+ struct value {
+ struct type *type;
+ union {
+ ## value union fields
+ };
+ };
+
struct type {
struct text name;
struct type *next;
struct value (*init)(struct type *type);
+ struct value (*prepare)(struct type *type);
struct value (*parse)(struct type *type, char *str);
void (*print)(struct value val);
+ void (*print_type)(struct type *type, FILE *f);
int (*cmp_order)(struct value v1, struct value v2);
int (*cmp_eq)(struct value v1, struct value v2);
struct value (*dup)(struct value val);
void (*free)(struct value val);
- struct type *(*compat)(struct type *this, struct type *other);
+ int (*compat)(struct type *this, struct type *other);
long long (*to_int)(struct value *v);
double (*to_float)(struct value *v);
int (*to_mpq)(mpq_t *q, struct value *v);
};
};
- struct typep {
- struct type *t;
- };
-
###### parse context
struct type *typelist;
*/
}
- static void context_init(struct parse_context *c)
+ static void free_value(struct value v)
{
- ## context initialization
+ if (v.type)
+ v.type->free(v);
+ }
+
+ static int type_compat(struct type *require, struct type *have, int rules)
+ {
+ if ((rules & Rboolok) && have == Tbool)
+ return 1;
+ if ((rules & Rnolabel) && have == Tlabel)
+ return 0;
+ if (!require || !have)
+ return 1;
+
+ if (require->compat)
+ return require->compat(require, have);
+
+ return require == have;
+ }
+
+ static void type_print(struct type *type, FILE *f)
+ {
+ if (!type)
+ fputs("*unknown*type*", f);
+ else if (type->name.len)
+ fprintf(f, "%.*s", type->name.len, type->name.txt);
+ else if (type->print_type)
+ type->print_type(type, f);
+ else
+ fputs("*invalid*type*", f);
+ }
+
+ static struct value val_prepare(struct type *type)
+ {
+ struct value rv;
+
+ if (type)
+ return type->prepare(type);
+ rv.type = type;
+ return rv;
}
+ static struct value val_init(struct type *type)
+ {
+ struct value rv;
+
+ if (type)
+ return type->init(type);
+ rv.type = type;
+ return rv;
+ }
+
+ static struct value dup_value(struct value v)
+ {
+ if (v.type)
+ return v.type->dup(v);
+ return v;
+ }
+
+ static int value_cmp(struct value left, struct value right)
+ {
+ if (left.type && left.type->cmp_order)
+ return left.type->cmp_order(left, right);
+ if (left.type && left.type->cmp_eq)
+ return left.type->cmp_eq(left, right);
+ return -1;
+ }
+
+ static void print_value(struct value v)
+ {
+ if (v.type && v.type->print)
+ v.type->print(v);
+ else
+ printf("*Unknown*");
+ }
+
+ static struct value parse_value(struct type *type, char *arg)
+ {
+ struct value rv;
+
+ if (type && type->parse)
+ return type->parse(type, arg);
+ rv.type = NULL;
+ return rv;
+ }
+
+###### forward decls
+
+ static void free_value(struct value v);
+ static int type_compat(struct type *require, struct type *have, int rules);
+ static void type_print(struct type *type, FILE *f);
+ static struct value val_init(struct type *type);
+ static struct value dup_value(struct value v);
+ static int value_cmp(struct value left, struct value right);
+ static void print_value(struct value v);
+ static struct value parse_value(struct type *type, char *arg);
+
###### free context types
while (context.typelist) {
free(t);
}
-### Values
+#### Base Types
-Values can be numbers, which we represent as multi-precision
-fractions, strings, Booleans and labels. When analysing the program
-we also need to allow for places where no value is meaningful (type
-`Tnone`) and where we don't know what type to expect yet (type is
-`NULL`).
+Values of the base types can be numbers, which we represent as
+multi-precision fractions, strings, Booleans and labels. When
+analysing the program we also need to allow for places where no value
+is meaningful (type `Tnone`) and where we don't know what type to
+expect yet (type is `NULL`).
Values are never shared, they are always copied when used, and freed
when no longer needed.
###### type union fields
enum vtype {Vnone, Vstr, Vnum, Vbool, Vlabel} vtype;
-###### ast
- struct value {
- struct type *type;
- union {
- struct text str;
- mpq_t num;
- int bool;
- void *label;
- };
- };
-
- enum val_rules {Rnolabel = 1<<0, Rboolok = 1<<1};
-
-###### format cases
- case 'r':
- if (rules & Rnolabel)
- fputs(" (labels not permitted)", stderr);
- break;
+###### value union fields
+ struct text str;
+ mpq_t num;
+ int bool;
+ void *label;
###### ast functions
static void _free_value(struct value v)
}
}
- static void free_value(struct value v)
- {
- if (v.type)
- v.type->free(v);
- }
-
- static int vtype_compat(struct type *require, struct type *have, int rules)
- {
- if ((rules & Rboolok) && have == Tbool)
- return 1;
- if ((rules & Rnolabel) && have == Tlabel)
- return 0;
- if (!require || !have)
- return 1;
-
- return require == have;
- }
-
###### value functions
- static struct value _val_init(struct type *type)
+ static struct value _val_prepare(struct type *type)
{
struct value rv;
case Vnone:
break;
case Vnum:
- mpq_init(rv.num); break;
+ memset(&rv.num, 0, sizeof(rv.num));
+ break;
case Vstr:
- rv.str.txt = malloc(1);
+ rv.str.txt = NULL;
rv.str.len = 0;
break;
case Vbool:
return rv;
}
- static struct value val_init(struct type *type)
+ static struct value _val_init(struct type *type)
{
struct value rv;
- if (type)
- return type->init(type);
rv.type = type;
+ switch(type->vtype) {
+ case Vnone:
+ break;
+ case Vnum:
+ mpq_init(rv.num); break;
+ case Vstr:
+ rv.str.txt = malloc(1);
+ rv.str.len = 0;
+ break;
+ case Vbool:
+ rv.bool = 0;
+ break;
+ case Vlabel:
+ rv.label = NULL;
+ break;
+ }
return rv;
}
return rv;
}
- static struct value dup_value(struct value v)
- {
- if (v.type)
- return v.type->dup(v);
- return v;
- }
-
static int _value_cmp(struct value left, struct value right)
{
int cmp;
return cmp;
}
- static int value_cmp(struct value left, struct value right)
- {
- if (left.type && left.type->cmp_order)
- return left.type->cmp_order(left, right);
- if (left.type && left.type->cmp_eq)
- return left.type->cmp_eq(left, right);
- return -1;
- }
-
- static struct text text_join(struct text a, struct text b)
- {
- struct text rv;
- rv.len = a.len + b.len;
- rv.txt = malloc(rv.len);
- memcpy(rv.txt, a.txt, a.len);
- memcpy(rv.txt+a.len, b.txt, b.len);
- return rv;
- }
-
static void _print_value(struct value v)
{
switch (v.type->vtype) {
}
}
- static void print_value(struct value v)
- {
- if (v.type && v.type->print)
- v.type->print(v);
- else
- printf("*Unknown*");
- }
-
static struct value _parse_value(struct type *type, char *arg)
{
struct value val;
return val;
}
- static struct value parse_value(struct type *type, char *arg)
- {
- struct value rv;
-
- if (type && type->parse)
- return type->parse(type, arg);
- rv.type = NULL;
- return rv;
- }
-
static void _free_value(struct value v);
static struct type base_prototype = {
.init = _val_init,
+ .prepare = _val_prepare,
.parse = _parse_value,
.print = _print_value,
.cmp_order = _value_cmp,
return t;
}
-###### forward decls
- static struct type *add_base_type(struct parse_context *c, char *n, enum vtype vt);
-
###### context initialization
- Tbool = add_base_type(c, "Boolean", Vbool);
- Tstr = add_base_type(c, "string", Vstr);
- Tnum = add_base_type(c, "number", Vnum);
- Tnone = add_base_type(c, "none", Vnone);
- Tlabel = add_base_type(c, "label", Vlabel);
+ Tbool = add_base_type(&context, "Boolean", Vbool);
+ Tstr = add_base_type(&context, "string", Vstr);
+ Tnum = add_base_type(&context, "number", Vnum);
+ Tnone = add_base_type(&context, "none", Vnone);
+ Tlabel = add_base_type(&context, "label", Vlabel);
### Variables
Scopes in parallel branches can be partially merged. More
specifically, if a given name is declared in both branches of an
-if/else then it's scope is a candidate for merging. Similarly if
+if/else then its scope is a candidate for merging. Similarly if
every branch of an exhaustive switch (e.g. has an "else" clause)
declares a given name, then the scopes from the branches are
candidates for merging.
v->scope = InScope;
v->in_scope = c->in_scope;
c->in_scope = v;
- v->val = val_init(NULL);
+ v->val = val_prepare(NULL);
return v;
}
static int __fput_loc(struct exec *loc, FILE *f)
{
+ if (!loc)
+ return 0;
if (loc->line >= 0) {
fprintf(f, "%d:%d: ", loc->line, loc->column);
return 1;
any change is made. If it remains unchanged at `1`, then no more
propagation is needed.
+###### ast
+
+ enum val_rules {Rnolabel = 1<<0, Rboolok = 1<<1, Rnoconstant = 2<<1};
+
+###### format cases
+ case 'r':
+ if (rules & Rnolabel)
+ fputs(" (labels not permitted)", stderr);
+ break;
+
###### core functions
static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
###### core functions
+ struct lrval {
+ struct value val, *lval;
+ };
+
+ static struct lrval _interp_exec(struct exec *e);
+
static struct value interp_exec(struct exec *e)
{
- struct value rv;
+ struct lrval ret = _interp_exec(e);
+
+ if (ret.lval)
+ return dup_value(*ret.lval);
+ else
+ return ret.val;
+ }
+
+ static struct value *linterp_exec(struct exec *e)
+ {
+ struct lrval ret = _interp_exec(e);
+
+ return ret.lval;
+ }
+
+ static struct lrval _interp_exec(struct exec *e)
+ {
+ struct lrval ret;
+ struct value rv, *lrv = NULL;
rv.type = Tnone;
- if (!e)
- return rv;
+ if (!e) {
+ ret.lval = lrv;
+ ret.val = rv;
+ return ret;
+ }
switch(e->type) {
case Xbinode:
{
struct binode *b = cast(binode, e);
- struct value left, right;
+ struct value left, right, *lleft;
left.type = right.type = Tnone;
switch (b->op) {
## interp binode cases
}
## interp exec cases
}
- return rv;
+ ret.lval = lrv;
+ ret.val = rv;
+ return ret;
}
## Language elements
if (number_parse($0->val.num, tail, $1.txt) == 0)
mpq_init($0->val.num);
if (tail[0])
- tok_err(config2context(config), "error: unsupported number suffix.",
+ tok_err(config2context(config), "error: unsupported number suffix",
&$1);
}
}$
char tail[3];
string_parse(&$1, '\\', &$0->val.str, tail);
if (tail[0])
- tok_err(config2context(config), "error: unsupported string suffix.",
+ tok_err(config2context(config), "error: unsupported string suffix",
&$1);
}
}$
char tail[3];
string_parse(&$1, '\\', &$0->val.str, tail);
if (tail[0])
- tok_err(config2context(config), "error: unsupported string suffix.",
+ tok_err(config2context(config), "error: unsupported string suffix",
&$1);
}
}$
case Xval:
{
struct val *val = cast(val, prog);
- if (!vtype_compat(type, val->val.type, rules)) {
+ if (!type_compat(type, val->val.type, rules)) {
type_err(c, "error: expected %1%r found %2",
prog, type, rules, val->val.type);
*ok = 0;
###### interp exec cases
case Xval:
- return dup_value(cast(val, e)->val);
+ rv = dup_value(cast(val, e)->val);
+ break;
###### ast functions
static void free_val(struct val *v)
###### Grammar
$*var
- VariableDecl -> IDENTIFIER := ${ {
+ VariableDecl -> IDENTIFIER : ${ {
struct variable *v = var_decl(config2context(config), $1.txt);
$0 = new_pos(var, $1);
$0->var = v;
v->where_decl, Tnone, 0, Tnone);
}
} }$
- | IDENTIFIER ::= ${ {
+ | IDENTIFIER :: ${ {
struct variable *v = var_decl(config2context(config), $1.txt);
$0 = new_pos(var, $1);
$0->var = v;
v->where_decl, Tnone, 0, Tnone);
}
} }$
- | IDENTIFIER : Type = ${ {
+ | IDENTIFIER : Type ${ {
struct variable *v = var_decl(config2context(config), $1.txt);
$0 = new_pos(var, $1);
$0->var = v;
if (v) {
v->where_decl = $0;
v->where_set = $0;
- v->val = val_init($<3);
+ v->val = val_prepare($<3);
} else {
v = var_ref(config2context(config), $1.txt);
$0->var = v;
v->where_decl, Tnone, 0, Tnone);
}
} }$
- | IDENTIFIER :: Type = ${ {
+ | IDENTIFIER :: Type ${ {
struct variable *v = var_decl(config2context(config), $1.txt);
$0 = new_pos(var, $1);
$0->var = v;
if (v) {
v->where_decl = $0;
v->where_set = $0;
- v->val = val_init($<3);
+ v->val = val_prepare($<3);
v->constant = 1;
} else {
v = var_ref(config2context(config), $1.txt);
}
} }$
+ $*exec
Variable -> IDENTIFIER ${ {
struct variable *v = var_ref(config2context(config), $1.txt);
$0 = new_pos(var, $1);
/* This might be a label - allocate a var just in case */
v = var_decl(config2context(config), $1.txt);
if (v) {
- v->val = val_init(Tlabel);
+ v->val = val_prepare(Tlabel);
v->val.label = &v->val;
v->where_set = $0;
}
}
- $0->var = v;
+ cast(var, $0)->var = v;
} }$
+ ## variable grammar
$*type
Type -> IDENTIFIER ${
$0 = Tnone;
}
}$
+ ## type grammar
###### print exec cases
case Xvar:
}
if (v->merged)
v = v->merged;
+ if (v->constant && (rules & Rnoconstant)) {
+ type_err(c, "error: Cannot assign to a constant: %v",
+ prog, NULL, 0, NULL);
+ type_err(c, "info: name was defined as a constant here",
+ v->where_decl, NULL, 0, NULL);
+ *ok = 0;
+ return v->val.type;
+ }
if (v->val.type == NULL) {
if (type && *ok != 0) {
- v->val = val_init(type);
+ v->val = val_prepare(type);
v->where_set = prog;
*ok = 2;
}
return type;
}
- if (!vtype_compat(type, v->val.type, rules)) {
+ if (!type_compat(type, v->val.type, rules)) {
type_err(c, "error: expected %1%r but variable '%v' is %2", prog,
type, rules, v->val.type);
type_err(c, "info: this is where '%v' was set to %1", v->where_set,
if (v->merged)
v = v->merged;
- return dup_value(v->val);
+ lrv = &v->val;
+ break;
}
###### ast functions
###### free exec cases
case Xvar: free_var(cast(var, e)); break;
+### Expressions: Conditional
+
+Our first user of the `binode` will be conditional expressions, which
+is a bit odd as they actually have three components. That will be
+handled by having 2 binodes for each expression. The conditional
+expression is the lowest precedence operatior, so it gets to define
+what an "Expression" is. The next level up is "BoolExpr", which
+comes next.
+
+Conditional expressions are of the form "value `if` condition `else`
+other_value". There is no associativite with this operator: the
+values and conditions can only be other conditional expressions if
+they are enclosed in parentheses. Allowing nesting without
+parentheses would be too confusing.
+
+###### Binode types
+ CondExpr,
+
+###### Grammar
+
+ $*exec
+ Expression -> BoolExpr if BoolExpr else BoolExpr ${ {
+ struct binode *b1 = new(binode);
+ struct binode *b2 = new(binode);
+ b1->op = CondExpr;
+ b1->left = $<3;
+ b1->right = b2;
+ b2->op = CondExpr;
+ b2->left = $<1;
+ b2->right = $<5;
+ $0 = b1;
+ } }$
+ | BoolExpr ${ $0 = $<1; }$
+
+###### print binode cases
+
+ case CondExpr:
+ b2 = cast(binode, b->right);
+ print_exec(b2->left, -1, 0);
+ printf(" if ");
+ print_exec(b->left, -1, 0);
+ printf(" else ");
+ print_exec(b2->right, -1, 0);
+ break;
+
+###### propagate binode cases
+
+ case CondExpr: {
+ /* cond must be Tbool, others must match */
+ struct binode *b2 = cast(binode, b->right);
+ struct type *t2;
+
+ propagate_types(b->left, c, ok, Tbool, 0);
+ t = propagate_types(b2->left, c, ok, type, Rnolabel);
+ t2 = propagate_types(b2->right, c, ok, type ?: t, Rnolabel);
+ return t ?: t2;
+ }
+
+###### interp binode cases
+
+ case CondExpr: {
+ struct binode *b2 = cast(binode, b->right);
+ left = interp_exec(b->left);
+ if (left.bool)
+ rv = interp_exec(b2->left);
+ else
+ rv = interp_exec(b2->right);
+ }
+ break;
+
### Expressions: Boolean
-Our first user of the `binode` will be expressions, and particularly
-Boolean expressions. As I haven't implemented precedence in the
-parser generator yet, we need different names from each precedence
-level used by expressions. The outer most or lowest level precedence
-are Boolean `or` `and`, and `not` which form an `Expression` out of `BTerm`s
-and `BFact`s.
+The next class of expressions to use the `binode` will be Boolean
+expressions. As I haven't implemented precedence in the parser
+generator yet, we need different names for each precedence level used
+by expressions. The outer most or lowest level precedence are
+conditional expressions are Boolean operators which form an `BoolExpr`
+out of `BTerm`s and `BFact`s. As well as `or` `and`, and `not` we
+have `and then` and `or else` which only evaluate the second operand
+if the result would make a difference.
###### Binode types
And,
+ AndThen,
Or,
+ OrElse,
Not,
###### Grammar
$*exec
- Expression -> Expression or BTerm ${ {
+ BoolExpr -> BoolExpr or BTerm ${ {
struct binode *b = new(binode);
b->op = Or;
b->left = $<1;
b->right = $<3;
$0 = b;
} }$
+ | BoolExpr or else BTerm ${ {
+ struct binode *b = new(binode);
+ b->op = OrElse;
+ b->left = $<1;
+ b->right = $<4;
+ $0 = b;
+ } }$
| BTerm ${ $0 = $<1; }$
BTerm -> BTerm and BFact ${ {
b->right = $<3;
$0 = b;
} }$
+ | BTerm and then BFact ${ {
+ struct binode *b = new(binode);
+ b->op = AndThen;
+ b->left = $<1;
+ b->right = $<4;
+ $0 = b;
+ } }$
| BFact ${ $0 = $<1; }$
BFact -> not BFact ${ {
printf(" and ");
print_exec(b->right, -1, 0);
break;
+ case AndThen:
+ print_exec(b->left, -1, 0);
+ printf(" and then ");
+ print_exec(b->right, -1, 0);
+ break;
case Or:
print_exec(b->left, -1, 0);
printf(" or ");
print_exec(b->right, -1, 0);
break;
+ case OrElse:
+ print_exec(b->left, -1, 0);
+ printf(" or else ");
+ print_exec(b->right, -1, 0);
+ break;
case Not:
printf("not ");
print_exec(b->right, -1, 0);
###### propagate binode cases
case And:
+ case AndThen:
case Or:
+ case OrElse:
case Not:
/* both must be Tbool, result is Tbool */
propagate_types(b->left, c, ok, Tbool, 0);
right = interp_exec(b->right);
rv.bool = rv.bool && right.bool;
break;
+ case AndThen:
+ rv = interp_exec(b->left);
+ if (rv.bool)
+ rv = interp_exec(b->right);
+ break;
case Or:
rv = interp_exec(b->left);
right = interp_exec(b->right);
rv.bool = rv.bool || right.bool;
break;
+ case OrElse:
+ rv = interp_exec(b->left);
+ if (!rv.bool)
+ rv = interp_exec(b->right);
+ break;
case Not:
rv = interp_exec(b->right);
rv.bool = !rv.bool;
case GtrEq:
case Eql:
case NEql:
- /* Both must match but not labels, result is Tbool */
+ /* Both must match but not be labels, result is Tbool */
t = propagate_types(b->left, c, ok, NULL, Rnolabel);
if (t)
propagate_types(b->right, c, ok, t, 0);
if (t)
t = propagate_types(b->left, c, ok, t, 0);
}
- if (!vtype_compat(type, Tbool, 0)) {
+ if (!type_compat(type, Tbool, 0)) {
type_err(c, "error: Comparison returns %1 but %2 expected", prog,
Tbool, rules, type);
*ok = 0;
absolute value and negation). These have different operator names.
We also have a 'Bracket' operator which records where parentheses were
-found. This make it easy to reproduce these when printing. Once
+found. This makes it easy to reproduce these when printing. Once
precedence is handled better I might be able to discard this.
###### Binode types
Plus, Minus,
- Times, Divide,
+ Times, Divide, Rem,
Concat,
Absolute, Negate,
Bracket,
Top -> * ${ $0.op = Times; }$
| / ${ $0.op = Divide; }$
+ | % ${ $0.op = Rem; }$
| ++ ${ $0.op = Concat; }$
###### print binode cases
case Times:
case Divide:
case Concat:
+ case Rem:
print_exec(b->left, indent, 0);
switch(b->op) {
- case Plus: printf(" + "); break;
- case Minus: printf(" - "); break;
- case Times: printf(" * "); break;
- case Divide: printf(" / "); break;
- case Concat: printf(" ++ "); break;
+ case Plus: fputs(" + ", stdout); break;
+ case Minus: fputs(" - ", stdout); break;
+ case Times: fputs(" * ", stdout); break;
+ case Divide: fputs(" / ", stdout); break;
+ case Rem: fputs(" % ", stdout); break;
+ case Concat: fputs(" ++ ", stdout); break;
default: abort();
}
print_exec(b->right, indent, 0);
case Plus:
case Minus:
case Times:
+ case Rem:
case Divide:
/* both must be numbers, result is Tnum */
case Absolute:
* unary ops fit here too */
propagate_types(b->left, c, ok, Tnum, 0);
propagate_types(b->right, c, ok, Tnum, 0);
- if (!vtype_compat(type, Tnum, 0)) {
+ if (!type_compat(type, Tnum, 0)) {
type_err(c, "error: Arithmetic returns %1 but %2 expected", prog,
Tnum, rules, type);
*ok = 0;
/* both must be Tstr, result is Tstr */
propagate_types(b->left, c, ok, Tstr, 0);
propagate_types(b->right, c, ok, Tstr, 0);
- if (!vtype_compat(type, Tstr, 0)) {
+ if (!type_compat(type, Tstr, 0)) {
type_err(c, "error: Concat returns %1 but %2 expected", prog,
Tstr, rules, type);
*ok = 0;
right = interp_exec(b->right);
mpq_div(rv.num, rv.num, right.num);
break;
+ case Rem: {
+ mpz_t l, r, rem;
+
+ left = interp_exec(b->left);
+ right = interp_exec(b->right);
+ mpz_init(l); mpz_init(r); mpz_init(rem);
+ mpz_tdiv_q(l, mpq_numref(left.num), mpq_denref(left.num));
+ mpz_tdiv_q(r, mpq_numref(right.num), mpq_denref(right.num));
+ mpz_tdiv_r(rem, l, r);
+ rv = val_init(Tnum);
+ mpq_set_z(rv.num, rem);
+ mpz_clear(r); mpz_clear(l); mpz_clear(rem);
+ break;
+ }
case Negate:
rv = interp_exec(b->right);
mpq_neg(rv.num, rv.num);
rv.str = text_join(left.str, right.str);
break;
+
+###### value functions
+
+ static struct text text_join(struct text a, struct text b)
+ {
+ struct text rv;
+ rv.len = a.len + b.len;
+ rv.txt = malloc(rv.len);
+ memcpy(rv.txt, a.txt, a.len);
+ memcpy(rv.txt+a.len, b.txt, b.len);
+ return rv;
+ }
+
+
### Blocks, Statements, and Statement lists.
Now that we have expressions out of the way we need to turn to
The only stand-alone statement we introduce at this stage is `pass`
which does nothing and is represented as a `NULL` pointer in a `Block`
-list.
+list. Other stand-alone statements will follow once the infrastructure
+is in-place.
###### Binode types
Block,
###### propagate binode cases
case Block:
{
- /* If any statement returns something other then Tnone
+ /* If any statement returns something other than Tnone
* or Tbool then all such must return same type.
* As each statement may be Tnone or something else,
* we must always pass NULL (unknown) down, otherwise an incorrect
Declare,
###### SimpleStatement Grammar
- | Variable = Expression ${ {
- struct var *v = cast(var, $1);
-
+ | Variable = Expression ${
$0 = new(binode);
$0->op = Assign;
$0->left = $<1;
$0->right = $<3;
- if (v->var && !v->var->constant) {
- /* FIXME error? */
- }
- } }$
- | VariableDecl Expression ${
+ }$
+ | VariableDecl = Expression ${
$0 = new(binode);
$0->op = Declare;
$0->left = $<1;
- $0->right =$<2;
+ $0->right =$<3;
+ }$
+
+ | VariableDecl ${
+ if ($1->var->where_set == NULL) {
+ type_err(config2context(config), "Variable declared with no type or value: %v",
+ $1, NULL, 0, NULL);
+ } else {
+ $0 = new(binode);
+ $0->op = Declare;
+ $0->left = $<1;
+ $0->right = NULL;
+ }
}$
###### print binode cases
do_indent(indent, "");
print_exec(b->left, indent, 0);
if (cast(var, b->left)->var->constant) {
- if (v->where_decl == v->where_set)
- printf("::%.*s = ", v->val.type->name.len,
- v->val.type->name.txt);
- else
- printf(" ::= ");
+ if (v->where_decl == v->where_set) {
+ printf("::");
+ type_print(v->val.type, stdout);
+ printf(" ");
+ } else
+ printf(" ::");
} else {
- if (v->where_decl == v->where_set)
- printf(":%.*s = ", v->val.type->name.len,
- v->val.type->name.txt);
- else
- printf(" := ");
+ if (v->where_decl == v->where_set) {
+ printf(":");
+ type_print(v->val.type, stdout);
+ printf(" ");
+ } else
+ printf(" :");
+ }
+ if (b->right) {
+ printf("= ");
+ print_exec(b->right, indent, 0);
}
- print_exec(b->right, indent, 0);
if (indent >= 0)
printf("\n");
}
case Assign:
case Declare:
- /* Both must match and not be labels, result is Tnone */
- t = propagate_types(b->left, c, ok, NULL, Rnolabel);
+ /* Both must match and not be labels,
+ * Type must support 'dup',
+ * For Assign, left must not be constant.
+ * result is Tnone
+ */
+ t = propagate_types(b->left, c, ok, NULL,
+ Rnolabel | (b->op == Assign ? Rnoconstant : 0));
+ if (!b->right)
+ return Tnone;
+
if (t) {
if (propagate_types(b->right, c, ok, t, 0) != t)
if (b->left->type == Xvar)
} else {
t = propagate_types(b->right, c, ok, NULL, Rnolabel);
if (t)
- propagate_types(b->left, c, ok, t, 0);
+ propagate_types(b->left, c, ok, t,
+ (b->op == Assign ? Rnoconstant : 0));
+ }
+ if (t && t->dup == NULL) {
+ type_err(c, "error: cannot assign value of type %1", b, t, 0, NULL);
+ *ok = 0;
}
return Tnone;
###### interp binode cases
case Assign:
+ lleft = linterp_exec(b->left);
+ right = interp_exec(b->right);
+ if (lleft) {
+ free_value(*lleft);
+ *lleft = right;
+ } else
+ free_value(right);
+ right.type = NULL;
+ break;
+
case Declare:
{
struct variable *v = cast(var, b->left)->var;
if (v->merged)
v = v->merged;
- right = interp_exec(b->right);
+ if (b->right)
+ right = interp_exec(b->right);
+ else
+ right = val_init(v->val.type);
free_value(v->val);
v->val = right;
right.type = NULL;
// thenpart must return Tnone if there is a dopart,
// otherwise it is like elsepart.
// condpart must:
- // be bool if there is not casepart
+ // be bool if there is no casepart
// match casepart->values if there is a switchpart
// either be bool or match casepart->value if there
// is a whilepart
- // elsepart, casepart->action must match there return type
- // expected of this statement.
+ // elsepart and casepart->action must match the return type
+ // expected of this statement.
struct cond_statement *cs = cast(cond_statement, prog);
struct casepart *cp;
t = propagate_types(cs->forpart, c, ok, Tnone, 0);
- if (!vtype_compat(Tnone, t, 0))
+ if (!type_compat(Tnone, t, 0))
*ok = 0;
t = propagate_types(cs->dopart, c, ok, Tnone, 0);
- if (!vtype_compat(Tnone, t, 0))
+ if (!type_compat(Tnone, t, 0))
*ok = 0;
if (cs->dopart) {
t = propagate_types(cs->thenpart, c, ok, Tnone, 0);
- if (!vtype_compat(Tnone, t, 0))
+ if (!type_compat(Tnone, t, 0))
*ok = 0;
}
if (cs->casepart == NULL)
interp_exec(c->dopart);
if (c->thenpart) {
- v = interp_exec(c->thenpart);
- if (v.type != Tnone || !c->dopart)
- return v;
- free_value(v);
+ rv = interp_exec(c->thenpart);
+ if (rv.type != Tnone || !c->dopart)
+ goto Xcond_done;
+ free_value(rv);
}
} while (c->dopart);
if (value_cmp(v, cnd) == 0) {
free_value(v);
free_value(cnd);
- return interp_exec(cp->action);
+ rv = interp_exec(cp->action);
+ goto Xcond_done;
}
free_value(v);
}
free_value(cnd);
if (c->elsepart)
- return interp_exec(c->elsepart);
- v.type = Tnone;
- return v;
+ rv = interp_exec(c->elsepart);
+ else
+ rv.type = Tnone;
+ Xcond_done:
+ break;
+ }
+
+## Complex types
+
+Now that we have the shape of the interpreter in place we can add some
+complex types and connected them in to the data structures and the
+different phases of parse, analyse, print, interpret.
+
+For now, just arrays.
+
+### Arrays
+
+Arrays can be declared by giving a size and a type, as `[size]type' so
+`freq:[26]number` declares `freq` to be an array of 26 numbers. The
+size can be an arbitrary expression which is evaluated when the name
+comes into scope.
+
+Arrays cannot be assigned. When pointers are introduced we will also
+introduce array slices which can refer to part or all of an array -
+the assignment syntax will create a slice. For now, an array can only
+ever be referenced by the name it is declared with. It is likely that
+a "`copy`" primitive will eventually be define which can be used to
+make a copy of an array with controllable depth.
+
+###### type union fields
+
+ struct {
+ int size;
+ struct variable *vsize;
+ struct type *member;
+ } array;
+
+###### value union fields
+ struct {
+ struct value *elmnts;
+ } array;
+
+###### value functions
+
+ static struct value array_prepare(struct type *type)
+ {
+ struct value ret;
+
+ ret.type = type;
+ ret.array.elmnts = NULL;
+ return ret;
+ }
+
+ static struct value array_init(struct type *type)
+ {
+ struct value ret;
+ int i;
+
+ ret.type = type;
+ if (type->array.vsize) {
+ mpz_t q;
+ mpz_init(q);
+ mpz_tdiv_q(q, mpq_numref(type->array.vsize->val.num),
+ mpq_denref(type->array.vsize->val.num));
+ type->array.size = mpz_get_si(q);
+ mpz_clear(q);
+ }
+ ret.array.elmnts = calloc(type->array.size,
+ sizeof(ret.array.elmnts[0]));
+ for (i = 0; ret.array.elmnts && i < type->array.size; i++)
+ ret.array.elmnts[i] = val_init(type->array.member);
+ return ret;
+ }
+
+ static void array_free(struct value val)
+ {
+ int i;
+
+ if (val.array.elmnts)
+ for (i = 0; i < val.type->array.size; i++)
+ free_value(val.array.elmnts[i]);
+ free(val.array.elmnts);
+ }
+
+ static int array_compat(struct type *require, struct type *have)
+ {
+ if (have->compat != require->compat)
+ return 0;
+ /* Both are arrays, so we can look at details */
+ if (!type_compat(require->array.member, have->array.member, 0))
+ return 0;
+ if (require->array.vsize == NULL && have->array.vsize == NULL)
+ return require->array.size == have->array.size;
+
+ return require->array.vsize == have->array.vsize;
+ }
+
+ static void array_print_type(struct type *type, FILE *f)
+ {
+ fputs("[", f);
+ if (type->array.vsize) {
+ struct binding *b = type->array.vsize->name;
+ fprintf(f, "%.*s]", b->name.len, b->name.txt);
+ } else
+ fprintf(f, "%d]", type->array.size);
+ type_print(type->array.member, f);
+ }
+
+ static struct type array_prototype = {
+ .prepare = array_prepare,
+ .init = array_init,
+ .print_type = array_print_type,
+ .compat = array_compat,
+ .free = array_free,
+ };
+
+###### type grammar
+
+ | [ NUMBER ] Type ${
+ $0 = calloc(1, sizeof(struct type));
+ *($0) = array_prototype;
+ $0->array.member = $<4;
+ $0->array.vsize = NULL;
+ {
+ struct parse_context *c = config2context(config);
+ char tail[3];
+ mpq_t num;
+ if (number_parse(num, tail, $2.txt) == 0)
+ tok_err(c, "error: unrecognised number", &$2);
+ else if (tail[0])
+ tok_err(c, "error: unsupported number suffix", &$2);
+ else {
+ $0->array.size = mpz_get_ui(mpq_numref(num));
+ if (mpz_cmp_ui(mpq_denref(num), 1) != 0) {
+ tok_err(c, "error: array size must be an integer",
+ &$2);
+ } else if (mpz_cmp_ui(mpq_numref(num), 1UL << 30) >= 0)
+ tok_err(c, "error: array size is too large",
+ &$2);
+ mpq_clear(num);
+ }
+ $0->next= c->anon_typelist;
+ c->anon_typelist = $0;
+ }
+ }$
+
+ | [ IDENTIFIER ] Type ${ {
+ struct parse_context *c = config2context(config);
+ struct variable *v = var_ref(c, $2.txt);
+
+ if (!v)
+ tok_err(config2context(config), "error: name undeclared", &$2);
+ else if (!v->constant)
+ tok_err(config2context(config), "error: array size must be a constant", &$2);
+
+ $0 = calloc(1, sizeof(struct type));
+ *($0) = array_prototype;
+ $0->array.member = $<4;
+ $0->array.size = 0;
+ $0->array.vsize = v;
+ $0->next= c->anon_typelist;
+ c->anon_typelist = $0;
+ } }$
+
+###### parse context
+
+ struct type *anon_typelist;
+
+###### free context types
+
+ while (context.anon_typelist) {
+ struct type *t = context.anon_typelist;
+
+ context.anon_typelist = t->next;
+ free(t);
+ }
+
+###### Binode types
+ Index,
+
+###### variable grammar
+
+ | Variable [ Expression ] ${ {
+ struct binode *b = new(binode);
+ b->op = Index;
+ b->left = $<1;
+ b->right = $<3;
+ $0 = b;
+ } }$
+
+###### print binode cases
+ case Index:
+ print_exec(b->left, -1, 0);
+ printf("[");
+ print_exec(b->right, -1, 0);
+ printf("]");
+ break;
+
+###### propagate binode cases
+ case Index:
+ /* left must be an array, right must be a number,
+ * result is the member type of the array
+ */
+ propagate_types(b->right, c, ok, Tnum, 0);
+ t = propagate_types(b->left, c, ok, NULL, rules & Rnoconstant);
+ if (!t || t->compat != array_compat) {
+ type_err(c, "error: %1 cannot be indexed", prog, t, 0, NULL);
+ *ok = 0;
+ return NULL;
+ } else {
+ if (!type_compat(type, t->array.member, rules)) {
+ type_err(c, "error: have %1 but need %2", prog,
+ t->array.member, rules, type);
+ *ok = 0;
+ }
+ return t->array.member;
+ }
+ break;
+
+###### interp binode cases
+ case Index: {
+ mpz_t q;
+ long i;
+
+ lleft = linterp_exec(b->left);
+ right = interp_exec(b->right);
+ mpz_init(q);
+ mpz_tdiv_q(q, mpq_numref(right.num), mpq_denref(right.num));
+ i = mpz_get_si(q);
+ mpz_clear(q);
+
+ if (i >= 0 && i < lleft->type->array.size)
+ lrv = &lleft->array.elmnts[i];
+ else
+ rv = val_init(lleft->type->array.member);
+ break;
}
### Finally the whole program.
###### Parser: grammar
$*binode
- Program -> InitProgram OpenScope Varlist Block OptNL ${
+ Program -> program OpenScope Varlist Block OptNL ${
$0 = new(binode);
$0->op = Program;
$0->left = reorder_bilist($<3);
}$
| ERROR ${
tok_err(config2context(config),
- "error: unhandled parse error.", &$1);
- }$
-
- InitProgram -> program ${
- context_init(config2context(config));
+ "error: unhandled parse error", &$1);
}$
Varlist -> Varlist ArgDecl ${
struct var *v = cast(var, b->left);
if (!v->var->val.type) {
v->var->where_set = b;
- v->var->val = val_init(Tstr);
+ v->var->val = val_prepare(Tstr);
}
}
b = cast(binode, prog);
/* If a variable is not used after the 'if', no
* merge happens, so types can be different
*/
- if A * 2 > B:
+ if A > B * 2:
double:string = "yes"
print A, "is more than twice", B, "?", double
else:
- double := A*2
- print "double", A, "is only", double
+ double := B*2
+ print "double", B, "is", double
- a := A;
+ a : number
+ a = A;
b:number = B
- if a > 0 and b > 0:
+ if a > 0 and then b > 0:
while a != b:
if a < b:
b = b - a
print "Yay, I found", target
case GiveUp:
print "Closest I found was", mid
+
+ size::=55
+ list:[size]number
+ list[0] = 1234
+ for i:=1; then i = i + 1; while i < size:
+ n := list[i-1] * list[i-1]
+ list[i] = (n / 100) % 10000
+
+ print "Before sort:"
+ for i:=0; then i = i + 1; while i < size:
+ print "list[",i,"]=",list[i]
+
+ for i := 1; then i=i+1; while i < size:
+ for j:=i-1; then j=j-1; while j >= 0:
+ if list[j] > list[j+1]:
+ t:= list[j]
+ list[j] = list[j+1]
+ list[j+1] = t
+ print "After sort:"
+ for i:=0; then i = i + 1; while i < size:
+ print "list[",i,"]=",list[i]