for validating the parsing.
So the main requirements of the interpreter are:
-- Parse the program, possible with tracing
+- Parse the program, possibly with tracing
- Analyse the parsed program to ensure consistency
- print the program
- execute the program
struct parse_context {
struct token_config config;
char *file_name;
+ int parse_error;
## parse context
};
} else
prog = parse_oceani(s->code, &context.config,
dotrace ? stderr : NULL);
+ if (!prog) {
+ fprintf(stderr, "oceani: fatal parser error.\n");
+ context.parse_error = 1;
+ }
if (prog && doprint)
print_exec(*prog, 0, brackets);
- if (prog && doexec) {
+ if (prog && doexec && !context.parse_error) {
if (!analyse_prog(*prog, &context)) {
fprintf(stderr, "oceani: type error in program - not running.\n");
exit(1);
s = t;
}
## free context
- exit(0);
+ exit(context.parse_error ? 1 : 0);
}
### Analysis
#### Error reporting
When analysis discovers an inconsistency it needs to report an error;
-just refusing to run the code esure that the error doesn't cascade,
+just refusing to run the code ensures that the error doesn't cascade,
but by itself it isn't very useful. A clear understand of the sort of
error message that are useful will help guide the process of analysis.
will detect that one argument is not a number and the usage of `hello`
will detect that a number was wanted, but not provided. In this
(early) version of the language, we will generate error reports at
-multiple locations, to the use of `hello` will report an error and
+multiple locations, so the use of `hello` will report an error and
explain were the value was set, and the addition will report an error
and say why numbers are needed. To be able to report locations for
errors, each language element will need to record a file location
of an error message indicates one location in the file, and up to 2
types. So we provide a `printf`-like function which takes a format, a
language (a `struct exec` which has not yet been introduced), and 2
-types. "`$1`" reports the first type, "`$2`" reports the second. We
+types. "`%1`" reports the first type, "`%2`" reports the second. We
will need a function to print the location, once we know how that is
-stored.
+stored. As will be explained later, there are sometimes extra rules for
+type matching and they might affect error messages, so we need to pass those
+in too.
###### forward decls
static void type_err(struct parse_context *c,
char *fmt, struct exec *loc,
- enum vtype t1, enum vtype t2)
+ enum vtype t1, int rules, enum vtype t2)
{
fprintf(stderr, "%s:", c->file_name);
fput_loc(loc, stderr);
}
}
fputs("\n", stderr);
+ c->parse_error = 1;
+ }
+
+ static void tok_err(struct parse_context *c, char *fmt, struct token *t)
+ {
+ fprintf(stderr, "%s:%d:%d: %s\n", c->file_name, t->line, t->col, fmt);
+ c->parse_error = 1;
}
## Data Structures
Values can be numbers, which we represent as multi-precision
fractions, strings, Booleans and labels. When analysing the program
we also need to allow for places where no value is meaningful
-(`Vnone`) and where we don't know what type to expect yet (`Vunknown`
-which can be anything and `Vnolabel` which can be anything except a
-label). A 2 character 'tail' is included in each value as the scanner
-wants to parse that from the end of numbers and we need somewhere to
-put it. It is currently ignored but one day might allow for
-e.g. "imaginary" numbers.
+(`Vnone`) and where we don't know what type to expect yet (`Vunknown`).
Values are never shared, they are always copied when used, and freed
when no longer needed.
When propagating type information around the program, we need to
determine if two types are compatible, where `Vunknown` is compatible
-which anything, and `Vnolabel` is compatible with anything except a
-label. A separate funtion to encode this rule will simplify some code
-later.
+with anything. There are two special cases with type compatibility,
+both related to the Conditional Statement which will be described
+later. In some cases a Boolean can be accepted as well as some other
+primary type, and in others any type is acceptable except a label (`Vlabel`).
+A separate function to encode these cases will simplify some code later.
-When assigning command line arguments to variable, we need to be able
+When assigning command line arguments to variables, we need to be able
to parse each type from a string.
###### includes
###### ast
struct value {
- enum vtype {Vnolabel, Vunknown, Vnone, Vstr, Vnum, Vbool, Vlabel} vtype;
+ enum vtype {Vunknown, Vnone, Vstr, Vnum, Vbool, Vlabel} vtype;
union {
struct text str;
mpq_t num;
int bool;
void *label;
};
- char tail[2];
};
- char *vtype_names[] = {"nolabel", "unknown", "none", "string",
+ enum val_rules {Rnolabel = 1<<0, Rboolok = 1<<1};
+
+ char *vtype_names[] = {"unknown", "none", "string",
"number", "Boolean", "label"};
+###### format cases
+ case 'r':
+ if (rules & Rnolabel)
+ fputs(" (labels not permitted)", stderr);
+ break;
+
###### ast functions
static void free_value(struct value v)
{
switch (v.vtype) {
case Vnone:
- case Vnolabel:
case Vunknown: break;
case Vstr: free(v.str.txt); break;
case Vnum: mpq_clear(v.num); break;
}
}
- static int vtype_compat(enum vtype require, enum vtype have, int bool_permitted)
+ static int vtype_compat(enum vtype require, enum vtype have, int rules)
{
- if (bool_permitted && have == Vbool)
+ if ((rules & Rboolok) && have == Vbool)
return 1;
- switch (require) {
- case Vnolabel:
- return have != Vlabel;
- case Vunknown:
+ if ((rules & Rnolabel) && have == Vlabel)
+ return 0;
+ if (require == Vunknown || have == Vunknown)
return 1;
- default:
- return have == Vunknown || require == have;
- }
+
+ return require == have;
}
###### value functions
val->vtype = type;
switch(type) {
case Vnone:abort();
- case Vnolabel:
case Vunknown: break;
case Vnum:
mpq_init(val->num); break;
rv.vtype = v.vtype;
switch (rv.vtype) {
case Vnone:
- case Vnolabel:
case Vunknown: break;
case Vlabel:
rv.label = v.label;
case Vstr: cmp = text_cmp(left.str, right.str); break;
case Vbool: cmp = left.bool - right.bool; break;
case Vnone:
- case Vnolabel:
case Vunknown: cmp = 0;
}
return cmp;
case Vunknown:
printf("*Unknown*"); break;
case Vnone:
- case Vnolabel:
printf("*no-value*"); break;
case Vlabel:
printf("*label-%p*", v.label); break;
{
struct text tx;
int neg = 0;
+ char tail[3] = "";
+
switch(vl->vtype) {
- case Vnolabel:
case Vlabel:
case Vunknown:
case Vnone:
arg++;
}
tx.txt = arg; tx.len = strlen(tx.txt);
- if (number_parse(vl->num, vl->tail, tx) == 0)
+ if (number_parse(vl->num, tail, tx) == 0)
mpq_init(vl->num);
else if (neg)
mpq_neg(vl->num, vl->num);
+ if (tail[0]) {
+ printf("Unsupported suffix: %s\n", arg);
+ return 0;
+ }
break;
case Vbool:
if (strcasecmp(arg, "true") == 0 ||
struct variable *previous;
struct value val;
struct binding *name;
+ struct exec *where_decl;// where name was declared
struct exec *where_set; // where type was set
## variable fields
};
Note that names declared inside a loop (which is only parallel to
itself) are never visible after the loop. Similarly names defined in
scopes which are not parallel, such as those started by `for` and
-`switch`, are never visible after the scope. Only variable defined in
+`switch`, are never visible after the scope. Only variables defined in
both `then` and `else` (including the implicit then after an `if`, and
excluding `then` used with `for`) and in all `case`s and `else` of a
`switch` or `while` can be visible beyond the `if`/`switch`/`while`.
least to the end of the immediately containing block and conditionally
in any larger containing block which does not declare the name in some
other way. Importantly, the conditional scope extension happens even
-if the label is only used in parallel branch of a conditional -- when
-used in one branch it is treated as having been declared in all
+if the label is only used in one parallel branch of a conditional --
+when used in one branch it is treated as having been declared in all
branches.
Merge candidates are tentatively visible beyond the end of the
To push a new frame *before* any code in the frame is parsed, we need a
grammar reduction. This is most easily achieved with a grammar
-element which derives the empty string, and created the new scope when
+element which derives the empty string, and creates the new scope when
it is recognized. This can be placed, for example, between a keyword
like "if" and the code following it.
- "in scope". This is the case between the declaration of the
variable and the end of the containing block, and also between
- the usage with affirms a merge and the end of the block.
+ the usage which affirms a merge and the end of that block.
The scope depth is not greater than the current parse context scope
nest depth. When the block of that depth closes, the state will
switch (v ? v->scope : OutScope) {
case InScope:
- /* Signal error ... once I build error signalling support */
+ /* Caller will report the error */
return NULL;
case CondScope:
for (;
executables. This allows for expressions and lists etc. Other times
an executable is something quite specific like a constant or variable
name. So we define a `struct exec` to be a general executable with a
-type, and a `struct binode` which is a subclass of `exec` and forms a
-node in a binary tree and holding an operation. There will be other
+type, and a `struct binode` which is a subclass of `exec`, forms a
+node in a binary tree, and holds an operation. There will be other
subclasses, and to access these we need to be able to `cast` the
`exec` into the various other types.
#### Freeing
The parser generator requires a `free_foo` function for each struct
-that stores attributes and they will be `exec`s of subtypes there-of.
+that stores attributes and they will be `exec`s and subtypes there-of.
So we need `free_exec` which can handle all the subtypes, and we need
`free_binode`.
the program and looking for errors.
So `propagate_types` is passed an expected type (being a `vtype`
-together with a `bool_permitted` flag) that the `exec` is expected to
+together with some `val_rules` flags) that the `exec` is expected to
return, and returns the type that it does return, either of which can
be `Vunknown`. An `ok` flag is passed by reference. It is set to `0`
when an error is found, and `2` when any change is made. If it
###### core functions
static enum vtype propagate_types(struct exec *prog, struct parse_context *c, int *ok,
- enum vtype type, int bool_permitted)
+ enum vtype type, int rules)
{
enum vtype t;
### Values
We have already met values as separate objects. When manifest
-constants appear in the program text that must result in an executable
+constants appear in the program text, that must result in an executable
which has a constant value. So the `val` structure embeds a value in
an executable.
| NUMBER ${
$0 = new_pos(val, $1);
$0->val.vtype = Vnum;
- if (number_parse($0->val.num, $0->val.tail, $1.txt) == 0)
+ {
+ char tail[3];
+ if (number_parse($0->val.num, tail, $1.txt) == 0)
mpq_init($0->val.num);
+ if (tail[0])
+ tok_err(config2context(config), "error: unsupported number suffix.",
+ &$1);
+ }
}$
| STRING ${
$0 = new_pos(val, $1);
$0->val.vtype = Vstr;
- string_parse(&$1, '\\', &$0->val.str, $0->val.tail);
+ {
+ char tail[3];
+ string_parse(&$1, '\\', &$0->val.str, tail);
+ if (tail[0])
+ tok_err(config2context(config), "error: unsupported string suffix.",
+ &$1);
+ }
}$
| MULTI_STRING ${
$0 = new_pos(val, $1);
$0->val.vtype = Vstr;
- string_parse(&$1, '\\', &$0->val.str, $0->val.tail);
+ {
+ char tail[3];
+ string_parse(&$1, '\\', &$0->val.str, tail);
+ if (tail[0])
+ tok_err(config2context(config), "error: unsupported string suffix.",
+ &$1);
+ }
}$
###### print exec cases
case Xval:
{
struct val *val = cast(val, prog);
- if (!vtype_compat(type, val->val.vtype, bool_permitted)) {
- type_err(c, "error: expected %1 found %2",
- prog, type, val->val.vtype);
+ if (!vtype_compat(type, val->val.vtype, rules)) {
+ type_err(c, "error: expected %1%r found %2",
+ prog, type, rules, val->val.vtype);
*ok = 0;
}
return val->val.vtype;
### Variables
-Just as we used as `val` to wrap a value into an `exec`, we similarly
+Just as we used a `val` to wrap a value into an `exec`, we similarly
need a `var` to wrap a `variable` into an exec. While each `val`
contained a copy of the value, each `var` hold a link to the variable
because it really is the same variable no matter where it appears.
struct variable *v = var_decl(config2context(config), $1.txt);
$0 = new_pos(var, $1);
$0->var = v;
+ if (v)
+ v->where_decl = $0;
+ else {
+ v = var_ref(config2context(config), $1.txt);
+ $0->var = v;
+ type_err(config2context(config), "error: variable '%v' redeclared",
+ $0, Vnone, 0, Vnone);
+ type_err(config2context(config), "info: this is where '%v' was first declared",
+ v->where_decl, Vnone, 0, Vnone);
+ }
} }$
| IDENTIFIER ::= ${ {
struct variable *v = var_decl(config2context(config), $1.txt);
- v->constant = 1;
$0 = new_pos(var, $1);
$0->var = v;
+ if (v) {
+ v->where_decl = $0;
+ v->constant = 1;
+ } else {
+ v = var_ref(config2context(config), $1.txt);
+ $0->var = v;
+ type_err(config2context(config), "error: variable '%v' redeclared",
+ $0, Vnone, 0, Vnone);
+ type_err(config2context(config), "info: this is where '%v' was first declared",
+ v->where_decl, Vnone, 0, Vnone);
+ }
} }$
Variable -> IDENTIFIER ${ {
struct var *var = cast(var, prog);
struct variable *v = var->var;
if (!v) {
- type_err(c, "%d:BUG: no variable!!", prog, Vnone, Vnone);
+ type_err(c, "%d:BUG: no variable!!", prog, Vnone, 0, Vnone);
*ok = 0;
return Vnone;
}
if (v->merged)
v = v->merged;
if (v->val.vtype == Vunknown) {
- if (type > Vunknown && *ok != 0) {
+ if (type != Vunknown && *ok != 0) {
val_init(&v->val, type);
v->where_set = prog;
*ok = 2;
}
return type;
}
- if (!vtype_compat(type, v->val.vtype, bool_permitted)) {
- type_err(c, "error: expected %1 but variable %v is %2", prog,
- type, v->val.vtype);
- type_err(c, "info: this is where %v was set to %1", v->where_set,
- v->val.vtype, Vnone);
+ if (!vtype_compat(type, v->val.vtype, rules)) {
+ type_err(c, "error: expected %1%r but variable '%v' is %2", prog,
+ type, rules, v->val.vtype);
+ type_err(c, "info: this is where '%v' was set to %1", v->where_set,
+ v->val.vtype, rules, Vnone);
*ok = 0;
}
- if (type <= Vunknown)
+ if (type == Vunknown)
return v->val.vtype;
return type;
}
/* both must be Vbool, result is Vbool */
propagate_types(b->left, c, ok, Vbool, 0);
propagate_types(b->right, c, ok, Vbool, 0);
- if (type != Vbool && type > Vunknown) {
+ if (type != Vbool && type != Vunknown) {
type_err(c, "error: %1 operation found where %2 expected", prog,
- Vbool, type);
+ Vbool, 0, type);
*ok = 0;
}
return Vbool;
A comparison takes arguments of any type, but the two types must be
the same.
-To simplify the parsing we introduce an `eop` which can return an
+To simplify the parsing we introduce an `eop` which can record an
expression operator.
###### ast
case Eql:
case NEql:
/* Both must match but not labels, result is Vbool */
- t = propagate_types(b->left, c, ok, Vnolabel, 0);
- if (t > Vunknown)
+ t = propagate_types(b->left, c, ok, Vunknown, Rnolabel);
+ if (t != Vunknown)
propagate_types(b->right, c, ok, t, 0);
else {
- t = propagate_types(b->right, c, ok, Vnolabel, 0);
- if (t > Vunknown)
+ t = propagate_types(b->right, c, ok, Vunknown, Rnolabel);
+ if (t != Vunknown)
t = propagate_types(b->left, c, ok, t, 0);
}
if (!vtype_compat(type, Vbool, 0)) {
type_err(c, "error: Comparison returns %1 but %2 expected", prog,
- Vbool, type);
+ Vbool, rules, type);
*ok = 0;
}
return Vbool;
### Expressions: The rest
The remaining expressions with the highest precedence are arithmetic
-and string concatenation. There are `Expr`, `Term`, and `Factor`.
+and string concatenation. They are `Expr`, `Term`, and `Factor`.
The `Factor` is where the `Value` and `Variable` that we already have
are included.
propagate_types(b->right, c, ok, Vnum, 0);
if (!vtype_compat(type, Vnum, 0)) {
type_err(c, "error: Arithmetic returns %1 but %2 expected", prog,
- Vnum, type);
+ Vnum, rules, type);
*ok = 0;
}
return Vnum;
propagate_types(b->right, c, ok, Vstr, 0);
if (!vtype_compat(type, Vstr, 0)) {
type_err(c, "error: Concat returns %1 but %2 expected", prog,
- Vstr, type);
+ Vstr, rules, type);
*ok = 0;
}
return Vstr;
The later comprise complex statements and simple statement lists. They are
separated by newlines. Thus the semicolon is only used to separate
simple statements on the one line. This may be overly restrictive,
-but I'm not sure I every want a complex statement to share a line with
+but I'm not sure I ever want a complex statement to share a line with
anything else.
Note that a simple statement list can still use multiple lines if
struct binode *e;
for (e = b; e; e = cast(binode, e->right)) {
- t = propagate_types(e->left, c, ok, Vunknown, bool_permitted);
- if (bool_permitted && t == Vbool)
+ t = propagate_types(e->left, c, ok, Vunknown, rules);
+ if ((rules & Rboolok) && t == Vbool)
t = Vunknown;
if (t != Vunknown && t != Vnone && t != Vbool) {
if (type == Vunknown)
type = t;
else if (t != type) {
- type_err(c, "error: expected %1, found %2",
- e->left, type, t);
+ type_err(c, "error: expected %1%r, found %2",
+ e->left, type, rules, t);
*ok = 0;
}
}
case Print:
/* don't care but all must be consistent */
- propagate_types(b->left, c, ok, Vnolabel, 0);
- propagate_types(b->right, c, ok, Vnolabel, 0);
+ propagate_types(b->left, c, ok, Vunknown, Rnolabel);
+ propagate_types(b->right, c, ok, Vunknown, Rnolabel);
break;
###### interp binode cases
case Assign:
case Declare:
/* Both must match and not be labels, result is Vnone */
- t = propagate_types(b->left, c, ok, Vnolabel, 0);
- if (t > Vunknown) {
+ t = propagate_types(b->left, c, ok, Vunknown, Rnolabel);
+ if (t != Vunknown) {
if (propagate_types(b->right, c, ok, t, 0) != t)
if (b->left->type == Xvar)
- type_err(c, "info: variable %v was set as %1 here.",
- cast(var, b->left)->var->where_set, t, Vnone);
+ type_err(c, "info: variable '%v' was set as %1 here.",
+ cast(var, b->left)->var->where_set, t, rules, Vnone);
} else {
- t = propagate_types(b->right, c, ok, Vnolabel, 0);
- if (t > Vunknown)
+ t = propagate_types(b->right, c, ok, Vunknown, Rnolabel);
+ if (t != Vunknown)
propagate_types(b->left, c, ok, t, 0);
}
return Vnone;
subsumes `if`, `while`, `do/while`, `switch`, and some parts of `for`.
It is comprised of a number of parts, all of which are optional though
set combinations apply. Each part is (usually) a key word (`then` is
-sometimes optional) followed by either an expression of a code block,
+sometimes optional) followed by either an expression or a code block,
except the `casepart` which is a "key word and an expression" followed
by a code block. The code-block option is valid for all parts and,
where an expression is also allowed, the code block can use the `use`
-statement to report a value. If the code block does no report a value
-the effect is similar to reporting `False`.
+statement to report a value. If the code block does not report a value
+the effect is similar to reporting `True`.
The `else` and `case` parts, as well as `then` when combined with
`if`, can contain a `use` statement which will apply to some
The type requirements on the code block in a `whilepart` are quite
unusal. It is allowed to return a value of some identifiable type, in
-which case the loop abort and an appropriate `casepart` is run, or it
+which case the loop aborts and an appropriate `casepart` is run, or it
can return a Boolean, in which case the loop either continues to the
`dopart` (on `True`) or aborts and runs the `elsepart` (on `False`).
This is different both from the `ifpart` code block which is expected to
return a Boolean, or the `switchpart` code block which is expected to
return the same type as the casepart values. The correct analysis of
the type of the `whilepart` code block is the reason for the
-`bool_permitted` flag which is passed to `propagate_types()`.
+`Rboolok` flag which is passed to `propagate_types()`.
The `cond_statement` cannot fit into a `binode` so a new `exec` is
defined.
cp && (t == Vunknown); cp = cp->next)
t = propagate_types(cp->value, c, ok, Vunknown, 0);
if (t == Vunknown && cs->condpart)
- t = propagate_types(cs->condpart, c, ok, Vunknown, 1);
+ t = propagate_types(cs->condpart, c, ok, Vunknown, Rboolok);
// Now we have a type (I hope) push it down
if (t != Vunknown) {
for (cp = cs->casepart; cp; cp = cp->next)
propagate_types(cp->value, c, ok, t, 0);
- propagate_types(cs->condpart, c, ok, t, 1);
+ propagate_types(cs->condpart, c, ok, t, Rboolok);
}
}
// (if)then, else, and case parts must return expected type.
if (!cs->dopart && type == Vunknown)
- type = propagate_types(cs->thenpart, c, ok, Vunknown, bool_permitted);
+ type = propagate_types(cs->thenpart, c, ok, Vunknown, rules);
if (type == Vunknown)
- type = propagate_types(cs->elsepart, c, ok, Vunknown, bool_permitted);
+ type = propagate_types(cs->elsepart, c, ok, Vunknown, rules);
for (cp = cs->casepart;
cp && type == Vunknown;
cp = cp->next)
- type = propagate_types(cp->action, c, ok, Vunknown, bool_permitted);
- if (type > Vunknown) {
+ type = propagate_types(cp->action, c, ok, Vunknown, rules);
+ if (type != Vunknown) {
if (!cs->dopart)
- propagate_types(cs->thenpart, c, ok, type, bool_permitted);
- propagate_types(cs->elsepart, c, ok, type, bool_permitted);
+ propagate_types(cs->thenpart, c, ok, type, rules);
+ propagate_types(cs->elsepart, c, ok, type, rules);
for (cp = cs->casepart; cp ; cp = cp->next)
- propagate_types(cp->action, c, ok, type, bool_permitted);
+ propagate_types(cp->action, c, ok, type, rules);
return type;
} else
return Vunknown;
struct value v, cnd;
struct casepart *cp;
struct cond_statement *c = cast(cond_statement, e);
+
if (c->forpart)
interp_exec(c->forpart);
do {
if (!(cnd.vtype == Vnone ||
(cnd.vtype == Vbool && cnd.bool != 0)))
break;
- if (c->dopart) {
- free_value(cnd);
+ // cnd is Vnone or Vbool, doesn't need to be freed
+ if (c->dopart)
interp_exec(c->dopart);
- }
+
if (c->thenpart) {
v = interp_exec(c->thenpart);
if (v.vtype != Vnone || !c->dopart)
$0->right = $<4;
var_block_close(config2context(config), CloseSequential);
if (config2context(config)->scope_stack) abort();
- }$
+ }$
+ | ERROR ${
+ tok_err(config2context(config),
+ "error: unhandled parse error.", &$1);
+ }$
Varlist -> Varlist ArgDecl ${
$0 = new(binode);