struct parse_context {
struct token_config config;
char *file_name;
+ int parse_error;
## parse context
};
} else
prog = parse_oceani(s->code, &context.config,
dotrace ? stderr : NULL);
+ if (!prog) {
+ fprintf(stderr, "oceani: fatal parser error.\n");
+ context.parse_error = 1;
+ }
if (prog && doprint)
print_exec(*prog, 0, brackets);
- if (prog && doexec) {
+ if (prog && doexec && !context.parse_error) {
if (!analyse_prog(*prog, &context)) {
fprintf(stderr, "oceani: type error in program - not running.\n");
exit(1);
s = t;
}
## free context
- exit(0);
+ exit(context.parse_error ? 1 : 0);
}
### Analysis
#### Error reporting
When analysis discovers an inconsistency it needs to report an error;
-just refusing to run the code esure that the error doesn't cascade,
+just refusing to run the code ensures that the error doesn't cascade,
but by itself it isn't very useful. A clear understanding of the sort of
error messages that are useful will help guide the process of analysis.
will detect that one argument is not a number and the usage of `hello`
will detect that a number was wanted, but not provided. In this
(early) version of the language, we will generate error reports at
-multiple locations, to the use of `hello` will report an error and
+multiple locations, so the use of `hello` will report an error and
explain where the value was set, and the addition will report an error
and say why numbers are needed. To be able to report locations for
errors, each language element will need to record a file location
}
}
fputs("\n", stderr);
+ c->parse_error = 1;
+ }
+
+ static void tok_err(struct parse_context *c, char *fmt, struct token *t)
+ {
+ fprintf(stderr, "%s:%d:%d: %s\n", c->file_name, t->line, t->col, fmt);
+ c->parse_error = 1;
}
## Data Structures
we also need to allow for places where no value is meaningful
(`Vnone`) and where we don't know what type to expect yet (`Vunknown`
which can be anything and `Vnolabel` which can be anything except a
-label). A 2 character 'tail' is included in each value as the scanner
-wants to parse that from the end of numbers and we need somewhere to
-put it. It is currently ignored but one day might allow for
-e.g. "imaginary" numbers.
+label).
Values are never shared, they are always copied when used, and freed
when no longer needed.
When propagating type information around the program, we need to
determine if two types are compatible, where `Vunknown` is compatible
-which anything, and `Vnolabel` is compatible with anything except a
-label. A separate funtion to encode this rule will simplify some code
+with anything, and `Vnolabel` is compatible with anything except a
+label. A separate function to encode this rule will simplify some code
later.
-When assigning command line arguments to variable, we need to be able
+There is an extra complication that this function needs to handle,
+which is described later when the Conditional Statement is introduced.
+In certain cases where a particular type is generally expected, a
+Boolean is also always permitted. To handle those cases, we explicitly tell
+`vtype_compat()` if a Boolean is permitted.
+
+When assigning command line arguments to variables, we need to be able
to parse each type from a string.
###### includes
int bool;
void *label;
};
- char tail[2];
};
char *vtype_names[] = {"nolabel", "unknown", "none", "string",
{
struct text tx;
int neg = 0;
+ char tail[3] = "";
+
switch(vl->vtype) {
case Vnolabel:
case Vlabel:
arg++;
}
tx.txt = arg; tx.len = strlen(tx.txt);
- if (number_parse(vl->num, vl->tail, tx) == 0)
+ if (number_parse(vl->num, tail, tx) == 0)
mpq_init(vl->num);
else if (neg)
mpq_neg(vl->num, vl->num);
+ if (tail[0]) {
+ printf("Unsupported suffix: %s\n", arg);
+ return 0;
+ }
break;
case Vbool:
if (strcasecmp(arg, "true") == 0 ||
struct variable *previous;
struct value val;
struct binding *name;
+ struct exec *where_decl;// where name was declared
struct exec *where_set; // where type was set
## variable fields
};
Note that names declared inside a loop (which is only parallel to
itself) are never visible after the loop. Similarly names defined in
scopes which are not parallel, such as those started by `for` and
-`switch`, are never visible after the scope. Only variable defined in
+`switch`, are never visible after the scope. Only variables defined in
both `then` and `else` (including the implicit then after an `if`, and
excluding `then` used with `for`) and in all `case`s and `else` of a
`switch` or `while` can be visible beyond the `if`/`switch`/`while`.
least to the end of the immediately containing block and conditionally
in any larger containing block which does not declare the name in some
other way. Importantly, the conditional scope extension happens even
-if the label is only used in parallel branch of a conditional -- when
-used in one branch it is treated as having been declared in all
+if the label is only used in one parallel branch of a conditional --
+when used in one branch it is treated as having been declared in all
branches.
Merge candidates are tentatively visible beyond the end of the
To push a new frame *before* any code in the frame is parsed, we need a
grammar reduction. This is most easily achieved with a grammar
-element which derives the empty string, and created the new scope when
+element which derives the empty string, and creates the new scope when
it is recognized. This can be placed, for example, between a keyword
like "if" and the code following it.
- "in scope". This is the case between the declaration of the
variable and the end of the containing block, and also between
- the usage with affirms a merge and the end of the block.
+ the usage which affirms a merge and the end of that block.
The scope depth is not greater than the current parse context scope
nest depth. When the block of that depth closes, the state will
switch (v ? v->scope : OutScope) {
case InScope:
- /* Signal error ... once I build error signalling support */
+ /* Caller will report the error */
return NULL;
case CondScope:
for (;
executables. This allows for expressions and lists etc. Other times
an executable is something quite specific like a constant or variable
name. So we define a `struct exec` to be a general executable with a
-type, and a `struct binode` which is a subclass of `exec` and forms a
-node in a binary tree and holding an operation. There will be other
+type, and a `struct binode` which is a subclass of `exec`, forms a
+node in a binary tree, and holds an operation. There will be other
subclasses, and to access these we need to be able to `cast` the
`exec` into the various other types.
#### Freeing
The parser generator requires a `free_foo` function for each struct
-that stores attributes and they will be `exec`s of subtypes there-of.
+that stores attributes and they will be `exec`s and subtypes there-of.
So we need `free_exec` which can handle all the subtypes, and we need
`free_binode`.
static void print_exec(struct exec *e, int indent, int bracket)
{
+ if (!e)
+ return;
switch (e->type) {
case Xbinode:
print_binode(cast(binode, e), indent, bracket); break;
### Values
We have already met values as separate objects. When manifest
-constants appear in the program text that must result in an executable
+constants appear in the program text, that must result in an executable
which has a constant value. So the `val` structure embeds a value in
an executable.
| NUMBER ${
$0 = new_pos(val, $1);
$0->val.vtype = Vnum;
- if (number_parse($0->val.num, $0->val.tail, $1.txt) == 0)
+ {
+ char tail[3];
+ if (number_parse($0->val.num, tail, $1.txt) == 0)
mpq_init($0->val.num);
+ if (tail[0])
+ tok_err(config2context(config), "error: unsupported number suffix.",
+ &$1);
+ }
}$
| STRING ${
$0 = new_pos(val, $1);
$0->val.vtype = Vstr;
- string_parse(&$1, '\\', &$0->val.str, $0->val.tail);
+ {
+ char tail[3];
+ string_parse(&$1, '\\', &$0->val.str, tail);
+ if (tail[0])
+ tok_err(config2context(config), "error: unsupported string suffix.",
+ &$1);
+ }
}$
| MULTI_STRING ${
$0 = new_pos(val, $1);
$0->val.vtype = Vstr;
- string_parse(&$1, '\\', &$0->val.str, $0->val.tail);
+ {
+ char tail[3];
+ string_parse(&$1, '\\', &$0->val.str, tail);
+ if (tail[0])
+ tok_err(config2context(config), "error: unsupported string suffix.",
+ &$1);
+ }
}$
###### print exec cases
### Variables
-Just as we used as `val` to wrap a value into an `exec`, we similarly
+Just as we used a `val` to wrap a value into an `exec`, we similarly
need a `var` to wrap a `variable` into an exec. While each `val`
contained a copy of the value, each `var` holds a link to the variable
because it really is the same variable no matter where it appears.
struct variable *v = var_decl(config2context(config), $1.txt);
$0 = new_pos(var, $1);
$0->var = v;
+ if (v)
+ v->where_decl = $0;
+ else {
+ v = var_ref(config2context(config), $1.txt);
+ $0->var = v;
+ type_err(config2context(config), "error: variable '%v' redeclared",
+ $0, Vnone, Vnone);
+ type_err(config2context(config), "info: this is where '%v' was first declared",
+ v->where_decl, Vnone, Vnone);
+ }
} }$
| IDENTIFIER ::= ${ {
struct variable *v = var_decl(config2context(config), $1.txt);
- v->constant = 1;
$0 = new_pos(var, $1);
$0->var = v;
+ if (v) {
+ v->where_decl = $0;
+ v->constant = 1;
+ } else {
+ v = var_ref(config2context(config), $1.txt);
+ $0->var = v;
+ type_err(config2context(config), "error: variable '%v' redeclared",
+ $0, Vnone, Vnone);
+ type_err(config2context(config), "info: this is where '%v' was first declared",
+ v->where_decl, Vnone, Vnone);
+ }
} }$
Variable -> IDENTIFIER ${ {
return type;
}
if (!vtype_compat(type, v->val.vtype, bool_permitted)) {
- type_err(c, "error: expected %1 but variable %v is %2", prog,
+ type_err(c, "error: expected %1 but variable '%v' is %2", prog,
type, v->val.vtype);
- type_err(c, "info: this is where %v was set to %1", v->where_set,
+ type_err(c, "info: this is where '%v' was set to %1", v->where_set,
v->val.vtype, Vnone);
*ok = 0;
}
A comparison takes arguments of any type, but the two types must be
the same.
-To simplify the parsing we introduce an `eop` which can return an
+To simplify the parsing we introduce an `eop` which can record an
expression operator.
###### ast
### Expressions: The rest
The remaining expressions with the highest precedence are arithmetic
-and string concatenation. There are `Expr`, `Term`, and `Factor`.
+and string concatenation. They are `Expr`, `Term`, and `Factor`.
The `Factor` is where the `Value` and `Variable` that we already have
are included.
The latter comprise complex statements and simple statement lists. They are
separated by newlines. Thus the semicolon is only used to separate
simple statements on the one line. This may be overly restrictive,
-but I'm not sure I every want a complex statement to share a line with
+but I'm not sure I ever want a complex statement to share a line with
anything else.
Note that a simple statement list can still use multiple lines if
if (t > Vunknown) {
if (propagate_types(b->right, c, ok, t, 0) != t)
if (b->left->type == Xvar)
- type_err(c, "info: variable %v was set as %1 here.",
+ type_err(c, "info: variable '%v' was set as %1 here.",
cast(var, b->left)->var->where_set, t, Vnone);
} else {
t = propagate_types(b->right, c, ok, Vnolabel, 0);
subsumes `if`, `while`, `do/while`, `switch`, and some parts of `for`.
It is comprised of a number of parts, all of which are optional though
set combinations apply. Each part is (usually) a key word (`then` is
-sometimes optional) followed by either an expression of a code block,
+sometimes optional) followed by either an expression or a code block,
except the `casepart` which is a "key word and an expression" followed
by a code block. The code-block option is valid for all parts and,
where an expression is also allowed, the code block can use the `use`
-statement to report a value. If the code block does no report a value
-the effect is similar to reporting `False`.
+statement to report a value. If the code block does not report a value
+the effect is similar to reporting `True`.
The `else` and `case` parts, as well as `then` when combined with
`if`, can contain a `use` statement which will apply to some
The type requirements on the code block in a `whilepart` are quite
unusual. It is allowed to return a value of some identifiable type, in
-which case the loop abort and an appropriate `casepart` is run, or it
+which case the loop aborts and an appropriate `casepart` is run, or it
can return a Boolean, in which case the loop either continues to the
`dopart` (on `True`) or aborts and runs the `elsepart` (on `False`).
This is different both from the `ifpart` code block which is expected to
struct value v, cnd;
struct casepart *cp;
struct cond_statement *c = cast(cond_statement, e);
+
if (c->forpart)
interp_exec(c->forpart);
do {
if (!(cnd.vtype == Vnone ||
(cnd.vtype == Vbool && cnd.bool != 0)))
break;
- if (c->dopart) {
- free_value(cnd);
+ // cnd is Vnone or Vbool, doesn't need to be freed
+ if (c->dopart)
interp_exec(c->dopart);
- }
+
if (c->thenpart) {
v = interp_exec(c->thenpart);
if (v.vtype != Vnone || !c->dopart)
$0->right = $<4;
var_block_close(config2context(config), CloseSequential);
if (config2context(config)->scope_stack) abort();
- }$
+ }$
+ | ERROR ${
+ tok_err(config2context(config),
+ "error: unhandled parse error.", &$1);
+ }$
Varlist -> Varlist ArgDecl ${
$0 = new(binode);
struct binode *b = cast(binode, prog);
int ok = 1;
+ if (!b)
+ return 0;
do {
ok = 1;
propagate_types(b->right, c, &ok, Vnone, 0);
static void interp_prog(struct exec *prog, char **argv)
{
struct binode *p = cast(binode, prog);
- struct binode *al = cast(binode, p->left);
+ struct binode *al;
struct value v;
+ if (!prog)
+ return;
+ al = cast(binode, p->left);
while (al) {
struct var *v = cast(var, al->left);
struct value *vl = &v->var->val;