## ast
struct parse_context {
struct token_config config;
+ char *file_name; /* name of the source file; type_err() prints it at the start of every message */
## parse context
};
fprintf(stderr, "oceani: cannot open %s\n", argv[optind]);
exit(1);
}
+ context.file_name = argv[optind];
len = lseek(fd, 0, 2);
file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
s = code_extract(file, file+len, NULL);
print_exec(*prog, 0, brackets);
if (prog && doexec) {
if (!analyse_prog(*prog, &context)) {
- fprintf(stderr, "oceani: type error in program\n");
+ fprintf(stderr, "oceani: type error in program - not running.\n");
exit(1);
}
interp_prog(*prog, argv+optind+1);
flag to indicate that `Vbool` is also permitted.
As there are, as yet, no distinct types that are compatible, there
-isn't much subtlety in the analysis. When we hav distinct number
+isn't much subtlety in the analysis. When we have distinct number
types, this will become more interesting.
+#### Error reporting
+
+When analysis discovers an inconsistency it needs to report an error;
+just refusing to run the code ensures that the error doesn't cascade,
+but by itself it isn't very useful. A clear understanding of the sort of
+error messages that are useful will help guide the process of analysis.
+
+At a simplistic level, the only sort of error that type analysis can
+report is that the type of some construct doesn't match a contextual
+requirement. For example, in `4 + "hello"` the addition provides a
+contextual requirement for numbers, but `"hello"` is not a number. In
+this particular example no further information is needed as the types
+are obvious from local information. When a variable is involved that
+isn't the case. It may be helpful to explain why the variable has a
+particular type, by indicating the location where the type was set,
+whether by declaration or usage.
+
+Using a recursive-descent analysis we can easily detect a problem at
+multiple locations. In "`hello:= "there"; 4 + hello`" the addition
+will detect that one argument is not a number and the usage of `hello`
+will detect that a number was wanted, but not provided. In this
+(early) version of the language, we will generate error reports at
+multiple locations, so the use of `hello` will report an error and
+explain where the value was set, and the addition will report an error
+and say why numbers are needed. To be able to report locations for
+errors, each language element will need to record a file location
+(line and column) and each variable will need to record the language
+element where its type was set. For now we will assume that each line
+of an error message indicates one location in the file, and up to 2
+types. So we provide a `printf`-like function which takes a format, a
+language element (a `struct exec` which has not yet been introduced), and 2
+types. "`$1`" reports the first type, "`$2`" reports the second. We
+will need a function to print the location, once we know how that is
+stored.
+
+###### forward decls
+
+ static void fput_loc(struct exec *loc, FILE *f);
+
+###### core functions
+
+ static void type_err(struct parse_context *c,
+ char *fmt, struct exec *loc,
+ enum vtype t1, enum vtype t2)
+ {
+ fprintf(stderr, "%s:", c->file_name);
+ fput_loc(loc, stderr);
+ for (; *fmt ; fmt++) {
+ if (*fmt != '%') {
+ fputc(*fmt, stderr);
+ continue;
+ }
+ fmt++;
+ switch (*fmt) {
+ case '%': fputc(*fmt, stderr); break;
+ default: fputc('?', stderr); break;
+ case '1':
+ fputs(vtype_names[t1], stderr);
+ break;
+ case '2':
+ fputs(vtype_names[t2], stderr);
+ break;
+ ## format cases
+ }
+ }
+ fputs("\n", stderr);
+ }
+
## Data Structures
One last introductory step before detailing the language elements and
char tail[2];
};
+ char *vtype_names[] = {"nolabel", "unknown", "none", "string", /* printable type names; type_err() indexes this table with an enum vtype value */
+ "number", "Boolean", "label"}; /* NOTE(review): order presumably mirrors the declaration order of enum vtype - confirm when either changes */
+
###### ast functions
static void free_value(struct value v)
{
struct variable *previous;
struct value val;
struct binding *name;
+ struct exec *where_set; // where type was set
## variable fields
};
if (__mptr && *__mptr != X##structname) abort(); \
(struct structname *)( (char *)__mptr);})
- #define new(structname) ({ \
+ #define new(structname) ({ \
+ struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
+ __ptr->type = X##structname; \
+ __ptr->line = -1; __ptr->column = -1; \
+ __ptr;})
+
+ #define new_pos(structname, token) ({ \
struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
- __ptr->type = X##structname; \
+ __ptr->type = X##structname; \
+ __ptr->line = token.line; __ptr->column = token.col; \
__ptr;})
###### ast
};
struct exec {
enum exec_types type;
+ int line, column; /* source position of this element: new() sets both to -1 (no position), new_pos() copies them from a token */
};
struct binode {
struct exec;
struct exec *left, *right;
};
+###### ast functions
+
+ /* Try to print a "line:column: " location for `loc` to `f`.
+  *
+  * A node with a non-negative line prints its own position.  A binode
+  * without one borrows the position of the first child that has one,
+  * trying left and then right.  Returns 1 if something was printed and
+  * 0 otherwise.
+  *
+  * `loc` may be NULL: a binode child is NULL whenever only the other
+  * side was filled in, and such a child simply has no location.
+  */
+ static int __fput_loc(struct exec *loc, FILE *f)
+ {
+     if (!loc)
+         return 0;
+     if (loc->line >= 0) {
+         fprintf(f, "%d:%d: ", loc->line, loc->column);
+         return 1;
+     }
+     if (loc->type == Xbinode)
+         return __fput_loc(cast(binode,loc)->left, f) ||
+                __fput_loc(cast(binode,loc)->right, f);
+     return 0;
+ }
+ /* Print the location of `loc` to `f`, falling back to the placeholder
+  * "??:??: " when __fput_loc could not find any position to print.
+  */
+ static void fput_loc(struct exec *loc, FILE *f)
+ {
+     int printed = __fput_loc(loc, f);
+     if (printed)
+         return;
+     fprintf(f, "??:??: ");
+ }
+
Each different type of `exec` node needs a number of functions
defined, a bit like methods. We must be able to be able to free it,
print it, analyse it and execute it. Once we have specific `exec`
$*val
Value -> True ${
- $0 = new(val);
+ $0 = new_pos(val, $1);
$0->val.vtype = Vbool;
$0->val.bool = 1;
}$
| False ${
- $0 = new(val);
+ $0 = new_pos(val, $1);
$0->val.vtype = Vbool;
$0->val.bool = 0;
}$
| NUMBER ${
- $0 = new(val);
+ $0 = new_pos(val, $1);
$0->val.vtype = Vnum;
if (number_parse($0->val.num, $0->val.tail, $1.txt) == 0)
mpq_init($0->val.num);
}$
| STRING ${
- $0 = new(val);
+ $0 = new_pos(val, $1);
$0->val.vtype = Vstr;
string_parse(&$1, '\\', &$0->val.str, $0->val.tail);
}$
| MULTI_STRING ${
- $0 = new(val);
+ $0 = new_pos(val, $1);
$0->val.vtype = Vstr;
string_parse(&$1, '\\', &$0->val.str, $0->val.tail);
}$
case Xval:
{
struct val *val = cast(val, prog);
- if (!vtype_compat(type, val->val.vtype, bool_permitted))
+ if (!vtype_compat(type, val->val.vtype, bool_permitted)) {
+ type_err(c, "error: expected %1 found %2",
+ prog, type, val->val.vtype);
*ok = 0;
+ }
return val->val.vtype;
}
$*var
VariableDecl -> IDENTIFIER := ${ {
struct variable *v = var_decl(config2context(config), $1.txt);
- $0 = new(var);
+ $0 = new_pos(var, $1);
$0->var = v;
} }$
| IDENTIFIER ::= ${ {
struct variable *v = var_decl(config2context(config), $1.txt);
v->constant = 1;
- $0 = new(var);
+ $0 = new_pos(var, $1);
$0->var = v;
} }$
Variable -> IDENTIFIER ${ {
struct variable *v = var_ref(config2context(config), $1.txt);
+ $0 = new_pos(var, $1);
if (v == NULL) {
/* This might be a label - allocate a var just in case */
v = var_decl(config2context(config), $1.txt);
- if (v)
+ if (v) {
val_init(&v->val, Vlabel);
+ v->where_set = $0;
+ }
}
- $0 = new(var);
$0->var = v;
} }$
break;
}
+###### format cases
+ case 'v': {
+     /* %v: the name of the variable that `loc` refers to.
+      * Prints "NOTVAR" when `loc` is not a var node and "???" when
+      * the var node has no variable attached.
+      */
+     struct var *ref = loc->type == Xvar ? cast(var, loc) : NULL;
+     if (!ref)
+         fputs("NOTVAR", stderr);
+     else if (!ref->var)
+         fputs("???", stderr);
+     else {
+         struct binding *bind = ref->var->name;
+         fprintf(stderr, "%.*s", bind->name.len, bind->name.txt);
+     }
+     break;
+ }
+
###### propagate exec cases
case Xvar:
struct var *var = cast(var, prog);
struct variable *v = var->var;
if (!v) {
+ type_err(c, "%d:BUG: no variable!!", prog, Vnone, Vnone);
*ok = 0;
return Vnone;
}
if (v->val.vtype == Vunknown) {
if (type > Vunknown && *ok != 0) {
val_init(&v->val, type);
+ v->where_set = prog;
*ok = 2;
}
return type;
}
- if (!vtype_compat(type, v->val.vtype, bool_permitted))
+ if (!vtype_compat(type, v->val.vtype, bool_permitted)) {
+ type_err(c, "error: expected %1 but variable %v is %2", prog,
+ type, v->val.vtype);
+ type_err(c, "info: this is where %v was set to %1", v->where_set,
+ v->val.vtype, Vnone);
*ok = 0;
+ }
if (type <= Vunknown)
return v->val.vtype;
return type;
/* both must be Vbool, result is Vbool */
propagate_types(b->left, c, ok, Vbool, 0);
propagate_types(b->right, c, ok, Vbool, 0);
- if (type != Vbool && type > Vunknown)
+ if (type != Vbool && type > Vunknown) {
+ type_err(c, "error: %1 operation found where %2 expected", prog,
+ Vbool, type);
*ok = 0;
+ }
return Vbool;
###### interp binode cases
if (t > Vunknown)
t = propagate_types(b->left, c, ok, t, 0);
}
- if (!vtype_compat(type, Vbool, 0))
+ if (!vtype_compat(type, Vbool, 0)) {
+ type_err(c, "error: Comparison returns %1 but %2 expected", prog,
+ Vbool, type);
*ok = 0;
+ }
return Vbool;
###### interp binode cases
| Factor ${ $0 = $<1; }$
Factor -> ( Expression ) ${
- $0 = new(binode);
+ $0 = new_pos(binode, $1);
$0->op = Bracket;
$0->right = $<2;
}$
* unary ops fit here too */
propagate_types(b->left, c, ok, Vnum, 0);
propagate_types(b->right, c, ok, Vnum, 0);
- if (!vtype_compat(type, Vnum, 0))
+ if (!vtype_compat(type, Vnum, 0)) {
+ type_err(c, "error: Arithmetic returns %1 but %2 expected", prog,
+ Vnum, type);
*ok = 0;
+ }
return Vnum;
case Concat:
/* both must be Vstr, result is Vstr */
propagate_types(b->left, c, ok, Vstr, 0);
propagate_types(b->right, c, ok, Vstr, 0);
- if (!vtype_compat(type, Vstr, 0))
+ if (!vtype_compat(type, Vstr, 0)) {
+ type_err(c, "error: Concat returns %1 but %2 expected", prog,
+ Vstr, type);
*ok = 0;
+ }
return Vstr;
case Bracket:
case Block:
{
/* If any statement returns something other then Vnone
- * then all such must return same type.
+ * or Vbool then all such must return same type.
* As each statement may be Vnone or something else,
* we must always pass Vunknown down, otherwise an incorrect
* error might occur. We never return Vnone unless it is
t = propagate_types(e->left, c, ok, Vunknown, bool_permitted);
if (bool_permitted && t == Vbool)
t = Vunknown;
- if (t != Vunknown && t != Vnone) {
+ if (t != Vunknown && t != Vnone && t != Vbool) {
if (type == Vunknown)
type = t;
- else if (t != type)
+ else if (t != type) {
+ type_err(c, "error: expected %1, found %2",
+ e->left, type, t);
*ok = 0;
+ }
}
}
return type;
case Declare:
/* Both must match and not be labels, result is Vnone */
t = propagate_types(b->left, c, ok, Vnolabel, 0);
- if (t > Vunknown)
- propagate_types(b->right, c, ok, t, 0);
- else {
+ if (t > Vunknown) {
+ if (propagate_types(b->right, c, ok, t, 0) != t)
+ if (b->left->type == Xvar)
+ type_err(c, "info: variable %v was set as %1 here.",
+ cast(var, b->left)->var->where_set, t, Vnone);
+ } else {
t = propagate_types(b->right, c, ok, Vnolabel, 0);
if (t > Vunknown)
- t = propagate_types(b->left, c, ok, t, 0);
+ propagate_types(b->left, c, ok, t, 0);
}
return Vnone;
###### SimpleStatement Grammar
| use Expression ${
- $0 = new(binode);
+ $0 = new_pos(binode, $1);
$0->op = Use;
$0->right = $<2;
}$
for (b = cast(binode, b->left); b; b = cast(binode, b->right)) {
struct var *v = cast(var, b->left);
- if (v->var->val.vtype == Vunknown)
+ if (v->var->val.vtype == Vunknown) {
+ v->var->where_set = b;
val_init(&v->var->val, Vstr);
+ }
}
b = cast(binode, prog);
do {
for:
togo := 10
- f1 := 1; f2 := 1;
+ f1 := 1; f2 := 1
print "Fibonacci:", f1,f2,
then togo = togo - 1
while togo > 0: