From: NeilBrown Date: Sat, 17 Feb 2018 08:20:53 +0000 (+1100) Subject: oceani - add error reporting for type errors X-Git-Tag: StoneyCreek~13 X-Git-Url: https://ocean-lang.org/code/?p=ocean;a=commitdiff_plain;h=4d2220f65b57e8380d98cd1a4678f0faf2f32916 oceani - add error reporting for type errors This is still rough, but it is at least a basis to work on. Signed-off-by: NeilBrown --- diff --git a/csrc/oceani.mdc b/csrc/oceani.mdc index 6e981b1..bef5b3e 100644 --- a/csrc/oceani.mdc +++ b/csrc/oceani.mdc @@ -109,6 +109,7 @@ option. ## ast struct parse_context { struct token_config config; + char *file_name; ## parse context }; @@ -197,6 +198,7 @@ option. fprintf(stderr, "oceani: cannot open %s\n", argv[optind]); exit(1); } + context.file_name = argv[optind]; len = lseek(fd, 0, 2); file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0); s = code_extract(file, file+len, NULL); @@ -228,7 +230,7 @@ option. print_exec(*prog, 0, brackets); if (prog && doexec) { if (!analyse_prog(*prog, &context)) { - fprintf(stderr, "oceani: type error in program\n"); + fprintf(stderr, "oceani: type error in program - not running.\n"); exit(1); } interp_prog(*prog, argv+optind+1); @@ -296,9 +298,77 @@ type that gets passed around comprises a type (`enum vtype`) and a flag to indicate that `Vbool` is also permitted. As there are, as yet, no distinct types that are compatible, there -isn't much subtlety in the analysis. When we hav distinct number +isn't much subtlety in the analysis. When we have distinct number types, this will become more interesting. +#### Error reporting + +When analysis discovers an inconsistency it needs to report an error; +just refusing to run the code ensures that the error doesn't cascade, +but by itself it isn't very useful. A clear understanding of the sort of +error messages that are useful will help guide the process of analysis. 
+ +At a simplistic level, the only sort of error that type analysis can +report is that the type of some construct doesn't match a contextual +requirement. For example, in `4 + "hello"` the addition provides a +contextual requirement for numbers, but `"hello"` is not a number. In +this particular example no further information is needed as the types +are obvious from local information. When a variable is involved that +isn't the case. It may be helpful to explain why the variable has a +particular type, by indicating the location where the type was set, +whether by declaration or usage. + +Using a recursive-descent analysis we can easily detect a problem at +multiple locations. In "`hello:= "there"; 4 + hello`" the addition +will detect that one argument is not a number and the usage of `hello` +will detect that a number was wanted, but not provided. In this +(early) version of the language, we will generate error reports at +multiple locations, so the use of `hello` will report an error and +explain where the value was set, and the addition will report an error +and say why numbers are needed. To be able to report locations for +errors, each language element will need to record a file location +(line and column) and each variable will need to record the language +element where its type was set. For now we will assume that each line +of an error message indicates one location in the file, and up to 2 +types. So we provide a `printf`-like function which takes a format, a +language element (a `struct exec` which has not yet been introduced), and 2 +types. "`%1`" reports the first type, "`%2`" reports the second. We +will need a function to print the location, once we know how that is +stored. 
+ +###### forward decls + + static void fput_loc(struct exec *loc, FILE *f); + +###### core functions + + static void type_err(struct parse_context *c, + char *fmt, struct exec *loc, + enum vtype t1, enum vtype t2) + { + fprintf(stderr, "%s:", c->file_name); + fput_loc(loc, stderr); + for (; *fmt ; fmt++) { + if (*fmt != '%') { + fputc(*fmt, stderr); + continue; + } + fmt++; + switch (*fmt) { + case '%': fputc(*fmt, stderr); break; + default: fputc('?', stderr); break; + case '1': + fputs(vtype_names[t1], stderr); + break; + case '2': + fputs(vtype_names[t2], stderr); + break; + ## format cases + } + } + fputs("\n", stderr); + } + ## Data Structures One last introductory step before detailing the language elements and @@ -355,6 +425,9 @@ to parse each type from a string. char tail[2]; }; + char *vtype_names[] = {"nolabel", "unknown", "none", "string", + "number", "Boolean", "label"}; + ###### ast functions static void free_value(struct value v) { @@ -584,6 +657,7 @@ cannot nest, so a declaration while a name is in-scope is an error. 
struct variable *previous; struct value val; struct binding *name; + struct exec *where_set; // where type was set ## variable fields }; @@ -960,9 +1034,16 @@ subclasses, and to access these we need to be able to `cast` the if (__mptr && *__mptr != X##structname) abort(); \ (struct structname *)( (char *)__mptr);}) - #define new(structname) ({ \ + #define new(structname) ({ \ + struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \ + __ptr->type = X##structname; \ + __ptr->line = -1; __ptr->column = -1; \ + __ptr;}) + + #define new_pos(structname, token) ({ \ struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \ - __ptr->type = X##structname; \ + __ptr->type = X##structname; \ + __ptr->line = token.line; __ptr->column = token.col; \ __ptr;}) ###### ast @@ -972,6 +1053,7 @@ subclasses, and to access these we need to be able to `cast` the }; struct exec { enum exec_types type; + int line, column; }; struct binode { struct exec; @@ -981,6 +1063,25 @@ subclasses, and to access these we need to be able to `cast` the struct exec *left, *right; }; +###### ast functions + + static int __fput_loc(struct exec *loc, FILE *f) + { + if (loc->line >= 0) { + fprintf(f, "%d:%d: ", loc->line, loc->column); + return 1; + } + if (loc->type == Xbinode) + return __fput_loc(cast(binode,loc)->left, f) || + __fput_loc(cast(binode,loc)->right, f); + return 0; + } + static void fput_loc(struct exec *loc, FILE *f) + { + if (!__fput_loc(loc, f)) + fprintf(f, "??:??: "); + } + Each different type of `exec` node needs a number of functions defined, a bit like methods. We must be able to be able to free it, print it, analyse it and execute it. Once we have specific `exec` @@ -1157,28 +1258,28 @@ an executable. 
$*val Value -> True ${ - $0 = new(val); + $0 = new_pos(val, $1); $0->val.vtype = Vbool; $0->val.bool = 1; }$ | False ${ - $0 = new(val); + $0 = new_pos(val, $1); $0->val.vtype = Vbool; $0->val.bool = 0; }$ | NUMBER ${ - $0 = new(val); + $0 = new_pos(val, $1); $0->val.vtype = Vnum; if (number_parse($0->val.num, $0->val.tail, $1.txt) == 0) mpq_init($0->val.num); }$ | STRING ${ - $0 = new(val); + $0 = new_pos(val, $1); $0->val.vtype = Vstr; string_parse(&$1, '\\', &$0->val.str, $0->val.tail); }$ | MULTI_STRING ${ - $0 = new(val); + $0 = new_pos(val, $1); $0->val.vtype = Vstr; string_parse(&$1, '\\', &$0->val.str, $0->val.tail); }$ @@ -1199,8 +1300,11 @@ an executable. case Xval: { struct val *val = cast(val, prog); - if (!vtype_compat(type, val->val.vtype, bool_permitted)) + if (!vtype_compat(type, val->val.vtype, bool_permitted)) { + type_err(c, "error: expected %1 found %2", + prog, type, val->val.vtype); *ok = 0; + } return val->val.vtype; } @@ -1264,25 +1368,27 @@ link to find the primary instance. $*var VariableDecl -> IDENTIFIER := ${ { struct variable *v = var_decl(config2context(config), $1.txt); - $0 = new(var); + $0 = new_pos(var, $1); $0->var = v; } }$ | IDENTIFIER ::= ${ { struct variable *v = var_decl(config2context(config), $1.txt); v->constant = 1; - $0 = new(var); + $0 = new_pos(var, $1); $0->var = v; } }$ Variable -> IDENTIFIER ${ { struct variable *v = var_ref(config2context(config), $1.txt); + $0 = new_pos(var, $1); if (v == NULL) { /* This might be a label - allocate a var just in case */ v = var_decl(config2context(config), $1.txt); - if (v) + if (v) { val_init(&v->val, Vlabel); + v->where_set = $0; + } } - $0 = new(var); $0->var = v; } }$ @@ -1297,6 +1403,19 @@ link to find the primary instance. 
break; } +###### format cases + case 'v': + if (loc->type == Xvar) { + struct var *v = cast(var, loc); + if (v->var) { + struct binding *b = v->var->name; + fprintf(stderr, "%.*s", b->name.len, b->name.txt); + } else + fputs("???", stderr); + } else + fputs("NOTVAR", stderr); + break; + ###### propagate exec cases case Xvar: @@ -1304,6 +1423,7 @@ link to find the primary instance. struct var *var = cast(var, prog); struct variable *v = var->var; if (!v) { + type_err(c, "%d:BUG: no variable!!", prog, Vnone, Vnone); *ok = 0; return Vnone; } @@ -1312,12 +1432,18 @@ link to find the primary instance. if (v->val.vtype == Vunknown) { if (type > Vunknown && *ok != 0) { val_init(&v->val, type); + v->where_set = prog; *ok = 2; } return type; } - if (!vtype_compat(type, v->val.vtype, bool_permitted)) + if (!vtype_compat(type, v->val.vtype, bool_permitted)) { + type_err(c, "error: expected %1 but variable %v is %2", prog, + type, v->val.vtype); + type_err(c, "info: this is where %v was set to %1", v->where_set, + v->val.vtype, Vnone); *ok = 0; + } if (type <= Vunknown) return v->val.vtype; return type; @@ -1407,8 +1533,11 @@ and `BFact`s. /* both must be Vbool, result is Vbool */ propagate_types(b->left, c, ok, Vbool, 0); propagate_types(b->right, c, ok, Vbool, 0); - if (type != Vbool && type > Vunknown) + if (type != Vbool && type > Vunknown) { + type_err(c, "error: %1 operation found where %2 expected", prog, + Vbool, type); *ok = 0; + } return Vbool; ###### interp binode cases @@ -1513,8 +1642,11 @@ expression operator. if (t > Vunknown) t = propagate_types(b->left, c, ok, t, 0); } - if (!vtype_compat(type, Vbool, 0)) + if (!vtype_compat(type, Vbool, 0)) { + type_err(c, "error: Comparison returns %1 but %2 expected", prog, + Vbool, type); *ok = 0; + } return Vbool; ###### interp binode cases @@ -1583,7 +1715,7 @@ precedence is handled better I might be able to discard this. 
| Factor ${ $0 = $<1; }$ Factor -> ( Expression ) ${ - $0 = new(binode); + $0 = new_pos(binode, $1); $0->op = Bracket; $0->right = $<2; }$ @@ -1649,16 +1781,22 @@ precedence is handled better I might be able to discard this. * unary ops fit here too */ propagate_types(b->left, c, ok, Vnum, 0); propagate_types(b->right, c, ok, Vnum, 0); - if (!vtype_compat(type, Vnum, 0)) + if (!vtype_compat(type, Vnum, 0)) { + type_err(c, "error: Arithmetic returns %1 but %2 expected", prog, + Vnum, type); *ok = 0; + } return Vnum; case Concat: /* both must be Vstr, result is Vstr */ propagate_types(b->left, c, ok, Vstr, 0); propagate_types(b->right, c, ok, Vstr, 0); - if (!vtype_compat(type, Vstr, 0)) + if (!vtype_compat(type, Vstr, 0)) { + type_err(c, "error: Concat returns %1 but %2 expected", prog, + Vstr, type); *ok = 0; + } return Vstr; case Bracket: @@ -1847,7 +1985,7 @@ list. case Block: { /* If any statement returns something other then Vnone - * then all such must return same type. + * or Vbool then all such must return same type. * As each statement may be Vnone or something else, * we must always pass Vunknown down, otherwise an incorrect * error might occur. We never return Vnone unless it is @@ -1859,11 +1997,14 @@ list. 
t = propagate_types(e->left, c, ok, Vunknown, bool_permitted); if (bool_permitted && t == Vbool) t = Vunknown; - if (t != Vunknown && t != Vnone) { + if (t != Vunknown && t != Vnone && t != Vbool) { if (type == Vunknown) type = t; - else if (t != type) + else if (t != type) { + type_err(c, "error: expected %1, found %2", + e->left, type, t); *ok = 0; + } } } return type; @@ -2035,12 +2176,15 @@ it is declared, and error will be raised as the name is created as case Declare: /* Both must match and not be labels, result is Vnone */ t = propagate_types(b->left, c, ok, Vnolabel, 0); - if (t > Vunknown) - propagate_types(b->right, c, ok, t, 0); - else { + if (t > Vunknown) { + if (propagate_types(b->right, c, ok, t, 0) != t) + if (b->left->type == Xvar) + type_err(c, "info: variable %v was set as %1 here.", + cast(var, b->left)->var->where_set, t, Vnone); + } else { t = propagate_types(b->right, c, ok, Vnolabel, 0); if (t > Vunknown) - t = propagate_types(b->left, c, ok, t, 0); + propagate_types(b->left, c, ok, t, 0); } return Vnone; @@ -2073,7 +2217,7 @@ function. ###### SimpleStatement Grammar | use Expression ${ - $0 = new(binode); + $0 = new_pos(binode, $1); $0->op = Use; $0->right = $<2; }$ @@ -2653,8 +2797,10 @@ analysis is a bit more interesting at this level. for (b = cast(binode, b->left); b; b = cast(binode, b->right)) { struct var *v = cast(var, b->left); - if (v->var->val.vtype == Vunknown) + if (v->var->val.vtype == Vunknown) { + v->var->where_set = b; val_init(&v->var->val, Vstr); + } } b = cast(binode, prog); do { @@ -2746,7 +2892,7 @@ Fibonacci, and performs a binary search for a number. for: togo := 10 - f1 := 1; f2 := 1; + f1 := 1; f2 := 1 print "Fibonacci:", f1,f2, then togo = togo - 1 while togo > 0: