X-Git-Url: https://ocean-lang.org/code/?p=ocean;a=blobdiff_plain;f=csrc%2Foceani.mdc;h=97c195382cd8955c4e0c16e154125e5d7fa2e93b;hp=07606745a481a5b7ec174660a6a17e0c23086db0;hb=1dd9f61bbc7e8890b5407ba084793817e55fe502;hpb=8b7383a008c8875637d37c0df18e8773cc0bd06c diff --git a/csrc/oceani.mdc b/csrc/oceani.mdc index 0760674..97c1953 100644 --- a/csrc/oceani.mdc +++ b/csrc/oceani.mdc @@ -1,4 +1,4 @@ -# Ocean Interpreter - Stoney Creek version +# Ocean Interpreter - Jamison Creek version Ocean is intended to be a compiled language, so this interpreter is not targeted at being the final product. It is, rather, an intermediate @@ -29,35 +29,31 @@ be. ## Current version -This second version of the interpreter exists to test out the -structured statement providing conditions and iteration, and simple -variable scoping. Clearly we need some minimal other functionality so -that values can be tested and instructions iterated over. All that -functionality is clearly not normative at this stage (not that -anything is **really** normative yet) and will change, so early test -code will certainly break in later versions. +This third version of the interpreter exists to test out some initial +ideas relating to types. Particularly it adds arrays (indexed from +zero) and simple structures. Basic control flow and variable scoping +are already fairly well established, as are basic numerical and +boolean operators. -The under-test parts of the language are: +Some operators that have only recently been added, and so have not +generated all that much experience yet are "and then" and "or else" as +short-circuit Boolean operators, and the "if ... else" trinary +operator which can select between two expressions based on a third +(which appears syntactically in the middle). - - conditional/looping structured statements - - the `use` statement which is needed for that - - Variable binding using ":=" and "::=", and assignment using "=". +Elements that are present purely to make a usable language, and +without any expectation that they will remain, are the "program' +clause, which provides a list of variables to received command-line +arguments, and the "print" statement which performs simple output. -Elements which are present to make a usable language are: - - - "blocks" of multiple statements. - - `pass`: a statement which does nothing. - - expressions: `+`, `-`, `*`, `/` can apply to numbers and `++` can - catenate strings. `and`, `or`, `not` manipulate Booleans, and - normal comparison operators can work on all three types. - - `print`: will print the values in a list of expressions. - - `program`: is given a list of identifiers to initialize from - arguments. +The current scalar types are "number", "Boolean", and "string". +Boolean will likely stay in its current form, the other two might, but +could just as easily be changed. ## Naming Versions of the interpreter which obviously do not support a complete -language will be named after creeks and streams. This one is Stoney +language will be named after creeks and streams. This one is Jamison Creek. Once we have something reasonably resembling a complete language, the @@ -76,7 +72,7 @@ So the main requirements of the interpreter are: - Parse the program, possibly with tracing, - Analyse the parsed program to ensure consistency, - Print the program, -- Execute the program. +- Execute the program, if no parsing or consistency errors were found. This is all performed by a single C program extracted with `parsergen`. @@ -114,6 +110,7 @@ structures can be used. struct token_config config; char *file_name; int parse_error; + struct exec *prog; ## parse context }; @@ -126,6 +123,9 @@ structures can be used. #define config2context(_conf) container_of(_conf, struct parse_context, \ config) +###### Parser: reduce + struct parse_context *c = config2context(config); + ###### Parser: code #include @@ -151,8 +151,8 @@ structures can be used. ## core functions #include - static char Usage[] = "Usage: oceani --trace --print --noexec --brackets" - "--section=SectionName prog.ocn\n"; + static char Usage[] = + "Usage: oceani --trace --print --noexec --brackets --section=SectionName prog.ocn\n"; static const struct option long_options[] = { {"trace", 0, NULL, 't'}, {"print", 0, NULL, 'p'}, @@ -167,19 +167,17 @@ structures can be used. int fd; int len; char *file; - struct section *s; + struct section *s, *ss; char *section = NULL; struct parse_context context = { .config = { - .ignored = (1 << TK_line_comment) - | (1 << TK_block_comment), - .number_chars = ".,_+-", + .ignored = (1 << TK_mark), + .number_chars = ".,_+- ", .word_start = "_", .word_cont = "_", }, }; int doprint=0, dotrace=0, doexec=1, brackets=0; - struct exec **prog; int opt; while ((opt = getopt_long(argc, argv, options, long_options, NULL)) != -1) { @@ -215,41 +213,39 @@ structures can be used. ## context initialization if (section) { - struct section *ss; for (ss = s; ss; ss = ss->next) { struct text sec = ss->section; if (sec.len == strlen(section) && strncmp(sec.txt, section, sec.len) == 0) break; } - if (ss) - prog = parse_oceani(ss->code, &context.config, - dotrace ? stderr : NULL); - else { + if (!ss) { fprintf(stderr, "oceani: cannot find section %s\n", section); exit(1); } } else - prog = parse_oceani(s->code, &context.config, - dotrace ? stderr : NULL); - if (!prog) { - fprintf(stderr, "oceani: fatal parser error.\n"); + ss = s; + parse_oceani(ss->code, &context.config, dotrace ? stderr : NULL); + + if (!context.prog) { + fprintf(stderr, "oceani: no program found.\n"); context.parse_error = 1; } - if (prog && doprint) - print_exec(*prog, 0, brackets); - if (prog && doexec && !context.parse_error) { - if (!analyse_prog(*prog, &context)) { + if (context.prog && doprint) { + ## print const decls + ## print type decls + print_exec(context.prog, 0, brackets); + } + if (context.prog && doexec && !context.parse_error) { + if (!analyse_prog(context.prog, &context)) { fprintf(stderr, "oceani: type error in program - not running.\n"); exit(1); } - interp_prog(*prog, argv+optind+1); - } - if (prog) { - free_exec(*prog); - free(prog); + interp_prog(context.prog, argv+optind+1); } + free_exec(context.prog); + while (s) { struct section *t = s->next; code_free(s->code); @@ -292,12 +288,12 @@ outside the if, the variables in the different branches are distinct and can be of different types. Determining the types of all variables early is important for -processing command line arguments. These can be assigned to any type -of variable, but we must first know the correct type so any required -conversion can happen. If a variable is associated with a command -line argument but no type can be interpreted (e.g. the variable is -only ever used in a `print` statement), then the type is set to -'string'. +processing command line arguments. These can be assigned to any of +several types of variable, but we must first know the correct type so +any required conversion can happen. If a variable is associated with +a command line argument but no type can be interpreted (e.g. the +variable is only ever used in a `print` statement), then the type is +set to 'string'. Undeclared names may only appear in "use" statements and "case" expressions. These names are given a type of "label" and a unique value. @@ -305,9 +301,9 @@ This allows them to fill the role of a name in an enumerated type, which is useful for testing the `switch` statement. As we will see, the condition part of a `while` statement can return -either a Boolean or some other type. This requires that the expect -type that gets passed around comprises a type (`enum vtype`) and a -flag to indicate that `Vbool` is also permitted. +either a Boolean or some other type. This requires that the expected +type that gets passed around comprises a type and a flag to indicate +that `Tbool` is also permitted. As there are, as yet, no distinct types that are compatible, there isn't much subtlety in the analysis. When we have distinct number @@ -317,8 +313,9 @@ types, this will become more interesting. When analysis discovers an inconsistency it needs to report an error; just refusing to run the code ensures that the error doesn't cascade, -but by itself it isn't very useful. A clear understand of the sort of -error message that are useful will help guide the process of analysis. +but by itself it isn't very useful. A clear understanding of the sort +of error message that are useful will help guide the process of +analysis. At a simplistic level, the only sort of error that type analysis can report is that the type of some construct doesn't match a contextual @@ -375,8 +372,8 @@ context so indicate that parsing failed. } fmt++; switch (*fmt) { - case '%': fputc(*fmt, stderr); break; - default: fputc('?', stderr); break; + case '%': fputc(*fmt, stderr); break; // NOTEST + default: fputc('?', stderr); break; // NOTEST case '1': type_print(t1, stderr); break; @@ -397,17 +394,15 @@ context so indicate that parsing failed. c->parse_error = 1; } -## Data Structures - -One last introductory step before detailing the language elements and -providing their four requirements is to establish the data structures -to store these elements. +## Entities: declared and predeclared. -There are two key objects that we need to work with: executable -elements which comprise the program, and values which the program -works with. Between these are the variables in their various scopes -which hold the values, and types which classify the values stored and -manipulatd by executables. +There are various "things" that the language and/or the interpreter +needs to know about to parse and execute a program. These include +types, variables, values, and executable code. These are all lumped +together under the term "entities" (calling them "objects" would be +confusing) and introduced here. These will introduced and described +here. The following section will present the different specific code +elements which comprise or manipulate these various entities. ### Types @@ -421,8 +416,9 @@ better integrated into the language. Rather than requiring every numeric type to support all numeric operations (add, multiple, etc), we allow types to be able to present as one of a few standard types: integer, float, and fraction. The -existance of these conversion functions enable types to determine if -they are compatible with other types. +existence of these conversion functions eventaully enable types to +determine if they are compatible with other types, though such types +have not yet been implemented. Named type are stored in a simple linked list. Objects of each type are "values" which are often passed around by value. @@ -448,10 +444,12 @@ which are often passed around by value. int (*cmp_eq)(struct value v1, struct value v2); struct value (*dup)(struct value val); void (*free)(struct value val); + void (*free_type)(struct type *t); int (*compat)(struct type *this, struct type *other); long long (*to_int)(struct value *v); double (*to_float)(struct value *v); int (*to_mpq)(mpq_t *q, struct value *v); + ## type functions union { ## type union fields }; @@ -523,7 +521,7 @@ which are often passed around by value. else if (type->print_type) type->print_type(type, f); else - fputs("*invalid*type*", f); + fputs("*invalid*type*", f); // NOTEST } static struct value val_prepare(struct type *type) @@ -567,7 +565,7 @@ which are often passed around by value. if (v.type && v.type->print) v.type->print(v); else - printf("*Unknown*"); + printf("*Unknown*"); // NOTEST } static struct value parse_value(struct type *type, char *arg) @@ -576,16 +574,29 @@ which are often passed around by value. if (type && type->parse) return type->parse(type, arg); - rv.type = NULL; - return rv; + rv.type = NULL; // NOTEST + return rv; // NOTEST } +###### forward decls + + static void free_value(struct value v); + static int type_compat(struct type *require, struct type *have, int rules); + static void type_print(struct type *type, FILE *f); + static struct value val_init(struct type *type); + static struct value dup_value(struct value v); + static int value_cmp(struct value left, struct value right); + static void print_value(struct value v); + static struct value parse_value(struct type *type, char *arg); + ###### free context types while (context.typelist) { struct type *t = context.typelist; context.typelist = t->next; + if (t->free_type) + t->free_type(t); free(t); } @@ -606,15 +617,22 @@ with anything. There are two special cases with type compatibility, both related to the Conditional Statement which will be described later. In some cases a Boolean can be accepted as well as some other primary type, and in others any type is acceptable except a label (`Vlabel`). -A separate function encode these cases will simplify some code later. +A separate function encoding these cases will simplify some code later. When assigning command line arguments to variables, we need to be able to parse each type from a string. +The distinction beteen "prepare" and "init" needs to be explained. +"init" sets up an initial value, such as "zero" or the empty string. +"prepare" simply prepares the data structure so that if "free" gets +called on it, it won't do something silly. Normally a value will be +stored after "prepare" but before "free", but this might not happen if +there are errors. + ###### includes #include - #include "string.h" - #include "number.h" + #include "parse_string.h" + #include "parse_number.h" ###### libs myLDLIBS := libnumber.o libstring.o -lgmp @@ -674,8 +692,8 @@ to parse each type from a string. rv.type = type; switch(type->vtype) { - case Vnone: - break; + case Vnone: // NOTEST + break; // NOTEST case Vnum: mpq_init(rv.num); break; case Vstr: @@ -685,9 +703,9 @@ to parse each type from a string. case Vbool: rv.bool = 0; break; - case Vlabel: - rv.label = NULL; - break; + case Vlabel: // NOTEST + rv.label = NULL; // NOTEST + break; // NOTEST } return rv; } @@ -697,8 +715,8 @@ to parse each type from a string. struct value rv; rv.type = v.type; switch (rv.type->vtype) { - case Vnone: - break; + case Vnone: // NOTEST + break; // NOTEST case Vlabel: rv.label = v.label; break; @@ -722,13 +740,13 @@ to parse each type from a string. { int cmp; if (left.type != right.type) - return left.type - right.type; + return left.type - right.type; // NOTEST switch (left.type->vtype) { case Vlabel: cmp = left.label == right.label ? 0 : 1; break; case Vnum: cmp = mpq_cmp(left.num, right.num); break; case Vstr: cmp = text_cmp(left.str, right.str); break; case Vbool: cmp = left.bool - right.bool; break; - case Vnone: cmp = 0; + case Vnone: cmp = 0; // NOTEST } return cmp; } @@ -736,10 +754,10 @@ to parse each type from a string. static void _print_value(struct value v) { switch (v.type->vtype) { - case Vnone: - printf("*no-value*"); break; - case Vlabel: - printf("*label-%p*", v.label); break; + case Vnone: // NOTEST + printf("*no-value*"); break; // NOTEST + case Vlabel: // NOTEST + printf("*label-%p*", v.label); break; // NOTEST case Vstr: printf("%.*s", v.str.len, v.str.txt); break; case Vbool: @@ -765,10 +783,10 @@ to parse each type from a string. val.type = type; switch(type->vtype) { - case Vlabel: - case Vnone: - val.type = NULL; - break; + case Vlabel: // NOTEST + case Vnone: // NOTEST + val.type = NULL; // NOTEST + break; // NOTEST case Vstr: val.str.len = strlen(arg); val.str.txt = malloc(val.str.len); @@ -985,8 +1003,8 @@ like "if" and the code following it. ###### Grammar $void - OpenScope -> ${ scope_push(config2context(config)); }$ - + OpenScope -> ${ scope_push(c); }$ + ClosePara -> ${ var_block_close(c, CloseParallel); }$ Each variable records a scope depth and is in one of four states: @@ -1019,7 +1037,6 @@ Each variable records a scope depth and is in one of four states: in scope. It is permanently out of scope now and can be removed from the "in scope" stack. - ###### variable fields int depth, min_depth; enum { OutScope, PendingScope, CondScope, InScope } scope; @@ -1037,6 +1054,13 @@ merging variables, we need to also adjust the 'merged' pointer on any other variables that had previously been merged with the one that will no longer be primary. +A variable that is no longer the most recent instance of a name may +still have "pending" scope, if it might still be merged with most +recent instance. These variables don't really belong in the +"in_scope" list, but are not immediately removed when a new instance +is found. Instead, they are detected and ignored when considering the +list of in_scope names. + ###### variable fields struct variable *merged; @@ -1071,6 +1095,9 @@ no longer be primary. v = t->previous; free_value(t->val); + if (t->depth == 0) + // This is a global constant + free_exec(t->where_decl); free(t); } } @@ -1085,21 +1112,23 @@ the latest usage. This is determined from `min_depth`. When a conditionally visible variable gets affirmed like this, it is also merged with other conditionally visible variables with the same name. -When we parse a variable declaration we either signal an error if the +When we parse a variable declaration we either report an error if the name is currently bound, or create a new variable at the current nest depth if the name is unbound or bound to a conditionally scoped or pending-scope variable. If the previous variable was conditionally scoped, it and its homonyms becomes out-of-scope. When we parse a variable reference (including non-declarative -assignment) we signal an error if the name is not bound or is bound to +assignment) we report an error if the name is not bound or is bound to a pending-scope variable; update the scope if the name is bound to a conditionally scoped variable; or just proceed normally if the named variable is in scope. When we exit a scope, any variables bound at this level are either -marked out of scope or pending-scoped, depending on whether the -scope was sequential or parallel. +marked out of scope or pending-scoped, depending on whether the scope +was sequential or parallel. Here a "parallel" scope means the "then" +or "else" part of a conditional, or any "case" or "else" branch of a +switch. Other scopes are "sequential". When exiting a parallel scope we check if there are any variables that were previously pending and are still visible. If there are, then @@ -1158,7 +1187,7 @@ all pending-scope variables become conditionally scoped. switch (v ? v->scope : OutScope) { case OutScope: case PendingScope: - /* Signal an error - once that is possible */ + /* Caller will report the error */ return NULL; case CondScope: /* All CondScope variables of this name need to be merged @@ -1179,14 +1208,14 @@ all pending-scope variables become conditionally scoped. static void var_block_close(struct parse_context *c, enum closetype ct) { - /* close of all variables that are in_scope */ + /* Close off all variables that are in_scope */ struct variable *v, **vp, *v2; scope_pop(c); for (vp = &c->in_scope; v = *vp, v && v->depth > c->scope_depth && v->min_depth > c->scope_depth; ) { - switch (ct) { + if (v->name->var == v) switch (ct) { case CloseElse: case CloseParallel: /* handle PendingScope */ switch(v->scope) { @@ -1247,7 +1276,7 @@ all pending-scope variables become conditionally scoped. } break; } - if (v->scope == OutScope) + if (v->scope == OutScope || v->name->var != v) *vp = v->in_scope; else vp = &v->in_scope; @@ -1305,6 +1334,8 @@ subclasses, and to access these we need to be able to `cast` the static int __fput_loc(struct exec *loc, FILE *f) { + if (!loc) + return 0; // NOTEST if (loc->line >= 0) { fprintf(f, "%d:%d: ", loc->line, loc->column); return 1; @@ -1317,7 +1348,7 @@ subclasses, and to access these we need to be able to `cast` the static void fput_loc(struct exec *loc, FILE *f) { if (!__fput_loc(loc, f)) - fprintf(f, "??:??: "); + fprintf(f, "??:??: "); // NOTEST } Each different type of `exec` node needs a number of functions @@ -1329,9 +1360,9 @@ slowly. #### Freeing The parser generator requires a `free_foo` function for each struct -that stores attributes and they will be `exec`s and subtypes there-of. -So we need `free_exec` which can handle all the subtypes, and we need -`free_binode`. +that stores attributes and they will often be `exec`s and subtypes +there-of. So we need `free_exec` which can handle all the subtypes, +and we need `free_binode`. ###### ast functions @@ -1388,7 +1419,7 @@ also want to know what sort of bracketing to use. static void print_exec(struct exec *e, int indent, int bracket) { if (!e) - return; + return; // NOTEST switch (e->type) { case Xbinode: print_binode(cast(binode, e), indent, bracket); break; @@ -1426,7 +1457,9 @@ propagation is needed. ###### core functions static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok, - struct type *type, int rules) + struct type *type, int rules); + static struct type *__propagate_types(struct exec *prog, struct parse_context *c, int *ok, + struct type *type, int rules) { struct type *t; @@ -1447,6 +1480,16 @@ propagation is needed. return Tnone; } + static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok, + struct type *type, int rules) + { + struct type *ret = __propagate_types(prog, c, ok, type, rules); + + if (c->parse_error) + *ok = 0; + return ret; + } + #### Interpreting Interpreting an `exec` doesn't require anything but the `exec`. State @@ -1455,22 +1498,54 @@ within the `exec` tree. The exception to this is the whole `program` which needs to look at command line arguments. The `program` will be interpreted separately. -Each `exec` can return a value, which may be `Tnone` but must be non-NULL; +Each `exec` can return a value, which may be `Tnone` but must be +non-NULL; Some `exec`s will return the location of a value, which can +be updates. To support this, each exec case must store either a value +in `val` or the pointer to a value in `lval`. If `lval` is set, but a +simple value is required, `inter_exec()` will dereference `lval` to +get the value. ###### core functions + struct lrval { + struct value val, *lval; + }; + + static struct lrval _interp_exec(struct exec *e); + static struct value interp_exec(struct exec *e) { - struct value rv; + struct lrval ret = _interp_exec(e); + + if (ret.lval) + return dup_value(*ret.lval); + else + return ret.val; + } + + static struct value *linterp_exec(struct exec *e) + { + struct lrval ret = _interp_exec(e); + + return ret.lval; + } + + static struct lrval _interp_exec(struct exec *e) + { + struct lrval ret; + struct value rv, *lrv = NULL; rv.type = Tnone; - if (!e) - return rv; + if (!e) { + ret.lval = lrv; + ret.val = rv; + return ret; + } switch(e->type) { case Xbinode: { struct binode *b = cast(binode, e); - struct value left, right; + struct value left, right, *lleft; left.type = right.type = Tnone; switch (b->op) { ## interp binode cases @@ -1480,12 +1555,567 @@ Each `exec` can return a value, which may be `Tnone` but must be non-NULL; } ## interp exec cases } - return rv; + ret.lval = lrv; + ret.val = rv; + return ret; + } + +### Complex types + +Now that we have the shape of the interpreter in place we can add some +complex types and connected them in to the data structures and the +different phases of parse, analyse, print, interpret. + +Thus far we have arrays and structs. + +#### Arrays + +Arrays can be declared by giving a size and a type, as `[size]type' so +`freq:[26]number` declares `freq` to be an array of 26 numbers. The +size can be an arbitrary expression which is evaluated when the name +comes into scope. + +Arrays cannot be assigned. When pointers are introduced we will also +introduce array slices which can refer to part or all of an array - +the assignment syntax will create a slice. For now, an array can only +ever be referenced by the name it is declared with. It is likely that +a "`copy`" primitive will eventually be define which can be used to +make a copy of an array with controllable depth. + +###### type union fields + + struct { + int size; + struct variable *vsize; + struct type *member; + } array; + +###### value union fields + struct { + struct value *elmnts; + } array; + +###### value functions + + static struct value array_prepare(struct type *type) + { + struct value ret; + + ret.type = type; + ret.array.elmnts = NULL; + return ret; + } + + static struct value array_init(struct type *type) + { + struct value ret; + int i; + + ret.type = type; + if (type->array.vsize) { + mpz_t q; + mpz_init(q); + mpz_tdiv_q(q, mpq_numref(type->array.vsize->val.num), + mpq_denref(type->array.vsize->val.num)); + type->array.size = mpz_get_si(q); + mpz_clear(q); + } + ret.array.elmnts = calloc(type->array.size, + sizeof(ret.array.elmnts[0])); + for (i = 0; ret.array.elmnts && i < type->array.size; i++) + ret.array.elmnts[i] = val_init(type->array.member); + return ret; + } + + static void array_free(struct value val) + { + int i; + + if (val.array.elmnts) + for (i = 0; i < val.type->array.size; i++) + free_value(val.array.elmnts[i]); + free(val.array.elmnts); + } + + static int array_compat(struct type *require, struct type *have) + { + if (have->compat != require->compat) + return 0; + /* Both are arrays, so we can look at details */ + if (!type_compat(require->array.member, have->array.member, 0)) + return 0; + if (require->array.vsize == NULL && have->array.vsize == NULL) + return require->array.size == have->array.size; + + return require->array.vsize == have->array.vsize; + } + + static void array_print_type(struct type *type, FILE *f) + { + fputs("[", f); + if (type->array.vsize) { + struct binding *b = type->array.vsize->name; + fprintf(f, "%.*s]", b->name.len, b->name.txt); + } else + fprintf(f, "%d]", type->array.size); + type_print(type->array.member, f); + } + + static struct type array_prototype = { + .prepare = array_prepare, + .init = array_init, + .print_type = array_print_type, + .compat = array_compat, + .free = array_free, + }; + +###### type grammar + + | [ NUMBER ] Type ${ + $0 = calloc(1, sizeof(struct type)); + *($0) = array_prototype; + $0->array.member = $<4; + $0->array.vsize = NULL; + { + char tail[3]; + mpq_t num; + if (number_parse(num, tail, $2.txt) == 0) + tok_err(c, "error: unrecognised number", &$2); + else if (tail[0]) + tok_err(c, "error: unsupported number suffix", &$2); + else { + $0->array.size = mpz_get_ui(mpq_numref(num)); + if (mpz_cmp_ui(mpq_denref(num), 1) != 0) { + tok_err(c, "error: array size must be an integer", + &$2); + } else if (mpz_cmp_ui(mpq_numref(num), 1UL << 30) >= 0) + tok_err(c, "error: array size is too large", + &$2); + mpq_clear(num); + } + $0->next= c->anon_typelist; + c->anon_typelist = $0; + } + }$ + + | [ IDENTIFIER ] Type ${ { + struct variable *v = var_ref(c, $2.txt); + + if (!v) + tok_err(c, "error: name undeclared", &$2); + else if (!v->constant) + tok_err(c, "error: array size must be a constant", &$2); + + $0 = calloc(1, sizeof(struct type)); + *($0) = array_prototype; + $0->array.member = $<4; + $0->array.size = 0; + $0->array.vsize = v; + $0->next= c->anon_typelist; + c->anon_typelist = $0; + } }$ + +###### parse context + + struct type *anon_typelist; + +###### free context types + + while (context.anon_typelist) { + struct type *t = context.anon_typelist; + + context.anon_typelist = t->next; + free(t); + } + +###### Binode types + Index, + +###### variable grammar + + | Variable [ Expression ] ${ { + struct binode *b = new(binode); + b->op = Index; + b->left = $<1; + b->right = $<3; + $0 = b; + } }$ + +###### print binode cases + case Index: + print_exec(b->left, -1, bracket); + printf("["); + print_exec(b->right, -1, bracket); + printf("]"); + break; + +###### propagate binode cases + case Index: + /* left must be an array, right must be a number, + * result is the member type of the array + */ + propagate_types(b->right, c, ok, Tnum, 0); + t = propagate_types(b->left, c, ok, NULL, rules & Rnoconstant); + if (!t || t->compat != array_compat) { + type_err(c, "error: %1 cannot be indexed", prog, t, 0, NULL); + return NULL; + } else { + if (!type_compat(type, t->array.member, rules)) { + type_err(c, "error: have %1 but need %2", prog, + t->array.member, rules, type); + } + return t->array.member; + } + break; + +###### interp binode cases + case Index: { + mpz_t q; + long i; + + lleft = linterp_exec(b->left); + right = interp_exec(b->right); + mpz_init(q); + mpz_tdiv_q(q, mpq_numref(right.num), mpq_denref(right.num)); + i = mpz_get_si(q); + mpz_clear(q); + + if (i >= 0 && i < lleft->type->array.size) + lrv = &lleft->array.elmnts[i]; + else + rv = val_init(lleft->type->array.member); + break; + } + +#### Structs + +A `struct` is a data-type that contains one or more other data-types. +It differs from an array in that each member can be of a different +type, and they are accessed by name rather than by number. Thus you +cannot choose an element by calculation, you need to know what you +want up-front. + +The language makes no promises about how a given structure will be +stored in memory - it is free to rearrange fields to suit whatever +criteria seems important. + +Structs are declared separately from program code - they cannot be +declared in-line in a variable declaration like arrays can. A struct +is given a name and this name is used to identify the type - the name +is not prefixed by the word `struct` as it would be in C. + +Structs are only treated as the same if they have the same name. +Simply having the same fields in the same order is not enough. This +might change once we can create structure initializes from a list of +values. + +Each component datum is identified much like a variable is declared, +with a name, one or two colons, and a type. The type cannot be omitted +as there is no opportunity to deduce the type from usage. An initial +value can be given following an equals sign, so + +##### Example: a struct type + + struct complex: + x:number = 0 + y:number = 0 + +would declare a type called "complex" which has two number fields, +each initialised to zero. + +Struct will need to be declared separately from the code that uses +them, so we will need to be able to print out the declaration of a +struct when reprinting the whole program. So a `print_type_decl` type +function will be needed. + +###### type union fields + + struct { + int nfields; + struct field { + struct text name; + struct type *type; + struct value init; + } *fields; + } structure; + +###### value union fields + struct { + struct value *fields; + } structure; + +###### type functions + void (*print_type_decl)(struct type *type, FILE *f); + +###### value functions + + static struct value structure_prepare(struct type *type) + { + struct value ret; + + ret.type = type; + ret.structure.fields = NULL; + return ret; + } + + static struct value structure_init(struct type *type) + { + struct value ret; + int i; + + ret.type = type; + ret.structure.fields = calloc(type->structure.nfields, + sizeof(ret.structure.fields[0])); + for (i = 0; ret.structure.fields && i < type->structure.nfields; i++) + ret.structure.fields[i] = val_init(type->structure.fields[i].type); + return ret; + } + + static void structure_free(struct value val) + { + int i; + + if (val.structure.fields) + for (i = 0; i < val.type->structure.nfields; i++) + free_value(val.structure.fields[i]); + free(val.structure.fields); + } + + static void structure_free_type(struct type *t) + { + int i; + for (i = 0; i < t->structure.nfields; i++) + free_value(t->structure.fields[i].init); + free(t->structure.fields); + } + + static struct type structure_prototype = { + .prepare = structure_prepare, + .init = structure_init, + .free = structure_free, + .free_type = structure_free_type, + .print_type_decl = structure_print_type, + }; + +###### exec type + Xfieldref, + +###### ast + struct fieldref { + struct exec; + struct exec *left; + int index; + struct text name; + }; + +###### free exec cases + case Xfieldref: + free_exec(cast(fieldref, e)->left); + free(e); + break; + +###### variable grammar + + | Variable . IDENTIFIER ${ { + struct fieldref *fr = new_pos(fieldref, $2); + fr->left = $<1; + fr->name = $3.txt; + fr->index = -2; + $0 = fr; + } }$ + +###### print exec cases + + case Xfieldref: + { + struct fieldref *f = cast(fieldref, e); + print_exec(f->left, -1, bracket); + printf(".%.*s", f->name.len, f->name.txt); + break; + } + +###### ast functions + static int find_struct_index(struct type *type, struct text field) + { + int i; + for (i = 0; i < type->structure.nfields; i++) + if (text_cmp(type->structure.fields[i].name, field) == 0) + return i; + return -1; + } + +###### propagate exec cases + + case Xfieldref: + { + struct fieldref *f = cast(fieldref, prog); + struct type *st = propagate_types(f->left, c, ok, NULL, 0); + + if (!st) + type_err(c, "error: unknown type for field access", f->left, + NULL, 0, NULL); + else if (st->prepare != structure_prepare) + type_err(c, "error: field reference attempted on %1, not a struct", + f->left, st, 0, NULL); + else if (f->index == -2) { + f->index = find_struct_index(st, f->name); + if (f->index < 0) + type_err(c, "error: cannot find requested field in %1", + f->left, st, 0, NULL); + } + if (f->index >= 0) { + struct type *ft = st->structure.fields[f->index].type; + if (!type_compat(type, ft, rules)) + type_err(c, "error: have %1 but need %2", prog, + ft, rules, type); + return ft; + } + break; + } + +###### interp exec cases + case Xfieldref: + { + struct fieldref *f = cast(fieldref, e); + struct value *lleft = linterp_exec(f->left); + lrv = &lleft->structure.fields[f->index]; + break; + } + +###### ast + struct fieldlist { + struct fieldlist *prev; + struct field f; + }; + +###### ast functions + static void free_fieldlist(struct fieldlist *f) + { + if (!f) + return; + free_fieldlist(f->prev); + free_value(f->f.init); + free(f); + } + +###### top level grammar + DeclareStruct -> struct IDENTIFIER FieldBlock Newlines ${ { + struct type *t = + add_type(c, $2.txt, &structure_prototype); + int cnt = 0; + struct fieldlist *f; + + for (f = $3; f; f=f->prev) + cnt += 1; + + t->structure.nfields = cnt; + t->structure.fields = calloc(cnt, sizeof(struct field)); + f = $3; + while (cnt > 0) { + cnt -= 1; + t->structure.fields[cnt] = f->f; + f->f.init = val_prepare(Tnone); + f = f->prev; + } + } }$ + + $*fieldlist + FieldBlock -> { IN OptNL FieldLines OUT OptNL } ${ $0 = $ SimpleFieldList Newlines ${ $0 = $prev = $ Field ${ $0 = $prev = $ IDENTIFIER : Type = Expression ${ { + int ok; + + $0 = calloc(1, sizeof(struct fieldlist)); + $0->f.name = $1.txt; + $0->f.type = $<3; + $0->f.init = val_prepare($0->f.type); + do { + ok = 1; + propagate_types($<5, c, &ok, $3, 0); + } while (ok == 2); + if (!ok) + c->parse_error = 1; + else + $0->f.init = interp_exec($5); + } }$ + | IDENTIFIER : Type ${ + $0 = calloc(1, sizeof(struct fieldlist)); + $0->f.name = $1.txt; + $0->f.type = $<3; + $0->f.init = val_init($3); + }$ + +###### forward decls + static void structure_print_type(struct type *t, FILE *f); + +###### value functions + static void structure_print_type(struct type *t, FILE *f) + { + int i; + + fprintf(f, "struct %.*s\n", t->name.len, t->name.txt); + + for (i = 0; i < t->structure.nfields; i++) { + struct field *fl = t->structure.fields + i; + fprintf(f, " %.*s : ", fl->name.len, fl->name.txt); + type_print(fl->type, f); + if (fl->init.type->print) { + fprintf(f, " = "); + if (fl->init.type == Tstr) + fprintf(f, "\""); + print_value(fl->init); + if (fl->init.type == Tstr) + fprintf(f, "\""); + } + printf("\n"); + } } -## Language elements +###### print type decls + { + struct type *t; + int target = -1; + + while (target != 0) { + int i = 0; + for (t = context.typelist; t ; t=t->next) + if (t->print_type_decl) { + i += 1; + if (i == target) + break; + } -Each language element needs to be parsed, printed, analysed, + if (target == -1) { + target = i; + } else { + t->print_type_decl(t, stdout); + target -= 1; + } + } + } + +## Executables: the elements of code + +Each code element needs to be parsed, printed, analysed, interpreted, and freed. There are several, so let's just start with the easy ones and work our way up. @@ -1526,7 +2156,7 @@ an executable. if (number_parse($0->val.num, tail, $1.txt) == 0) mpq_init($0->val.num); if (tail[0]) - tok_err(config2context(config), "error: unsupported number suffix", + tok_err(c, "error: unsupported number suffix", &$1); } }$ @@ -1537,7 +2167,7 @@ an executable. char tail[3]; string_parse(&$1, '\\', &$0->val.str, tail); if (tail[0]) - tok_err(config2context(config), "error: unsupported string suffix", + tok_err(c, "error: unsupported string suffix", &$1); } }$ @@ -1548,7 +2178,7 @@ an executable. char tail[3]; string_parse(&$1, '\\', &$0->val.str, tail); if (tail[0]) - tok_err(config2context(config), "error: unsupported string suffix", + tok_err(c, "error: unsupported string suffix", &$1); } }$ @@ -1566,20 +2196,19 @@ an executable. } ###### propagate exec cases - case Xval: - { - struct val *val = cast(val, prog); - if (!type_compat(type, val->val.type, rules)) { - type_err(c, "error: expected %1%r found %2", - prog, type, rules, val->val.type); - *ok = 0; - } - return val->val.type; - } + case Xval: + { + struct val *val = cast(val, prog); + if (!type_compat(type, val->val.type, rules)) + type_err(c, "error: expected %1%r found %2", + prog, type, rules, val->val.type); + return val->val.type; + } ###### interp exec cases case Xval: - return dup_value(cast(val, e)->val); + rv = dup_value(cast(val, e)->val); + break; ###### ast functions static void free_val(struct val *v) @@ -1636,38 +2265,38 @@ link to find the primary instance. $*var VariableDecl -> IDENTIFIER : ${ { - struct variable *v = var_decl(config2context(config), $1.txt); + struct variable *v = var_decl(c, $1.txt); $0 = new_pos(var, $1); $0->var = v; if (v) v->where_decl = $0; else { - v = var_ref(config2context(config), $1.txt); + v = var_ref(c, $1.txt); $0->var = v; - type_err(config2context(config), "error: variable '%v' redeclared", - $0, Tnone, 0, Tnone); - type_err(config2context(config), "info: this is where '%v' was first declared", - v->where_decl, Tnone, 0, Tnone); + type_err(c, "error: variable '%v' redeclared", + $0, NULL, 0, NULL); + type_err(c, "info: this is where '%v' was first declared", + v->where_decl, NULL, 0, NULL); } } }$ | IDENTIFIER :: ${ { - struct variable *v = var_decl(config2context(config), $1.txt); + struct variable *v = var_decl(c, $1.txt); $0 = new_pos(var, $1); $0->var = v; if (v) { v->where_decl = $0; v->constant = 1; } else { - v = var_ref(config2context(config), $1.txt); + v = var_ref(c, $1.txt); $0->var = v; - type_err(config2context(config), "error: variable '%v' redeclared", - $0, Tnone, 0, Tnone); - type_err(config2context(config), "info: this is where '%v' was first declared", - v->where_decl, Tnone, 0, Tnone); + type_err(c, "error: variable '%v' redeclared", + $0, NULL, 0, NULL); + type_err(c, "info: this is where '%v' was first declared", + v->where_decl, NULL, 0, NULL); } } }$ | IDENTIFIER : Type ${ { - struct variable *v = var_decl(config2context(config), $1.txt); + struct variable *v = var_decl(c, $1.txt); $0 = new_pos(var, $1); $0->var = v; if (v) { @@ -1675,16 +2304,16 @@ link to find the primary instance. v->where_set = $0; v->val = val_prepare($<3); } else { - v = var_ref(config2context(config), $1.txt); + v = var_ref(c, $1.txt); $0->var = v; - type_err(config2context(config), "error: variable '%v' redeclared", - $0, Tnone, 0, Tnone); - type_err(config2context(config), "info: this is where '%v' was first declared", - v->where_decl, Tnone, 0, Tnone); + type_err(c, "error: variable '%v' redeclared", + $0, NULL, 0, NULL); + type_err(c, "info: this is where '%v' was first declared", + v->where_decl, NULL, 0, NULL); } } }$ | IDENTIFIER :: Type ${ { - struct variable *v = var_decl(config2context(config), $1.txt); + struct variable *v = var_decl(c, $1.txt); $0 = new_pos(var, $1); $0->var = v; if (v) { @@ -1693,40 +2322,43 @@ link to find the primary instance. v->val = val_prepare($<3); v->constant = 1; } else { - v = var_ref(config2context(config), $1.txt); + v = var_ref(c, $1.txt); $0->var = v; - type_err(config2context(config), "error: variable '%v' redeclared", - $0, Tnone, 0, Tnone); - type_err(config2context(config), "info: this is where '%v' was first declared", - v->where_decl, Tnone, 0, Tnone); + type_err(c, "error: variable '%v' redeclared", + $0, NULL, 0, NULL); + type_err(c, "info: this is where '%v' was first declared", + v->where_decl, NULL, 0, NULL); } } }$ + $*exec Variable -> IDENTIFIER ${ { - struct variable *v = var_ref(config2context(config), $1.txt); + struct variable *v = var_ref(c, $1.txt); $0 = new_pos(var, $1); if (v == NULL) { /* This might be a label - allocate a var just in case */ - v = var_decl(config2context(config), $1.txt); + v = var_decl(c, $1.txt); if (v) { - v->val = val_prepare(Tlabel); - v->val.label = &v->val; + v->val = val_prepare(Tnone); + v->where_decl = $0; v->where_set = $0; } } - $0->var = v; + cast(var, $0)->var = v; } }$ + ## variable grammar $*type Type -> IDENTIFIER ${ - $0 = find_type(config2context(config), $1.txt); + $0 = find_type(c, $1.txt); if (!$0) { - tok_err(config2context(config), + tok_err(c, "error: undefined type", &$1); $0 = Tnone; } }$ + ## type grammar ###### print exec cases case Xvar: @@ -1747,9 +2379,9 @@ link to find the primary instance. struct binding *b = v->var->name; fprintf(stderr, "%.*s", b->name.len, b->name.txt); } else - fputs("???", stderr); + fputs("???", stderr); // NOTEST } else - fputs("NOTVAR", stderr); + fputs("NOTVAR", stderr); // NOTEST break; ###### propagate exec cases @@ -1759,9 +2391,8 @@ link to find the primary instance. struct var *var = cast(var, prog); struct variable *v = var->var; if (!v) { - type_err(c, "%d:BUG: no variable!!", prog, Tnone, 0, Tnone); - *ok = 0; - return Tnone; + type_err(c, "%d:BUG: no variable!!", prog, NULL, 0, NULL); // NOTEST + return Tnone; // NOTEST } if (v->merged) v = v->merged; @@ -1770,9 +2401,11 @@ link to find the primary instance. prog, NULL, 0, NULL); type_err(c, "info: name was defined as a constant here", v->where_decl, NULL, 0, NULL); - *ok = 0; return v->val.type; } + if (v->val.type == Tnone && v->where_decl == prog) + type_err(c, "error: variable used but not declared: %v", + prog, NULL, 0, NULL); if (v->val.type == NULL) { if (type && *ok != 0) { v->val = val_prepare(type); @@ -1785,8 +2418,7 @@ link to find the primary instance. type_err(c, "error: expected %1%r but variable '%v' is %2", prog, type, rules, v->val.type); type_err(c, "info: this is where '%v' was set to %1", v->where_set, - v->val.type, rules, Tnone); - *ok = 0; + v->val.type, rules, NULL); } if (!type) return v->val.type; @@ -1801,7 +2433,8 @@ link to find the primary instance. if (v->merged) v = v->merged; - return dup_value(v->val); + lrv = &v->val; + break; } ###### ast functions @@ -1814,77 +2447,190 @@ link to find the primary instance. ###### free exec cases case Xvar: free_var(cast(var, e)); break; +### Expressions: Conditional + +Our first user of the `binode` will be conditional expressions, which +is a bit odd as they actually have three components. That will be +handled by having 2 binodes for each expression. The conditional +expression is the lowest precedence operatior, so it gets to define +what an "Expression" is. The next level up is "BoolExpr", which +comes next. + +Conditional expressions are of the form "value `if` condition `else` +other_value". They associate to the right, so everything to the right +of `else` is part of an else value, while only the BoolExpr to the +left of `if` is the if values. Between `if` and `else` there is no +room for ambiguity, so a full conditional expression is allowed in there. + +###### Binode types + CondExpr, + +###### Grammar + + $LEFT if $$ifelse + ## expr precedence + + $*exec + Expression -> Expression if Expression else Expression $$ifelse ${ { + struct binode *b1 = new(binode); + struct binode *b2 = new(binode); + b1->op = CondExpr; + b1->left = $<3; + b1->right = b2; + b2->op = CondExpr; + b2->left = $<1; + b2->right = $<5; + $0 = b1; + } }$ + ## expression grammar + +###### print binode cases + + case CondExpr: + b2 = cast(binode, b->right); + if (bracket) printf("("); + print_exec(b2->left, -1, bracket); + printf(" if "); + print_exec(b->left, -1, bracket); + printf(" else "); + print_exec(b2->right, -1, bracket); + if (bracket) printf(")"); + break; + +###### propagate binode cases + + case CondExpr: { + /* cond must be Tbool, others must match */ + struct binode *b2 = cast(binode, b->right); + struct type *t2; + + propagate_types(b->left, c, ok, Tbool, 0); + t = propagate_types(b2->left, c, ok, type, Rnolabel); + t2 = propagate_types(b2->right, c, ok, type ?: t, Rnolabel); + return t ?: t2; + } + +###### interp binode cases + + case CondExpr: { + struct binode *b2 = cast(binode, b->right); + left = interp_exec(b->left); + if (left.bool) + rv = interp_exec(b2->left); + else + rv = interp_exec(b2->right); + } + break; + ### Expressions: Boolean -Our first user of the `binode` will be expressions, and particularly -Boolean expressions. As I haven't implemented precedence in the -parser generator yet, we need different names for each precedence -level used by expressions. The outer most or lowest level precedence -are Boolean `or` `and`, and `not` which form an `Expression` out of `BTerm`s -and `BFact`s. +The next class of expressions to use the `binode` will be Boolean +expressions. As I haven't implemented precedence in the parser +generator yet, we need different names for each precedence level used +by expressions. The outer most or lowest level precedence after +conditional expressions are Boolean operators which form an `BoolExpr` +out of `BTerm`s and `BFact`s. As well as `or` `and`, and `not` we +have `and then` and `or else` which only evaluate the second operand +if the result would make a difference. ###### Binode types And, + AndThen, Or, + OrElse, Not, -###### Grammar +###### expr precedence + $LEFT or + $LEFT and + $LEFT not - $*exec - Expression -> Expression or BTerm ${ { +###### expression grammar + | Expression or Expression ${ { struct binode *b = new(binode); b->op = Or; b->left = $<1; b->right = $<3; $0 = b; } }$ - | BTerm ${ $0 = $<1; }$ + | Expression or else Expression ${ { + struct binode *b = new(binode); + b->op = OrElse; + b->left = $<1; + b->right = $<4; + $0 = b; + } }$ - BTerm -> BTerm and BFact ${ { + | Expression and Expression ${ { struct binode *b = new(binode); b->op = And; b->left = $<1; b->right = $<3; $0 = b; } }$ - | BFact ${ $0 = $<1; }$ + | Expression and then Expression ${ { + struct binode *b = new(binode); + b->op = AndThen; + b->left = $<1; + b->right = $<4; + $0 = b; + } }$ - BFact -> not BFact ${ { + | not Expression ${ { struct binode *b = new(binode); b->op = Not; b->right = $<2; $0 = b; } }$ - ## other BFact ###### print binode cases case And: - print_exec(b->left, -1, 0); + if (bracket) printf("("); + print_exec(b->left, -1, bracket); printf(" and "); - print_exec(b->right, -1, 0); + print_exec(b->right, -1, bracket); + if (bracket) printf(")"); + break; + case AndThen: + if (bracket) printf("("); + print_exec(b->left, -1, bracket); + printf(" and then "); + print_exec(b->right, -1, bracket); + if (bracket) printf(")"); break; case Or: - print_exec(b->left, -1, 0); + if (bracket) printf("("); + print_exec(b->left, -1, bracket); printf(" or "); - print_exec(b->right, -1, 0); + print_exec(b->right, -1, bracket); + if (bracket) printf(")"); + break; + case OrElse: + if (bracket) printf("("); + print_exec(b->left, -1, bracket); + printf(" or else "); + print_exec(b->right, -1, bracket); + if (bracket) printf(")"); break; case Not: + if (bracket) printf("("); printf("not "); - print_exec(b->right, -1, 0); + print_exec(b->right, -1, bracket); + if (bracket) printf(")"); break; ###### propagate binode cases case And: + case AndThen: case Or: + case OrElse: case Not: /* both must be Tbool, result is Tbool */ propagate_types(b->left, c, ok, Tbool, 0); propagate_types(b->right, c, ok, Tbool, 0); - if (type && type != Tbool) { + if (type && type != Tbool) type_err(c, "error: %1 operation found where %2 expected", prog, Tbool, 0, type); - *ok = 0; - } return Tbool; ###### interp binode cases @@ -1893,11 +2639,21 @@ and `BFact`s. right = interp_exec(b->right); rv.bool = rv.bool && right.bool; break; + case AndThen: + rv = interp_exec(b->left); + if (rv.bool) + rv = interp_exec(b->right); + break; case Or: rv = interp_exec(b->left); right = interp_exec(b->right); rv.bool = rv.bool || right.bool; break; + case OrElse: + rv = interp_exec(b->left); + if (!rv.bool) + rv = interp_exec(b->right); + break; case Not: rv = interp_exec(b->right); rv.bool = !rv.bool; @@ -1907,7 +2663,7 @@ and `BFact`s. Of slightly higher precedence that Boolean expressions are Comparisons. -A comparison takes arguments of any type, but the two types must be +A comparison takes arguments of any comparable type, but the two types must be the same. To simplify the parsing we introduce an `eop` which can record an @@ -1933,15 +2689,17 @@ expression operator. Eql, NEql, -###### other BFact - | Expr CMPop Expr ${ { - struct binode *b = new(binode); - b->op = $2.op; - b->left = $<1; - b->right = $<3; - $0 = b; +###### expr precedence + $LEFT < > <= >= == != CMPop + +###### expression grammar + | Expression CMPop Expression ${ { + struct binode *b = new(binode); + b->op = $2.op; + b->left = $<1; + b->right = $<3; + $0 = b; } }$ - | Expr ${ $0 = $<1; }$ ###### Grammar @@ -1961,7 +2719,8 @@ expression operator. case GtrEq: case Eql: case NEql: - print_exec(b->left, -1, 0); + if (bracket) printf("("); + print_exec(b->left, -1, bracket); switch(b->op) { case Less: printf(" < "); break; case LessEq: printf(" <= "); break; @@ -1969,9 +2728,10 @@ expression operator. case GtrEq: printf(" >= "); break; case Eql: printf(" == "); break; case NEql: printf(" != "); break; - default: abort(); + default: abort(); // NOTEST } - print_exec(b->right, -1, 0); + print_exec(b->right, -1, bracket); + if (bracket) printf(")"); break; ###### propagate binode cases @@ -1990,11 +2750,9 @@ expression operator. if (t) t = propagate_types(b->left, c, ok, t, 0); } - if (!type_compat(type, Tbool, 0)) { + if (!type_compat(type, Tbool, 0)) type_err(c, "error: Comparison returns %1 but %2 expected", prog, Tbool, rules, type); - *ok = 0; - } return Tbool; ###### interp binode cases @@ -2017,7 +2775,7 @@ expression operator. case GtrEq: rv.bool = cmp >= 0; break; case Eql: rv.bool = cmp == 0; break; case NEql: rv.bool = cmp != 0; break; - default: rv.bool = 0; break; + default: rv.bool = 0; break; // NOTEST } break; } @@ -2038,39 +2796,41 @@ precedence is handled better I might be able to discard this. ###### Binode types Plus, Minus, - Times, Divide, + Times, Divide, Rem, Concat, Absolute, Negate, Bracket, -###### Grammar +###### expr precedence + $LEFT + - Eop + $LEFT * / % ++ Top + $LEFT Uop + $TERM ( ) - $*exec - Expr -> Expr Eop Term ${ { +###### expression grammar + | Expression Eop Expression ${ { struct binode *b = new(binode); b->op = $2.op; b->left = $<1; b->right = $<3; $0 = b; } }$ - | Term ${ $0 = $<1; }$ - Term -> Term Top Factor ${ { + | Expression Top Expression ${ { struct binode *b = new(binode); b->op = $2.op; b->left = $<1; b->right = $<3; $0 = b; } }$ - | Factor ${ $0 = $<1; }$ - Factor -> ( Expression ) ${ { + | ( Expression ) ${ { struct binode *b = new_pos(binode, $1); b->op = Bracket; b->right = $<2; $0 = b; } }$ - | Uop Factor ${ { + | Uop Expression ${ { struct binode *b = new(binode); b->op = $1.op; b->right = $<2; @@ -2088,6 +2848,7 @@ precedence is handled better I might be able to discard this. Top -> * ${ $0.op = Times; }$ | / ${ $0.op = Divide; }$ + | % ${ $0.op = Rem; }$ | ++ ${ $0.op = Concat; }$ ###### print binode cases @@ -2096,28 +2857,36 @@ precedence is handled better I might be able to discard this. case Times: case Divide: case Concat: - print_exec(b->left, indent, 0); + case Rem: + if (bracket) printf("("); + print_exec(b->left, indent, bracket); switch(b->op) { - case Plus: printf(" + "); break; - case Minus: printf(" - "); break; - case Times: printf(" * "); break; - case Divide: printf(" / "); break; - case Concat: printf(" ++ "); break; - default: abort(); - } - print_exec(b->right, indent, 0); + case Plus: fputs(" + ", stdout); break; + case Minus: fputs(" - ", stdout); break; + case Times: fputs(" * ", stdout); break; + case Divide: fputs(" / ", stdout); break; + case Rem: fputs(" % ", stdout); break; + case Concat: fputs(" ++ ", stdout); break; + default: abort(); // NOTEST + } // NOTEST + print_exec(b->right, indent, bracket); + if (bracket) printf(")"); break; case Absolute: + if (bracket) printf("("); printf("+"); - print_exec(b->right, indent, 0); + print_exec(b->right, indent, bracket); + if (bracket) printf(")"); break; case Negate: + if (bracket) printf("("); printf("-"); - print_exec(b->right, indent, 0); + print_exec(b->right, indent, bracket); + if (bracket) printf(")"); break; case Bracket: printf("("); - print_exec(b->right, indent, 0); + print_exec(b->right, indent, bracket); printf(")"); break; @@ -2125,6 +2894,7 @@ precedence is handled better I might be able to discard this. case Plus: case Minus: case Times: + case Rem: case Divide: /* both must be numbers, result is Tnum */ case Absolute: @@ -2133,22 +2903,18 @@ precedence is handled better I might be able to discard this. * unary ops fit here too */ propagate_types(b->left, c, ok, Tnum, 0); propagate_types(b->right, c, ok, Tnum, 0); - if (!type_compat(type, Tnum, 0)) { + if (!type_compat(type, Tnum, 0)) type_err(c, "error: Arithmetic returns %1 but %2 expected", prog, Tnum, rules, type); - *ok = 0; - } return Tnum; case Concat: /* both must be Tstr, result is Tstr */ propagate_types(b->left, c, ok, Tstr, 0); propagate_types(b->right, c, ok, Tstr, 0); - if (!type_compat(type, Tstr, 0)) { + if (!type_compat(type, Tstr, 0)) type_err(c, "error: Concat returns %1 but %2 expected", prog, Tstr, rules, type); - *ok = 0; - } return Tstr; case Bracket: @@ -2176,6 +2942,20 @@ precedence is handled better I might be able to discard this. right = interp_exec(b->right); mpq_div(rv.num, rv.num, right.num); break; + case Rem: { + mpz_t l, r, rem; + + left = interp_exec(b->left); + right = interp_exec(b->right); + mpz_init(l); mpz_init(r); mpz_init(rem); + mpz_tdiv_q(l, mpq_numref(left.num), mpq_denref(left.num)); + mpz_tdiv_q(r, mpq_numref(right.num), mpq_denref(right.num)); + mpz_tdiv_r(rem, l, r); + rv = val_init(Tnum); + mpq_set_z(rv.num, rem); + mpz_clear(r); mpz_clear(l); mpz_clear(rem); + break; + } case Negate: rv = interp_exec(b->right); mpq_neg(rv.num, rv.num); @@ -2194,7 +2974,6 @@ precedence is handled better I might be able to discard this. rv.str = text_join(left.str, right.str); break; - ###### value functions static struct text text_join(struct text a, struct text b) @@ -2207,12 +2986,11 @@ precedence is handled better I might be able to discard this. return rv; } - ### Blocks, Statements, and Statement lists. Now that we have expressions out of the way we need to turn to statements. There are simple statements and more complex statements. -Simple statements do not contain newlines, complex statements do. +Simple statements do not contain (syntactic) newlines, complex statements do. Statements often come in sequences and we have corresponding simple statement lists and complex statement lists. @@ -2264,46 +3042,63 @@ is in-place. ###### Binode types Block, -###### Grammar +###### expr precedence + $TERM pass - $void - OptNL -> Newlines - | - - Newlines -> NEWLINE - | Newlines NEWLINE +###### Grammar $*binode - Open -> { - | NEWLINE { - Close -> } - | NEWLINE } - Block -> Open Statementlist Close ${ $0 = $<2; }$ - | Open Newlines Statementlist Close ${ $0 = $<3; }$ - | Open SimpleStatements } ${ $0 = reorder_bilist($<2); }$ - | Open Newlines SimpleStatements } ${ $0 = reorder_bilist($<3); }$ - | : Statementlist ${ $0 = $<2; }$ - | : SimpleStatements ${ $0 = reorder_bilist($<2); }$ - - Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<1); }$ + Block -> { IN OptNL Statementlist OUT OptNL } ${ $0 = $ OpenScope { IN OptNL Statementlist OUT OptNL } ${ $0 = $ { OpenScope IN OptNL Statementlist OUT OptNL } ${ $0 = $ { IN OptNL Statementlist OUT OptNL } ${ $0 = $ ComplexStatements ${ $0 = reorder_bilist($ ComplexStatements ComplexStatement ${ - $0 = new(binode); - $0->op = Block; - $0->left = $<1; - $0->right = $<2; + if ($2 == NULL) { + $0 = $<1; + } else { + $0 = new(binode); + $0->op = Block; + $0->left = $<1; + $0->right = $<2; + } }$ - | ComplexStatements NEWLINE ${ $0 = $<1; }$ | ComplexStatement ${ - $0 = new(binode); - $0->op = Block; - $0->left = NULL; - $0->right = $<1; + if ($1 == NULL) { + $0 = NULL; + } else { + $0 = new(binode); + $0->op = Block; + $0->left = NULL; + $0->right = $<1; + } }$ $*exec - ComplexStatement -> SimpleStatements NEWLINE ${ - $0 = reorder_bilist($<1); + ComplexStatement -> SimpleStatements Newlines ${ + $0 = reorder_bilist($left = NULL; $0->right = $<1; }$ - | SimpleStatements ; ${ $0 = $<1; }$ SimpleStatement -> pass ${ $0 = NULL; }$ + | ERROR ${ tok_err(c, "Syntax error in statement", &$1); }$ ## SimpleStatement Grammar ###### print binode cases @@ -2332,10 +3127,10 @@ is in-place. if (b->left == NULL) printf("pass"); else - print_exec(b->left, indent, 0); + print_exec(b->left, indent, bracket); if (b->right) { printf("; "); - print_exec(b->right, indent, 0); + print_exec(b->right, indent, bracket); } } else { // block, one per line @@ -2367,11 +3162,9 @@ is in-place. if (t && t != Tnone && t != Tbool) { if (!type) type = t; - else if (t != type) { + else if (t != type) type_err(c, "error: expected %1%r, found %2", e->left, type, rules, t); - *ok = 0; - } } } return type; @@ -2399,6 +3192,9 @@ same solution. ###### Binode types Print, +##### expr precedence + $TERM print , + ###### SimpleStatement Grammar | print ExpressionList ${ @@ -2440,7 +3236,7 @@ same solution. while (b) { if (b->left) { printf(" "); - print_exec(b->left, -1, 0); + print_exec(b->left, -1, bracket); if (b->right) printf(","); } @@ -2511,7 +3307,8 @@ it is declared, and error will be raised as the name is created as | VariableDecl ${ if ($1->var->where_set == NULL) { - type_err(config2context(config), "Variable declared with no type or value: %v", + type_err(c, + "Variable declared with no type or value: %v", $1, NULL, 0, NULL); } else { $0 = new(binode); @@ -2525,9 +3322,9 @@ it is declared, and error will be raised as the name is created as case Assign: do_indent(indent, ""); - print_exec(b->left, indent, 0); + print_exec(b->left, indent, bracket); printf(" = "); - print_exec(b->right, indent, 0); + print_exec(b->right, indent, bracket); if (indent >= 0) printf("\n"); break; @@ -2536,7 +3333,7 @@ it is declared, and error will be raised as the name is created as { struct variable *v = cast(var, b->left)->var; do_indent(indent, ""); - print_exec(b->left, indent, 0); + print_exec(b->left, indent, bracket); if (cast(var, b->left)->var->constant) { if (v->where_decl == v->where_set) { printf("::"); @@ -2554,7 +3351,7 @@ it is declared, and error will be raised as the name is created as } if (b->right) { printf("= "); - print_exec(b->right, indent, 0); + print_exec(b->right, indent, bracket); } if (indent >= 0) printf("\n"); @@ -2579,17 +3376,15 @@ it is declared, and error will be raised as the name is created as if (propagate_types(b->right, c, ok, t, 0) != t) if (b->left->type == Xvar) type_err(c, "info: variable '%v' was set as %1 here.", - cast(var, b->left)->var->where_set, t, rules, Tnone); + cast(var, b->left)->var->where_set, t, rules, NULL); } else { t = propagate_types(b->right, c, ok, NULL, Rnolabel); if (t) propagate_types(b->left, c, ok, t, (b->op == Assign ? Rnoconstant : 0)); } - if (t && t->dup == NULL) { + if (t && t->dup == NULL) type_err(c, "error: cannot assign value of type %1", b, t, 0, NULL); - *ok = 0; - } return Tnone; break; @@ -2597,16 +3392,15 @@ it is declared, and error will be raised as the name is created as ###### interp binode cases case Assign: - { - struct variable *v = cast(var, b->left)->var; - if (v->merged) - v = v->merged; + lleft = linterp_exec(b->left); right = interp_exec(b->right); - free_value(v->val); - v->val = right; + if (lleft) { + free_value(*lleft); + *lleft = right; + } else + free_value(right); // NOTEST right.type = NULL; break; - } case Declare: { @@ -2633,18 +3427,29 @@ function. ###### Binode types Use, +###### expr precedence + $TERM use + ###### SimpleStatement Grammar | use Expression ${ $0 = new_pos(binode, $1); $0->op = Use; $0->right = $<2; + if ($0->right->type == Xvar) { + struct var *v = cast(var, $0->right); + if (v->var->val.type == Tnone) { + /* Convert this to a label */ + v->var->val = val_prepare(Tlabel); + v->var->val.label = &v->var->val; + } + } }$ ###### print binode cases case Use: do_indent(indent, "use "); - print_exec(b->right, -1, 0); + print_exec(b->right, -1, bracket); if (indent >= 0) printf("\n"); break; @@ -2773,154 +3578,145 @@ defined. ###### ComplexStatement Grammar | CondStatement ${ $0 = $<1; }$ +###### expr precedence + $TERM for then while do + $TERM else + $TERM switch case + ###### Grammar $*cond_statement - // both ForThen and Whilepart open scopes, and CondSuffix only + // A CondStatement must end with EOL, as does CondSuffix and + // IfSuffix. + // ForPart, ThenPart, SwitchPart, CasePart are non-empty and + // may or may not end with EOL + // WhilePart and IfPart include an appropriate Suffix + + + // Both ForPart and Whilepart open scopes, and CondSuffix only // closes one - so in the first branch here we have another to close. - CondStatement -> ForThen WhilePart CondSuffix ${ - $0 = $<3; - $0->forpart = $1.forpart; $1.forpart = NULL; - $0->thenpart = $1.thenpart; $1.thenpart = NULL; - $0->condpart = $2.condpart; $2.condpart = NULL; - $0->dopart = $2.dopart; $2.dopart = NULL; - var_block_close(config2context(config), CloseSequential); + CondStatement -> ForPart OptNL ThenPart OptNL WhilePart CondSuffix ${ + $0 = $forpart = $thenpart = $condpart = $WP.condpart; $WP.condpart = NULL; + $0->dopart = $WP.dopart; $WP.dopart = NULL; + var_block_close(c, CloseSequential); + }$ + | ForPart OptNL WhilePart CondSuffix ${ + $0 = $forpart = $condpart = $WP.condpart; $WP.condpart = NULL; + $0->dopart = $WP.dopart; $WP.dopart = NULL; + var_block_close(c, CloseSequential); }$ | WhilePart CondSuffix ${ - $0 = $<2; - $0->condpart = $1.condpart; $1.condpart = NULL; - $0->dopart = $1.dopart; $1.dopart = NULL; + $0 = $condpart = $WP.condpart; $WP.condpart = NULL; + $0->dopart = $WP.dopart; $WP.dopart = NULL; + }$ + | SwitchPart OptNL CasePart CondSuffix ${ + $0 = $condpart = $next = $0->casepart; + $0->casepart = $condpart = $<1; + | SwitchPart : IN OptNL CasePart CondSuffix OUT Newlines ${ + $0 = $condpart = $next = $0->casepart; + $0->casepart = $condpart = $1.condpart; $1.condpart = NULL; - $0->thenpart = $1.thenpart; $1.thenpart = NULL; + $0 = $condpart = $IP.condpart; $IP.condpart = NULL; + $0->thenpart = $IP.thenpart; $IP.thenpart = NULL; // This is where we close an "if" statement - var_block_close(config2context(config), CloseSequential); + var_block_close(c, CloseSequential); }$ CondSuffix -> IfSuffix ${ $0 = $<1; // This is where we close scope of the whole // "for" or "while" statement - var_block_close(config2context(config), CloseSequential); - }$ - | CasePart CondSuffix ${ - $0 = $<2; - $1->next = $0->casepart; - $0->casepart = $<1; + var_block_close(c, CloseSequential); }$ - - $*casepart - CasePart -> Newlines case Expression OpenScope Block ${ - $0 = calloc(1,sizeof(struct casepart)); - $0->value = $<3; - $0->action = $<5; - var_block_close(config2context(config), CloseParallel); + | Newlines CasePart CondSuffix ${ + $0 = $next = $0->casepart; + $0->casepart = $value = $<2; - $0->action = $<4; - var_block_close(config2context(config), CloseParallel); + | CasePart CondSuffix ${ + $0 = $next = $0->casepart; + $0->casepart = $ Newlines ${ $0 = new(cond_statement); }$ - | Newlines else OpenScope Block ${ - $0 = new(cond_statement); - $0->elsepart = $<4; - var_block_close(config2context(config), CloseElse); - }$ - | else OpenScope Block ${ - $0 = new(cond_statement); - $0->elsepart = $<3; - var_block_close(config2context(config), CloseElse); - }$ - | Newlines else OpenScope CondStatement ${ + | Newlines ElsePart ${ $0 = $ else OpenBlock Newlines ${ $0 = new(cond_statement); - $0->elsepart = $<4; - var_block_close(config2context(config), CloseElse); + $0->elsepart = $elsepart = $<3; - var_block_close(config2context(config), CloseElse); + $0->elsepart = $ case Expression OpenScope ColonBlock ${ + $0 = calloc(1,sizeof(struct casepart)); + $0->value = $action = $ for OpenScope SimpleStatements ${ - $0 = reorder_bilist($<3); - }$ - | for OpenScope Block ${ - $0 = $<3; + ForPart -> for OpenBlock ${ + $0 = $ then OpenScope SimpleStatements ${ - $0 = reorder_bilist($<3); - var_block_close(config2context(config), CloseSequential); - }$ - | then OpenScope Block ${ - $0 = $<3; - var_block_close(config2context(config), CloseSequential); - }$ - - ThenPartNL -> ThenPart OptNL ${ - $0 = $<1; - }$ - - // This scope is closed in CondSuffix - WhileHead -> while OpenScope Block ${ - $0 = $<3; + ThenPart -> then OpenBlock ${ + $0 = $ ForPart OptNL ThenPartNL ${ - $0.forpart = $<1; - $0.thenpart = $<3; - }$ - | ForPart OptNL ${ - $0.forpart = $<1; - }$ - // This scope is closed in CondSuffix - WhilePart -> while OpenScope Expression Block ${ - $0.type = Xcond_statement; - $0.condpart = $<3; - $0.dopart = $<4; + WhilePart -> while UseBlock OptNL do Block ${ + $0.condpart = $ if OpenScope Expression OpenScope Block ${ - $0.type = Xcond_statement; - $0.condpart = $<3; - $0.thenpart = $<5; - var_block_close(config2context(config), CloseParallel); + IfPart -> if UseBlock OptNL then OpenBlock ClosePara ${ + $0.condpart = $ switch OpenScope Expression ${ - $0 = $<3; + $0 = $forpart) { do_indent(indent, "for"); - if (bracket) printf(" {\n"); else printf(":\n"); + if (bracket) printf(" {\n"); else printf("\n"); print_exec(cs->forpart, indent+1, bracket); if (cs->thenpart) { if (bracket) do_indent(indent, "} then {\n"); else - do_indent(indent, "then:\n"); + do_indent(indent, "then\n"); print_exec(cs->thenpart, indent+1, bracket); } if (bracket) do_indent(indent, "}\n"); @@ -2949,12 +3745,12 @@ defined. if (bracket) do_indent(indent, "while {\n"); else - do_indent(indent, "while:\n"); + do_indent(indent, "while\n"); print_exec(cs->condpart, indent+1, bracket); if (bracket) do_indent(indent, "} do {\n"); else - do_indent(indent, "do:\n"); + do_indent(indent, "do\n"); print_exec(cs->dopart, indent+1, bracket); if (bracket) do_indent(indent, "}\n"); @@ -3019,7 +3815,7 @@ defined. if (bracket) printf(" {\n"); else - printf(":\n"); + printf("\n"); print_exec(cs->elsepart, indent+1, bracket); if (bracket) do_indent(indent, "}\n"); @@ -3113,10 +3909,10 @@ defined. interp_exec(c->dopart); if (c->thenpart) { - v = interp_exec(c->thenpart); - if (v.type != Tnone || !c->dopart) - return v; - free_value(v); + rv = interp_exec(c->thenpart); + if (rv.type != Tnone || !c->dopart) + goto Xcond_done; + free_value(rv); } } while (c->dopart); @@ -3125,15 +3921,160 @@ defined. if (value_cmp(v, cnd) == 0) { free_value(v); free_value(cnd); - return interp_exec(cp->action); + rv = interp_exec(cp->action); + goto Xcond_done; } free_value(v); } free_value(cnd); if (c->elsepart) - return interp_exec(c->elsepart); - v.type = Tnone; - return v; + rv = interp_exec(c->elsepart); + else + rv.type = Tnone; + Xcond_done: + break; + } + +### Top level structure + +All the language elements so far can be used in various places. Now +it is time to clarify what those places are. + +At the top level of a file there will be a number of declarations. +Many of the things that can be declared haven't been described yet, +such as functions, procedures, imports, and probably more. +For now there are two sorts of things that can appear at the top +level. They are predefined constants, `struct` types, and the main +program. While the syntax will allow the main program to appear +multiple times, that will trigger an error if it is actually attempted. + +The various declarations do not return anything. They store the +various declarations in the parse context. + +###### Parser: grammar + + $void + Ocean -> OptNL DeclarationList + + OptNL -> + | OptNL NEWLINE + Newlines -> NEWLINE + | Newlines NEWLINE + + DeclarationList -> Declaration + | DeclarationList Declaration + + Declaration -> ERROR Newlines ${ + tok_err(c, + "error: unhandled parse error", &$1); + }$ + | DeclareConstant + | DeclareProgram + | DeclareStruct + + ## top level grammar + +### The `const` section + +As well as being defined in with the code that uses them, constants +can be declared at the top level. These have full-file scope, so they +are always `InScope`. The value of a top level constant can be given +as an expression, and this is evaluated immediately rather than in the +later interpretation stage. Once we add functions to the language, we +will need rules concern which, if any, can be used to define a top +level constant. + +Constants are defined in a section that starts with the reserved word +`const` and then has a block with a list of assignment statements. +For syntactic consistency, these must use the double-colon syntax to +make it clear that they are constants. Type can also be given: if +not, the type will be determined during analysis, as with other +constants. + +As the types constants are inserted at the head of a list, printing +them in the same order that they were read is not straight forward. +We take a quadratic approach here and count the number of constants +(variables of depth 0), then count down from there, each time +searching through for the Nth constant for decreasing N. + +###### top level grammar + + DeclareConstant -> const { IN OptNL ConstList OUT OptNL } Newlines + | const { SimpleConstList } Newlines + | const IN OptNL ConstList OUT Newlines + | const SimpleConstList Newlines + + ConstList -> ConstList SimpleConstLine + | SimpleConstLine + SimpleConstList -> SimpleConstList ; Const + | Const + | SimpleConstList ; + SimpleConstLine -> SimpleConstList Newlines + | ERROR Newlines ${ tok_err(c, "Syntax error in constant", &$1); }$ + + $*type + CType -> Type ${ $0 = $<1; }$ + | ${ $0 = NULL; }$ + $void + Const -> IDENTIFIER :: CType = Expression ${ { + int ok; + struct variable *v; + + v = var_decl(c, $1.txt); + if (v) { + struct var *var = new_pos(var, $1); + v->where_decl = var; + v->where_set = var; + var->var = v; + v->constant = 1; + } else { + v = var_ref(c, $1.txt); + tok_err(c, "error: name already declared", &$1); + type_err(c, "info: this is where '%v' was first declared", + v->where_decl, NULL, 0, NULL); + } + do { + ok = 1; + propagate_types($5, c, &ok, $3, 0); + } while (ok == 2); + if (!ok) + c->parse_error = 1; + else if (v) { + v->val = interp_exec($5); + } + } }$ + +###### print const decls + { + struct variable *v; + int target = -1; + + while (target != 0) { + int i = 0; + for (v = context.in_scope; v; v=v->in_scope) + if (v->depth == 0) { + i += 1; + if (i == target) + break; + } + + if (target == -1) { + if (i) + printf("const\n"); + target = i; + } else { + printf(" %.*s :: ", v->name->name.len, v->name->name.txt); + type_print(v->val.type, stdout); + printf(" = "); + if (v->val.type == Tstr) + printf("\""); + print_value(v->val); + if (v->val.type == Tstr) + printf("\""); + printf("\n"); + target -= 1; + } + } } ### Finally the whole program. @@ -3141,7 +4082,8 @@ defined. Somewhat reminiscent of Pascal a (current) Ocean program starts with the keyword "program" and a list of variable names which are assigned values from command line arguments. Following this is a `block` which -is the code to execute. +is the code to execute. Unlike Pascal, constants and other +declarations come *before* the program. As this is the top level, several things are handled a bit differently. @@ -3152,20 +4094,24 @@ analysis is a bit more interesting at this level. ###### Binode types Program, -###### Parser: grammar +###### top level grammar + + DeclareProgram -> Program ${ { + if (c->prog) + type_err(c, "Program defined a second time", + $1, NULL, 0, NULL); + else + c->prog = $<1; + } }$ $*binode - Program -> program OpenScope Varlist Block OptNL ${ + Program -> program OpenScope Varlist ColonBlock Newlines ${ $0 = new(binode); $0->op = Program; - $0->left = reorder_bilist($<3); - $0->right = $<4; - var_block_close(config2context(config), CloseSequential); - if (config2context(config)->scope_stack) abort(); - }$ - | ERROR ${ - tok_err(config2context(config), - "error: unhandled parse error", &$1); + $0->left = reorder_bilist($right = $scope_stack && !c->parse_error) abort(); }$ Varlist -> Varlist ArgDecl ${ @@ -3178,7 +4124,7 @@ analysis is a bit more interesting at this level. $*var ArgDecl -> IDENTIFIER ${ { - struct variable *v = var_decl(config2context(config), $1.txt); + struct variable *v = var_decl(c, $1.txt); $0 = new(var); $0->var = v; } }$ @@ -3202,7 +4148,7 @@ analysis is a bit more interesting at this level. break; ###### propagate binode cases - case Program: abort(); + case Program: abort(); // NOTEST ###### core functions @@ -3212,7 +4158,7 @@ analysis is a bit more interesting at this level. int ok = 1; if (!b) - return 0; + return 0; // NOTEST do { ok = 1; propagate_types(b->right, c, &ok, Tnone, 0); @@ -3247,7 +4193,7 @@ analysis is a bit more interesting at this level. struct value v; if (!prog) - return; + return; // NOTEST al = cast(binode, p->left); while (al) { struct var *v = cast(var, al->left); @@ -3269,59 +4215,75 @@ analysis is a bit more interesting at this level. } ###### interp binode cases - case Program: abort(); + case Program: abort(); // NOTEST ## And now to test it out. -Having a language requires having a "hello world" program. I'll +Having a language requires having a "hello world" program. I'll provide a little more than that: a program that prints "Hello world" finds the GCD of two numbers, prints the first few elements of -Fibonacci, and performs a binary search for a number. +Fibonacci, performs a binary search for a number, and a few other +things which will likely grow as the languages grows. ###### File: oceani.mk - tests :: sayhello + demos :: sayhello sayhello : oceani - @echo "===== TEST =====" - ./oceani --section "test: hello" oceani.mdc 55 33 + @echo "===== DEMO =====" + ./oceani --section "demo: hello" oceani.mdc 55 33 + +###### demo: hello + + const + pi ::= 3.141_592_6 + four ::= 2 + 2 ; five ::= 10/2 + const pie ::= "I like Pie"; + cake ::= "The cake is" + ++ " a lie" -###### test: hello + struct fred + size:[four]number + name:string + alive:Boolean program A B: print "Hello World, what lovely oceans you have!" + print "Are there", five, "?" + print pi, pie, "but", cake + /* When a variable is defined in both branches of an 'if', * and used afterwards, the variables are merged. */ if A > B: bigger := "yes" - else: + else bigger := "no" print "Is", A, "bigger than", B,"? ", bigger /* If a variable is not used after the 'if', no * merge happens, so types can be different */ - if A * 2 > B: + if A > B * 2: double:string = "yes" print A, "is more than twice", B, "?", double - else: - double := A*2 - print "double", A, "is only", double + else + double := B*2 + print "double", B, "is", double a : number a = A; b:number = B - if a > 0 and b > 0: + if a > 0 and then b > 0: while a != b: if a < b: b = b - a - else: + else a = a - b print "GCD of", A, "and", B,"is", a else if a <= 0: print a, "is not positive, cannot calculate GCD" - else: + else print b, "is not positive, cannot calculate GCD" - for: + for togo := 10 f1 := 1; f2 := 1 print "Fibonacci:", f1,f2, @@ -3334,22 +4296,54 @@ Fibonacci, and performs a binary search for a number. print "" /* Binary search... */ - for: + for lo:= 0; hi := 100 target := 77 - while: + while mid := (lo + hi) / 2 if mid == target: use Found if mid < target: lo = mid - else: + else hi = mid if hi - lo < 1: use GiveUp use True - do: pass + do pass case Found: print "Yay, I found", target case GiveUp: print "Closest I found was", mid + + size::= 10 + list:[size]number + list[0] = 1234 + // "middle square" PRNG. Not particularly good, but one my + // Dad taught me - the first one I ever heard of. + for i:=1; then i = i + 1; while i < size: + n := list[i-1] * list[i-1] + list[i] = (n / 100) % 10 000 + + print "Before sort:", + for i:=0; then i = i + 1; while i < size: + print "", list[i], + print + + for i := 1; then i=i+1; while i < size: + for j:=i-1; then j=j-1; while j >= 0: + if list[j] > list[j+1]: + t:= list[j] + list[j] = list[j+1] + list[j+1] = t + print " After sort:", + for i:=0; then i = i + 1; while i < size: + print "", list[i], + print + + if 1 == 2 then print "yes"; else print "no" + + bob:fred + bob.name = "Hello" + bob.alive = (bob.name == "Hello") + print "bob", "is" if bob.alive else "isn't", "alive"