operator which can select between two expressions based on a third
(which appears syntactically in the middle).
-Elements that are present purely to make a usable language, and
-without any expectation that they will remain, are the "program'
-clause, which provides a list of variables to received command-line
-arguments, and the "print" statement which performs simple output.
+The "func" clause currently only allows a "main" function to be
+declared. That will be extended when proper function support is added.
+
+An element that is present purely to make a usable language, and
+without any expectation that it will remain, is the "print" statement
+which performs simple output.
The current scalar types are "number", "Boolean", and "string".
Boolean will likely stay in its current form, the other two might, but
- Parse the program, possibly with tracing,
- Analyse the parsed program to ensure consistency,
- Print the program,
-- Execute the program, if no parsing or consistency errors were found.
+- Execute the "main" function in the program, if no parsing or
+ consistency errors were found.
This is all performed by a single C program extracted with
`parsergen`.
that uses bracketing. So a `--bracket` command line option is needed
for that. Normally the first code section found is used, however an
alternate section can be requested so that a file (such as this one)
-can contain multiple programs This is effected with the `--section`
+can contain multiple programs. This is effected with the `--section`
option.
This code must be compiled with `-fplan9-extensions` so that anonymous
###### Parser: header
## macros
+ struct parse_context;
## ast
struct parse_context {
struct token_config config;
## core functions
#include <getopt.h>
- static char Usage[] = "Usage: oceani --trace --print --noexec --brackets"
- "--section=SectionName prog.ocn\n";
+ static char Usage[] =
+ "Usage: oceani --trace --print --noexec --brackets --section=SectionName prog.ocn\n";
static const struct option long_options[] = {
{"trace", 0, NULL, 't'},
{"print", 0, NULL, 'p'},
{NULL, 0, NULL, 0},
};
const char *options = "tpnbs";
+
+ static void pr_err(char *msg)
+ {
+ fprintf(stderr, "%s\n", msg); // NOTEST
+ }
+
int main(int argc, char *argv[])
{
int fd;
char *section = NULL;
struct parse_context context = {
.config = {
- .ignored = (1 << TK_line_comment)
- | (1 << TK_block_comment)
- | (1 << TK_mark),
+ .ignored = (1 << TK_mark),
.number_chars = ".,_+- ",
.word_start = "_",
.word_cont = "_",
context.file_name = argv[optind];
len = lseek(fd, 0, 2);
file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
- s = code_extract(file, file+len, NULL);
+ s = code_extract(file, file+len, pr_err);
if (!s) {
fprintf(stderr, "oceani: could not find any code in %s\n",
argv[optind]);
exit(1);
}
} else
- ss = s;
+ ss = s; // NOTEST
parse_oceani(ss->code, &context.config, dotrace ? stderr : NULL);
if (!context.prog) {
- fprintf(stderr, "oceani: no program found.\n");
+ fprintf(stderr, "oceani: no main function found.\n");
context.parse_error = 1;
}
if (context.prog && doprint) {
fprintf(stderr, "oceani: type error in program - not running.\n");
exit(1);
}
- interp_prog(context.prog, argv+optind+1);
+ interp_prog(&context, context.prog, argc - optind, argv+optind);
}
free_exec(context.prog);
If the same variable is declared in both branches of an 'if/else', or
in all cases of a 'switch' then the multiple instances may be merged
-into just one variable if the variable is references after the
+into just one variable if the variable is referenced after the
conditional statement. When this happens, the types must naturally be
consistent across all the branches. When the variable is not used
outside the if, the variables in the different branches are distinct
and can be of different types.
-Determining the types of all variables early is important for
-processing command line arguments. These can be assigned to any of
-several types of variable, but we must first know the correct type so
-any required conversion can happen. If a variable is associated with
-a command line argument but no type can be interpreted (e.g. the
-variable is only ever used in a `print` statement), then the type is
-set to 'string'.
-
Undeclared names may only appear in "use" statements and "case" expressions.
These names are given a type of "label" and a unique value.
This allows them to fill the role of a name in an enumerated type, which
element where its type was set. For now we will assume that each line
of an error message indicates one location in the file, and up to 2
types. So we provide a `printf`-like function which takes a format, a
-language (a `struct exec` which has not yet been introduced), and 2
+location (a `struct exec` which has not yet been introduced), and 2
types. "`%1`" reports the first type, "`%2`" reports the second. We
will need a function to print the location, once we know how that is
-stored. As will be explained later, there are sometimes extra rules for
+stored. As will be explained later, there are sometimes extra rules for
type matching and they might affect error messages, we need to pass those
in too.
needs to know about to parse and execute a program. These include
types, variables, values, and executable code. These are all lumped
together under the term "entities" (calling them "objects" would be
-confusing) and introduced here. These will introduced and described
-here. The following section will present the different specific code
-elements which comprise or manipulate these various entities.
+confusing) and introduced here. The following section will present the
+different specific code elements which comprise or manipulate these
+various entities.
### Types
Values come in a wide range of types, with more likely to be added.
-Each type needs to be able to parse and print its own values (for
-convenience at least) as well as to compare two values, at least for
-equality and possibly for order. For now, values might need to be
-duplicated and freed, though eventually such manipulations will be
-better integrated into the language.
+Each type needs to be able to print its own values (for convenience at
+least) as well as to compare two values, at least for equality and
+possibly for order. For now, values might need to be duplicated and
+freed, though eventually such manipulations will be better integrated
+into the language.
Rather than requiring every numeric type to support all numeric
operations (add, multiply, etc), we allow types to be able to present
as one of a few standard types: integer, float, and fraction. The
-existence of these conversion functions eventaully enable types to
+existence of these conversion functions will eventually enable types to
determine if they are compatible with other types, though such types
have not yet been implemented.
-Named type are stored in a simple linked list. Objects of each type are "values"
-which are often passed around by value.
+Named types are stored in a simple linked list. Objects of each type are
+"values" which are often passed around by value.
###### ast
struct value {
- struct type *type;
union {
+ char ptr[1];
## value union fields
};
};
struct type {
struct text name;
struct type *next;
- struct value (*init)(struct type *type);
- struct value (*prepare)(struct type *type);
- struct value (*parse)(struct type *type, char *str);
- void (*print)(struct value val);
+ int size, align;
+ void (*init)(struct type *type, struct value *val);
+ void (*prepare_type)(struct parse_context *c, struct type *type, int parse_time);
+ void (*print)(struct type *type, struct value *val);
void (*print_type)(struct type *type, FILE *f);
- int (*cmp_order)(struct value v1, struct value v2);
- int (*cmp_eq)(struct value v1, struct value v2);
- struct value (*dup)(struct value val);
- void (*free)(struct value val);
+ int (*cmp_order)(struct type *t1, struct type *t2,
+ struct value *v1, struct value *v2);
+ int (*cmp_eq)(struct type *t1, struct type *t2,
+ struct value *v1, struct value *v2);
+ void (*dup)(struct type *type, struct value *vold, struct value *vnew);
+ void (*free)(struct type *type, struct value *val);
void (*free_type)(struct type *t);
- int (*compat)(struct type *this, struct type *other);
long long (*to_int)(struct value *v);
double (*to_float)(struct value *v);
int (*to_mpq)(mpq_t *q, struct value *v);
*/
}
- static void free_value(struct value v)
+ static void free_value(struct type *type, struct value *v)
{
- if (v.type)
- v.type->free(v);
- }
-
- static int type_compat(struct type *require, struct type *have, int rules)
- {
- if ((rules & Rboolok) && have == Tbool)
- return 1;
- if ((rules & Rnolabel) && have == Tlabel)
- return 0;
- if (!require || !have)
- return 1;
-
- if (require->compat)
- return require->compat(require, have);
-
- return require == have;
+ if (type && v)
+ type->free(type, v);
}
static void type_print(struct type *type, FILE *f)
{
if (!type)
- fputs("*unknown*type*", f);
+ fputs("*unknown*type*", f); // NOTEST
else if (type->name.len)
fprintf(f, "%.*s", type->name.len, type->name.txt);
else if (type->print_type)
fputs("*invalid*type*", f); // NOTEST
}
- static struct value val_prepare(struct type *type)
+ static void val_init(struct type *type, struct value *val)
{
- struct value rv;
-
- if (type)
- return type->prepare(type);
- rv.type = type;
- return rv;
+ if (type && type->init)
+ type->init(type, val);
}
- static struct value val_init(struct type *type)
+ static void dup_value(struct type *type,
+ struct value *vold, struct value *vnew)
{
- struct value rv;
-
- if (type)
- return type->init(type);
- rv.type = type;
- return rv;
+ if (type && type->dup)
+ type->dup(type, vold, vnew);
}
- static struct value dup_value(struct value v)
+ static int value_cmp(struct type *tl, struct type *tr,
+ struct value *left, struct value *right)
{
- if (v.type)
- return v.type->dup(v);
- return v;
+ if (tl && tl->cmp_order)
+ return tl->cmp_order(tl, tr, left, right);
+ if (tl && tl->cmp_eq)
+ return tl->cmp_eq(tl, tr, left, right);
+ return -1; // NOTEST
}
- static int value_cmp(struct value left, struct value right)
+ static void print_value(struct type *type, struct value *v)
{
- if (left.type && left.type->cmp_order)
- return left.type->cmp_order(left, right);
- if (left.type && left.type->cmp_eq)
- return left.type->cmp_eq(left, right);
- return -1;
- }
-
- static void print_value(struct value v)
- {
- if (v.type && v.type->print)
- v.type->print(v);
+ if (type && type->print)
+ type->print(type, v);
else
printf("*Unknown*"); // NOTEST
}
- static struct value parse_value(struct type *type, char *arg)
- {
- struct value rv;
-
- if (type && type->parse)
- return type->parse(type, arg);
- rv.type = NULL; // NOTEST
- return rv; // NOTEST
- }
-
###### forward decls
- static void free_value(struct value v);
+ static void free_value(struct type *type, struct value *v);
static int type_compat(struct type *require, struct type *have, int rules);
static void type_print(struct type *type, FILE *f);
- static struct value val_init(struct type *type);
- static struct value dup_value(struct value v);
- static int value_cmp(struct value left, struct value right);
- static void print_value(struct value v);
- static struct value parse_value(struct type *type, char *arg);
+ static void val_init(struct type *type, struct value *v);
+ static void dup_value(struct type *type,
+ struct value *vold, struct value *vnew);
+ static int value_cmp(struct type *tl, struct type *tr,
+ struct value *left, struct value *right);
+ static void print_value(struct type *type, struct value *v);
###### free context types
free(t);
}
+Types can be specified for local variables, for fields in a structure,
+for formal parameters to functions, and possibly elsewhere. Different
+rules may apply in different contexts. As a minimum, a named type may
+always be used. Currently the type of a formal parameter can be
+different from types in other contexts, so we have a separate grammar
+symbol for those.
+
+###### Grammar
+
+ $*type
+ Type -> IDENTIFIER ${
+ $0 = find_type(c, $1.txt);
+ if (!$0) {
+ tok_err(c,
+ "error: undefined type", &$1);
+
+ $0 = Tnone;
+ }
+ }$
+ ## type grammar
+
+ FormalType -> Type ${ $0 = $<1; }$
+ ## formal type grammar
+
#### Base Types
Values of the base types can be numbers, which we represent as
primary type, and in others any type is acceptable except a label (`Vlabel`).
A separate function encoding these cases will simplify some code later.
-When assigning command line arguments to variables, we need to be able
-to parse each type from a string.
+###### type functions
+
+ int (*compat)(struct type *this, struct type *other);
+
+###### ast functions
+
+ static int type_compat(struct type *require, struct type *have, int rules)
+ {
+ if ((rules & Rboolok) && have == Tbool)
+ return 1;
+ if ((rules & Rnolabel) && have == Tlabel)
+ return 0;
+ if (!require || !have)
+ return 1;
-The distinction beteen "prepare" and "init" needs to be explained.
-"init" sets up an initial value, such as "zero" or the empty string.
-"prepare" simply prepares the data structure so that if "free" gets
-called on it, it won't do something silly. Normally a value will be
-stored after "prepare" but before "free", but this might not happen if
-there are errors.
+ if (require->compat)
+ return require->compat(require, have);
+
+ return require == have;
+ }
###### includes
#include <gmp.h>
- #include "string.h"
- #include "number.h"
+ #include "parse_string.h"
+ #include "parse_number.h"
###### libs
myLDLIBS := libnumber.o libstring.o -lgmp
###### value union fields
struct text str;
mpq_t num;
- int bool;
+ unsigned char bool;
void *label;
###### ast functions
- static void _free_value(struct value v)
+ static void _free_value(struct type *type, struct value *v)
{
- switch (v.type->vtype) {
+ if (!v)
+ return; // NOTEST
+ switch (type->vtype) {
case Vnone: break;
- case Vstr: free(v.str.txt); break;
- case Vnum: mpq_clear(v.num); break;
+ case Vstr: free(v->str.txt); break;
+ case Vnum: mpq_clear(v->num); break;
case Vlabel:
case Vbool: break;
}
###### value functions
- static struct value _val_prepare(struct type *type)
+ static void _val_init(struct type *type, struct value *val)
{
- struct value rv;
-
- rv.type = type;
- switch(type->vtype) {
- case Vnone:
- break;
- case Vnum:
- memset(&rv.num, 0, sizeof(rv.num));
- break;
- case Vstr:
- rv.str.txt = NULL;
- rv.str.len = 0;
- break;
- case Vbool:
- rv.bool = 0;
- break;
- case Vlabel:
- rv.label = NULL;
- break;
- }
- return rv;
- }
-
- static struct value _val_init(struct type *type)
- {
- struct value rv;
-
- rv.type = type;
switch(type->vtype) {
case Vnone: // NOTEST
break; // NOTEST
case Vnum:
- mpq_init(rv.num); break;
+ mpq_init(val->num); break;
case Vstr:
- rv.str.txt = malloc(1);
- rv.str.len = 0;
+ val->str.txt = malloc(1);
+ val->str.len = 0;
break;
case Vbool:
- rv.bool = 0;
+ val->bool = 0;
+ break;
+ case Vlabel:
+ val->label = NULL;
break;
- case Vlabel: // NOTEST
- rv.label = NULL; // NOTEST
- break; // NOTEST
}
- return rv;
}
- static struct value _dup_value(struct value v)
+ static void _dup_value(struct type *type,
+ struct value *vold, struct value *vnew)
{
- struct value rv;
- rv.type = v.type;
- switch (rv.type->vtype) {
+ switch (type->vtype) {
case Vnone: // NOTEST
break; // NOTEST
case Vlabel:
- rv.label = v.label;
+ vnew->label = vold->label;
break;
case Vbool:
- rv.bool = v.bool;
+ vnew->bool = vold->bool;
break;
case Vnum:
- mpq_init(rv.num);
- mpq_set(rv.num, v.num);
+ mpq_init(vnew->num);
+ mpq_set(vnew->num, vold->num);
break;
case Vstr:
- rv.str.len = v.str.len;
- rv.str.txt = malloc(rv.str.len);
- memcpy(rv.str.txt, v.str.txt, v.str.len);
+ vnew->str.len = vold->str.len;
+ vnew->str.txt = malloc(vnew->str.len);
+ memcpy(vnew->str.txt, vold->str.txt, vnew->str.len);
break;
}
- return rv;
}
- static int _value_cmp(struct value left, struct value right)
+ static int _value_cmp(struct type *tl, struct type *tr,
+ struct value *left, struct value *right)
{
int cmp;
- if (left.type != right.type)
- return left.type - right.type; // NOTEST
- switch (left.type->vtype) {
- case Vlabel: cmp = left.label == right.label ? 0 : 1; break;
- case Vnum: cmp = mpq_cmp(left.num, right.num); break;
- case Vstr: cmp = text_cmp(left.str, right.str); break;
- case Vbool: cmp = left.bool - right.bool; break;
+ if (tl != tr)
+ return tl - tr; // NOTEST
+ switch (tl->vtype) {
+ case Vlabel: cmp = left->label == right->label ? 0 : 1; break;
+ case Vnum: cmp = mpq_cmp(left->num, right->num); break;
+ case Vstr: cmp = text_cmp(left->str, right->str); break;
+ case Vbool: cmp = left->bool - right->bool; break;
case Vnone: cmp = 0; // NOTEST
}
return cmp;
}
- static void _print_value(struct value v)
+ static void _print_value(struct type *type, struct value *v)
{
- switch (v.type->vtype) {
+ switch (type->vtype) {
case Vnone: // NOTEST
printf("*no-value*"); break; // NOTEST
case Vlabel: // NOTEST
- printf("*label-%p*", v.label); break; // NOTEST
+ printf("*label-%p*", v->label); break; // NOTEST
case Vstr:
- printf("%.*s", v.str.len, v.str.txt); break;
+ printf("%.*s", v->str.len, v->str.txt); break;
case Vbool:
- printf("%s", v.bool ? "True":"False"); break;
+ printf("%s", v->bool ? "True":"False"); break;
case Vnum:
{
mpf_t fl;
mpf_init2(fl, 20);
- mpf_set_q(fl, v.num);
+ mpf_set_q(fl, v->num);
gmp_printf("%Fg", fl);
mpf_clear(fl);
break;
}
}
- static struct value _parse_value(struct type *type, char *arg)
- {
- struct value val;
- struct text tx;
- int neg = 0;
- char tail[3] = "";
-
- val.type = type;
- switch(type->vtype) {
- case Vlabel: // NOTEST
- case Vnone: // NOTEST
- val.type = NULL; // NOTEST
- break; // NOTEST
- case Vstr:
- val.str.len = strlen(arg);
- val.str.txt = malloc(val.str.len);
- memcpy(val.str.txt, arg, val.str.len);
- break;
- case Vnum:
- if (*arg == '-') {
- neg = 1;
- arg++;
- }
- tx.txt = arg; tx.len = strlen(tx.txt);
- if (number_parse(val.num, tail, tx) == 0)
- mpq_init(val.num);
- else if (neg)
- mpq_neg(val.num, val.num);
- if (tail[0]) {
- printf("Unsupported suffix: %s\n", arg);
- val.type = NULL;
- }
- break;
- case Vbool:
- if (strcasecmp(arg, "true") == 0 ||
- strcmp(arg, "1") == 0)
- val.bool = 1;
- else if (strcasecmp(arg, "false") == 0 ||
- strcmp(arg, "0") == 0)
- val.bool = 0;
- else {
- printf("Bad bool: %s\n", arg);
- val.type = NULL;
- }
- break;
- }
- return val;
- }
-
- static void _free_value(struct value v);
+ static void _free_value(struct type *type, struct value *v);
static struct type base_prototype = {
.init = _val_init,
- .prepare = _val_prepare,
- .parse = _parse_value,
.print = _print_value,
.cmp_order = _value_cmp,
.cmp_eq = _value_cmp,
static struct type *Tbool, *Tstr, *Tnum, *Tnone, *Tlabel;
###### ast functions
- static struct type *add_base_type(struct parse_context *c, char *n, enum vtype vt)
+ static struct type *add_base_type(struct parse_context *c, char *n,
+ enum vtype vt, int size)
{
struct text txt = { n, strlen(n) };
struct type *t;
t = add_type(c, txt, &base_prototype);
t->vtype = vt;
+ t->size = size;
+ t->align = size > sizeof(void*) ? sizeof(void*) : size;
+ if (t->size & (t->align - 1))
+ t->size = (t->size | (t->align - 1)) + 1;
return t;
}
###### context initialization
- Tbool = add_base_type(&context, "Boolean", Vbool);
- Tstr = add_base_type(&context, "string", Vstr);
- Tnum = add_base_type(&context, "number", Vnum);
- Tnone = add_base_type(&context, "none", Vnone);
- Tlabel = add_base_type(&context, "label", Vlabel);
+ Tbool = add_base_type(&context, "Boolean", Vbool, sizeof(char));
+ Tstr = add_base_type(&context, "string", Vstr, sizeof(struct text));
+ Tnum = add_base_type(&context, "number", Vnum, sizeof(mpq_t));
+ Tnone = add_base_type(&context, "none", Vnone, 0);
+ Tlabel = add_base_type(&context, "label", Vlabel, sizeof(void*));
### Variables
-Variables are scoped named values. We store the names in a linked
-list of "bindings" sorted lexically, and use sequential search and
+Variables are scoped named values. We store the names in a linked list
+of "bindings" sorted in lexical order, and use sequential search and
insertion sort.
###### ast
###### ast
struct variable {
struct variable *previous;
- struct value val;
+ struct type *type;
struct binding *name;
struct exec *where_decl;// where name was declared
struct exec *where_set; // where type was set
the child-count of the parent frame is incremented. This child-count
is used to distinguish between the first of a set of parallel scopes,
in which declared variables must not be in scope, and subsequent
-branches, whether they must already be conditionally scoped.
+branches, where they may already be conditionally scoped.
To push a new frame *before* any code in the frame is parsed, we need a
grammar reduction. This is most easily achieved with a grammar
element which derives the empty string, and creates the new scope when
-it is recognized. This can be placed, for example, between a keyword
+it is recognised. This can be placed, for example, between a keyword
like "if" and the code following it.
###### ast
$void
OpenScope -> ${ scope_push(c); }$
+ ClosePara -> ${ var_block_close(c, CloseParallel); }$
Each variable records a scope depth and is in one of four states:
struct variable *in_scope;
All variables with the same name are linked together using the
-'previous' link. Those variable that have
-been affirmatively merged all have a 'merged' pointer that points to
-one primary variable - the most recently declared instance. When
-merging variables, we need to also adjust the 'merged' pointer on any
-other variables that had previously been merged with the one that will
-no longer be primary.
+'previous' link. Those variables that have been affirmatively merged all
+have a 'merged' pointer that points to one primary variable - the most
+recently declared instance. When merging variables, we need to also
+adjust the 'merged' pointer on any other variables that had previously
+been merged with the one that will no longer be primary.
A variable that is no longer the most recent instance of a name may
still have "pending" scope, if it might still be merged with most
is found. Instead, they are detected and ignored when considering the
list of in_scope names.
+The storage of the value of a variable will be described later. For now
+we just need to know that when a variable goes out of scope, it might
+need to be freed. For this we need to be able to find it, so assume that
+`var_value()` will provide that.
+
###### variable fields
struct variable *merged;
if (primary->merged)
// shouldn't happen
- primary = primary->merged;
+ primary = primary->merged; // NOTEST
for (v = primary->previous; v; v=v->previous)
if (v == secondary || v == secondary->merged ||
}
}
+###### forward decls
+ static struct value *var_value(struct parse_context *c, struct variable *v);
+
###### free context vars
while (context.varlist) {
struct variable *t = v;
v = t->previous;
- free_value(t->val);
+ free_value(t->type, var_value(&context, t));
if (t->depth == 0)
// This is a global constant
free_exec(t->where_decl);
pending-scope variable. If the previous variable was conditionally
scoped, it and its homonyms become out-of-scope.
-When we parse a variable reference (including non-declarative
-assignment) we report an error if the name is not bound or is bound to
+When we parse a variable reference (including non-declarative assignment
+"foo = bar") we report an error if the name is not bound or is bound to
a pending-scope variable; update the scope if the name is bound to a
conditionally scoped variable; or just proceed normally if the named
variable is in scope.
v->scope = InScope;
v->in_scope = c->in_scope;
c->in_scope = v;
- v->val = val_prepare(NULL);
return v;
}
else if (v->previous &&
v->previous->scope == PendingScope)
v->scope = PendingScope;
- else if (v->val.type == Tlabel)
+ else if (v->type == Tlabel)
v->scope = PendingScope;
else if (v->name->var == v)
v->scope = OutScope;
for (v2 = v;
v2 && v2->scope == PendingScope;
v2 = v2->previous)
- if (v2->val.type != Tlabel)
+ if (v2->type != Tlabel)
v2->scope = OutScope;
break;
case OutScope: break;
}
break;
case CloseSequential:
- if (v->val.type == Tlabel)
+ if (v->type == Tlabel)
v->scope = PendingScope;
switch (v->scope) {
case InScope:
for (v2 = v;
v2 && v2->scope == PendingScope;
v2 = v2->previous)
- if (v2->val.type == Tlabel) {
+ if (v2->type == Tlabel) {
v2->scope = CondScope;
v2->min_depth = c->scope_depth;
} else
}
}
+#### Storing Values
+
+The value of a variable is stored separately from the variable, on an
+analogue of a stack frame. There are (currently) two frames that can be
+active. A global frame which currently only stores constants, and a
+stacked frame which stores local variables. Each variable knows if it
+is global or not, and what its index into the frame is.
+
+Values in the global frame are known immediately they are relevant, so
+the frame needs to be reallocated as it grows so it can store those
+values. The local frame doesn't get values until the interpreted phase
+is started, so there is no need to allocate until the size is known.
+
+###### variable fields
+ short frame_pos;
+ short global;
+
+###### parse context
+
+ short global_size, global_alloc;
+ short local_size;
+ void *global, *local;
+
+###### ast functions
+
+ /*
+  * Return the address at which the value of variable 'v' is stored.
+  *
+  * Local variables live in c->local at offset v->frame_pos.  NULL is
+  * returned if the local frame has not been allocated yet or the
+  * variable has no type.  A position that would run past the end of
+  * the local frame is treated as fatal.
+  *
+  * Global variables live in c->global, which is grown here on demand
+  * whenever c->global_size has moved past the current allocation.
+  * Newly added space is zero-filled.
+  */
+ static struct value *var_value(struct parse_context *c, struct variable *v)
+ {
+ 	if (!v->global) {
+ 		if (!c->local || !v->type)
+ 			return NULL;
+ 		if (v->frame_pos + v->type->size > c->local_size) {
+ 			printf("INVALID frame_pos\n");	// NOTEST
+ 			exit(2);			// NOTEST
+ 		}
+ 		return c->local + v->frame_pos;
+ 	}
+ 	if (c->global_size > c->global_alloc) {
+ 		int old = c->global_alloc;
+ 		void *grown;
+
+ 		c->global_alloc = (c->global_size | 1023) + 1024;
+ 		/* Keep the old pointer until realloc() is known to have
+ 		 * succeeded; on failure there is nothing sensible to do
+ 		 * but stop, rather than memset() through a NULL pointer.
+ 		 */
+ 		grown = realloc(c->global, c->global_alloc);
+ 		if (!grown) {
+ 			fprintf(stderr, "oceani: out of memory\n");	// NOTEST
+ 			exit(2);					// NOTEST
+ 		}
+ 		c->global = grown;
+ 		memset(c->global + old, 0, c->global_alloc - old);
+ 	}
+ 	return c->global + v->frame_pos;
+ }
+
+ /*
+  * Reserve space in the global frame for a value of type 't' and
+  * return a pointer to it.
+  *
+  * If 'v' is given, its frame_pos/global fields are set to describe the
+  * new slot; otherwise a scratch variable is used just long enough to
+  * ask var_value() for the address.  If 'init' is given, t->size bytes
+  * are copied from it with memcpy(); otherwise the slot is initialised
+  * with val_init().
+  */
+ static struct value *global_alloc(struct parse_context *c, struct type *t,
+                                   struct variable *v, struct value *init)
+ {
+ 	struct value *ret;
+ 	struct variable scratch;
+
+ 	if (t->prepare_type)
+ 		t->prepare_type(c, t, 1);
+
+ 	/* Round global_size up to the type's alignment.  A zero-sized
+ 	 * type has align == 0, which would turn the mask into -1 and
+ 	 * reset global_size to 0, so only align when it is meaningful.
+ 	 */
+ 	if (t->align > 1 && (c->global_size & (t->align - 1)))
+ 		c->global_size = (c->global_size + t->align) & ~(t->align-1);
+ 	if (!v) {
+ 		v = &scratch;
+ 		v->type = t;
+ 	}
+ 	v->frame_pos = c->global_size;
+ 	v->global = 1;
+ 	c->global_size += v->type->size;
+ 	/* var_value() grows the global frame if the new size needs it. */
+ 	ret = var_value(c, v);
+ 	if (init)
+ 		memcpy(ret, init, t->size);
+ 	else
+ 		val_init(t, ret);
+ 	return ret;
+ }
+
+As global values are found -- struct field initializers, labels etc --
+`global_alloc()` is called to record the value in the global frame.
+
+When the program is fully parsed, we need to walk the list of variables
+to find any that weren't merged away and that aren't global, and to
+calculate the frame size and assign a frame position for each variable.
+For this we have `scope_finalize()`.
+
+###### ast functions
+
+ /*
+  * Assign a position in the local frame to every variable that was not
+  * merged into another variable and is not global, accumulating the
+  * total in c->local_size, then allocate the zero-filled local frame.
+  */
+ static void scope_finalize(struct parse_context *c)
+ {
+ 	struct binding *b;
+
+ 	for (b = c->varlist; b; b = b->next) {
+ 		struct variable *v;
+ 		for (v = b->var; v; v = v->previous) {
+ 			struct type *t = v->type;
+ 			if (v->merged && v->merged != v)
+ 				continue;
+ 			if (v->global)
+ 				continue;
+ 			/* A variable whose type was never determined has
+ 			 * nothing to store; var_value() already returns
+ 			 * NULL for it, so it needs no frame slot.
+ 			 */
+ 			if (!t)
+ 				continue;
+ 			/* Zero-sized types have align == 0; aligning with
+ 			 * that mask would reset local_size to 0.
+ 			 */
+ 			if (t->align > 1 && (c->local_size & (t->align - 1)))
+ 				c->local_size = (c->local_size + t->align) & ~(t->align-1);
+ 			v->frame_pos = c->local_size;
+ 			c->local_size += t->size;
+ 		}
+ 	}
+ 	c->local = calloc(1, c->local_size);
+ }
+
+###### free context vars
+ free(context.global);
+ free(context.local);
+
### Executables
Executables can be lots of different things. In many cases an
executable is just an operation combined with one or two other
-executables. This allows for expressions and lists etc. Other times
-an executable is something quite specific like a constant or variable
-name. So we define a `struct exec` to be a general executable with a
-type, and a `struct binode` which is a subclass of `exec`, forms a
-node in a binary tree, and holds an operation. There will be other
-subclasses, and to access these we need to be able to `cast` the
-`exec` into the various other types.
+executables. This allows for expressions and lists etc. Other times an
+executable is something quite specific like a constant or variable name.
+So we define a `struct exec` to be a general executable with a type, and
+a `struct binode` which is a subclass of `exec`, forms a node in a
+binary tree, and holds an operation. There will be other subclasses,
+and to access these we need to be able to `cast` the `exec` into the
+various other types. The first field in any `struct exec` is the type
+from the `exec_types` enum.
###### macros
#define cast(structname, pointer) ({ \
}
if (loc->type == Xbinode)
return __fput_loc(cast(binode,loc)->left, f) ||
- __fput_loc(cast(binode,loc)->right, f);
- return 0;
+ __fput_loc(cast(binode,loc)->right, f); // NOTEST
+ return 0; // NOTEST
}
static void fput_loc(struct exec *loc, FILE *f)
{
fprintf(f, "??:??: "); // NOTEST
}
-Each different type of `exec` node needs a number of functions
-defined, a bit like methods. We must be able to be able to free it,
-print it, analyse it and execute it. Once we have specific `exec`
-types we will need to parse them too. Let's take this a bit more
-slowly.
+Each different type of `exec` node needs a number of functions defined,
+a bit like methods. We must be able to free it, print it, analyse it
+and execute it. Once we have specific `exec` types we will need to
+parse them too. Let's take this a bit more slowly.
#### Freeing
#### Analysing
-As discussed, analysis involves propagating type requirements around
-the program and looking for errors.
+As discussed, analysis involves propagating type requirements around the
+program and looking for errors.
So `propagate_types` is passed an expected type (being a `struct type`
pointer together with some `val_rules` flags) that the `exec` is
Interpreting an `exec` doesn't require anything but the `exec`. State
is stored in variables and each variable will be directly linked from
-within the `exec` tree. The exception to this is the whole `program`
-which needs to look at command line arguments. The `program` will be
+within the `exec` tree. The exception to this is the `main` function
+which needs to look at command line arguments. This function will be
interpreted separately.
-Each `exec` can return a value, which may be `Tnone` but must be
-non-NULL; Some `exec`s will return the location of a value, which can
-be updates. To support this, each exec case must store either a value
-in `val` or the pointer to a value in `lval`. If `lval` is set, but a
-simple value is required, `inter_exec()` will dereference `lval` to
-get the value.
+Each `exec` can return a value combined with a type in `struct lrval`.
+The type may be `Tnone` but must be non-NULL. Some `exec`s will return
+the location of a value, which can be updated, in `lval`. Others will
+set `lval` to NULL indicating that there is a value of appropriate type
+in `rval`.
###### core functions
struct lrval {
- struct value val, *lval;
+ struct type *type;
+ struct value rval, *lval;
};
- static struct lrval _interp_exec(struct exec *e);
+ static struct lrval _interp_exec(struct parse_context *c, struct exec *e);
- static struct value interp_exec(struct exec *e)
+ static struct value interp_exec(struct parse_context *c, struct exec *e,
+ struct type **typeret)
{
- struct lrval ret = _interp_exec(e);
+ struct lrval ret = _interp_exec(c, e);
+ if (!ret.type) abort();
+ if (typeret)
+ *typeret = ret.type;
if (ret.lval)
- return dup_value(*ret.lval);
- else
- return ret.val;
+ dup_value(ret.type, ret.lval, &ret.rval);
+ return ret.rval;
}
- static struct value *linterp_exec(struct exec *e)
+ static struct value *linterp_exec(struct parse_context *c, struct exec *e,
+ struct type **typeret)
{
- struct lrval ret = _interp_exec(e);
+ struct lrval ret = _interp_exec(c, e);
+ if (ret.lval)
+ *typeret = ret.type;
+ else
+ free_value(ret.type, &ret.rval);
return ret.lval;
}
- static struct lrval _interp_exec(struct exec *e)
+ static struct lrval _interp_exec(struct parse_context *c, struct exec *e)
{
struct lrval ret;
- struct value rv, *lrv = NULL;
- rv.type = Tnone;
+ struct value rv = {}, *lrv = NULL;
+ struct type *rvtype;
+
+ rvtype = ret.type = Tnone;
if (!e) {
ret.lval = lrv;
- ret.val = rv;
+ ret.rval = rv;
return ret;
}
{
struct binode *b = cast(binode, e);
struct value left, right, *lleft;
- left.type = right.type = Tnone;
+ struct type *ltype, *rtype;
+ ltype = rtype = Tnone;
switch (b->op) {
## interp binode cases
}
- free_value(left); free_value(right);
+ free_value(ltype, &left);
+ free_value(rtype, &right);
break;
}
## interp exec cases
}
ret.lval = lrv;
- ret.val = rv;
+ ret.rval = rv;
+ ret.type = rvtype;
return ret;
}
Arrays can be declared by giving a size and a type, as `[size]type' so
`freq:[26]number` declares `freq` to be an array of 26 numbers. The
-size can be an arbitrary expression which is evaluated when the name
-comes into scope.
+size can be either a literal number, or a named constant. Some day an
+arbitrary expression will be supported.
+
+As a formal parameter to a function, the array can be declared with a
+new variable as the size: `name:[size::number]string`. The `size`
+variable is set to the size of the array and must be a constant. As
+`number` is the only supported type, it can be left out:
+`name:[size::]string`.
Arrays cannot be assigned. When pointers are introduced we will also
introduce array slices which can refer to part or all of an array -
the assignment syntax will create a slice. For now, an array can only
ever be referenced by the name it is declared with. It is likely that
a "`copy`" primitive will eventually be define which can be used to
-make a copy of an array with controllable depth.
+make a copy of an array with controllable recursive depth.
+
+For now we have two sorts of array, those with fixed size either because
+it is given as a literal number or because it is a struct member (which
+cannot have a runtime-changing size), and those with a size that is
+determined at runtime - local variables with a const size. The former
+have their size calculated at parse time, the latter at run time.
+
+For the latter type, the `size` field of the type is the size of a
+pointer, and the array is reallocated every time it comes into scope.
+
+We differentiate struct fields with a const size from local variables
+with a const size by whether they are prepared at parse time or not.
###### type union fields
struct {
- int size;
+ int unspec; // size is unspecified - vsize must be set.
+ short size;
+ short static_size;
struct variable *vsize;
struct type *member;
} array;
###### value union fields
- struct {
- struct value *elmnts;
- } array;
+ void *array; // used if not static_size
###### value functions
- static struct value array_prepare(struct type *type)
+ static void array_prepare_type(struct parse_context *c, struct type *type,
+ int parse_time)
{
- struct value ret;
+ struct value *vsize;
+ mpz_t q;
+ if (!type->array.vsize || type->array.static_size)
+ return;
- ret.type = type;
- ret.array.elmnts = NULL;
- return ret;
+ vsize = var_value(c, type->array.vsize);
+ mpz_init(q);
+ mpz_tdiv_q(q, mpq_numref(vsize->num), mpq_denref(vsize->num));
+ type->array.size = mpz_get_si(q);
+ mpz_clear(q);
+
+ if (parse_time) {
+ type->array.static_size = 1;
+ type->size = type->array.size * type->array.member->size;
+ type->align = type->array.member->align;
+ }
}
- static struct value array_init(struct type *type)
+ static void array_init(struct type *type, struct value *val)
{
- struct value ret;
int i;
+ void *ptr = val->ptr;
- ret.type = type;
- if (type->array.vsize) {
- mpz_t q;
- mpz_init(q);
- mpz_tdiv_q(q, mpq_numref(type->array.vsize->val.num),
- mpq_denref(type->array.vsize->val.num));
- type->array.size = mpz_get_si(q);
- mpz_clear(q);
+ if (!val)
+ return;
+ if (!type->array.static_size) {
+ val->array = calloc(type->array.size,
+ type->array.member->size);
+ ptr = val->array;
+ }
+ for (i = 0; i < type->array.size; i++) {
+ struct value *v;
+ v = (void*)ptr + i * type->array.member->size;
+ val_init(type->array.member, v);
}
- ret.array.elmnts = calloc(type->array.size,
- sizeof(ret.array.elmnts[0]));
- for (i = 0; ret.array.elmnts && i < type->array.size; i++)
- ret.array.elmnts[i] = val_init(type->array.member);
- return ret;
}
- static void array_free(struct value val)
+ static void array_free(struct type *type, struct value *val)
{
int i;
-
- if (val.array.elmnts)
- for (i = 0; i < val.type->array.size; i++)
- free_value(val.array.elmnts[i]);
- free(val.array.elmnts);
+ void *ptr = val->ptr;
+
+ if (!type->array.static_size)
+ ptr = val->array;
+ for (i = 0; i < type->array.size; i++) {
+ struct value *v;
+ v = (void*)ptr + i * type->array.member->size;
+ free_value(type->array.member, v);
+ }
+ if (!type->array.static_size)
+ free(ptr);
}
static int array_compat(struct type *require, struct type *have)
/* Both are arrays, so we can look at details */
if (!type_compat(require->array.member, have->array.member, 0))
return 0;
+ if (have->array.unspec && require->array.unspec) {
+ if (have->array.vsize && require->array.vsize &&
+ have->array.vsize != require->array.vsize)
+ /* sizes might not be the same */
+ return 0;
+ return 1;
+ }
+ if (have->array.unspec || require->array.unspec)
+ return 1;
if (require->array.vsize == NULL && have->array.vsize == NULL)
return require->array.size == have->array.size;
fputs("[", f);
if (type->array.vsize) {
struct binding *b = type->array.vsize->name;
- fprintf(f, "%.*s]", b->name.len, b->name.txt);
+ fprintf(f, "%.*s%s]", b->name.len, b->name.txt,
+ type->array.unspec ? "::" : "");
} else
fprintf(f, "%d]", type->array.size);
type_print(type->array.member, f);
}
static struct type array_prototype = {
- .prepare = array_prepare,
.init = array_init,
+ .prepare_type = array_prepare_type,
.print_type = array_print_type,
.compat = array_compat,
.free = array_free,
+ .size = sizeof(void*),
+ .align = sizeof(void*),
};
+###### declare terminals
+ $TERM [ ]
+
###### type grammar
- | [ NUMBER ] Type ${
- $0 = calloc(1, sizeof(struct type));
- *($0) = array_prototype;
- $0->array.member = $<4;
- $0->array.vsize = NULL;
- {
+ | [ NUMBER ] Type ${ {
char tail[3];
mpq_t num;
+ struct text noname = { "", 0 };
+ struct type *t;
+
+ $0 = t = add_type(c, noname, &array_prototype);
+ t->array.member = $<4;
+ t->array.vsize = NULL;
if (number_parse(num, tail, $2.txt) == 0)
tok_err(c, "error: unrecognised number", &$2);
else if (tail[0])
tok_err(c, "error: unsupported number suffix", &$2);
else {
- $0->array.size = mpz_get_ui(mpq_numref(num));
+ t->array.size = mpz_get_ui(mpq_numref(num));
if (mpz_cmp_ui(mpq_denref(num), 1) != 0) {
tok_err(c, "error: array size must be an integer",
&$2);
&$2);
mpq_clear(num);
}
- $0->next= c->anon_typelist;
- c->anon_typelist = $0;
- }
- }$
+ t->array.static_size = 1;
+ t->size = t->array.size * t->array.member->size;
+ t->align = t->array.member->align;
+ } }$
| [ IDENTIFIER ] Type ${ {
struct variable *v = var_ref(c, $2.txt);
+ struct text noname = { "", 0 };
if (!v)
tok_err(c, "error: name undeclared", &$2);
else if (!v->constant)
tok_err(c, "error: array size must be a constant", &$2);
- $0 = calloc(1, sizeof(struct type));
- *($0) = array_prototype;
+ $0 = add_type(c, noname, &array_prototype);
$0->array.member = $<4;
$0->array.size = 0;
$0->array.vsize = v;
- $0->next= c->anon_typelist;
- c->anon_typelist = $0;
} }$
-###### parse context
-
- struct type *anon_typelist;
+###### Grammar
+ $*type
+ OptType -> Type ${ $0 = $<1; }$
+ | ${ $0 = NULL; }$
-###### free context types
+###### formal type grammar
- while (context.anon_typelist) {
- struct type *t = context.anon_typelist;
+ | [ IDENTIFIER :: OptType ] Type ${ {
+ struct variable *v = var_decl(c, $ID.txt);
+ struct text noname = { "", 0 };
- context.anon_typelist = t->next;
- free(t);
- }
+ v->type = $<OT;
+ v->constant = 1;
+ if (!v->type)
+ v->type = Tnum;
+ $0 = add_type(c, noname, &array_prototype);
+ $0->array.member = $<6;
+ $0->array.size = 0;
+ $0->array.unspec = 1;
+ $0->array.vsize = v;
+ } }$
###### Binode types
Index,
case Index: {
mpz_t q;
long i;
+ void *ptr;
- lleft = linterp_exec(b->left);
- right = interp_exec(b->right);
+ lleft = linterp_exec(c, b->left, &ltype);
+ right = interp_exec(c, b->right, &rtype);
mpz_init(q);
mpz_tdiv_q(q, mpq_numref(right.num), mpq_denref(right.num));
i = mpz_get_si(q);
mpz_clear(q);
- if (i >= 0 && i < lleft->type->array.size)
- lrv = &lleft->array.elmnts[i];
+ if (ltype->array.static_size)
+ ptr = lleft;
else
- rv = val_init(lleft->type->array.member);
+ ptr = *(void**)lleft;
+ rvtype = ltype->array.member;
+ if (i >= 0 && i < ltype->array.size)
+ lrv = ptr + i * rvtype->size;
+ else
+ val_init(ltype->array.member, &rv);
+ ltype = NULL;
break;
}
Structs are only treated as the same if they have the same name.
Simply having the same fields in the same order is not enough. This
-might change once we can create structure initializes from a list of
+might change once we can create structure initializers from a list of
values.
Each component datum is identified much like a variable is declared,
struct field {
struct text name;
struct type *type;
- struct value init;
+ struct value *init;
+ int offset;
} *fields;
} structure;
-###### value union fields
- struct {
- struct value *fields;
- } structure;
-
###### type functions
void (*print_type_decl)(struct type *type, FILE *f);
###### value functions
- static struct value structure_prepare(struct type *type)
- {
- struct value ret;
-
- ret.type = type;
- ret.structure.fields = NULL;
- return ret;
- }
-
- static struct value structure_init(struct type *type)
+ static void structure_init(struct type *type, struct value *val)
{
- struct value ret;
int i;
- ret.type = type;
- ret.structure.fields = calloc(type->structure.nfields,
- sizeof(ret.structure.fields[0]));
- for (i = 0; ret.structure.fields && i < type->structure.nfields; i++)
- ret.structure.fields[i] = val_init(type->structure.fields[i].type);
- return ret;
+ for (i = 0; i < type->structure.nfields; i++) {
+ struct value *v;
+ v = (void*) val->ptr + type->structure.fields[i].offset;
+ if (type->structure.fields[i].init)
+ dup_value(type->structure.fields[i].type,
+ type->structure.fields[i].init,
+ v);
+ else
+ val_init(type->structure.fields[i].type, v);
+ }
}
- static void structure_free(struct value val)
+ static void structure_free(struct type *type, struct value *val)
{
int i;
- if (val.structure.fields)
- for (i = 0; i < val.type->structure.nfields; i++)
- free_value(val.structure.fields[i]);
- free(val.structure.fields);
+ for (i = 0; i < type->structure.nfields; i++) {
+ struct value *v;
+ v = (void*)val->ptr + type->structure.fields[i].offset;
+ free_value(type->structure.fields[i].type, v);
+ }
}
static void structure_free_type(struct type *t)
{
int i;
for (i = 0; i < t->structure.nfields; i++)
- free_value(t->structure.fields[i].init);
+ if (t->structure.fields[i].init) {
+ free_value(t->structure.fields[i].type,
+ t->structure.fields[i].init);
+ }
free(t->structure.fields);
}
static struct type structure_prototype = {
- .prepare = structure_prepare,
.init = structure_init,
.free = structure_free,
.free_type = structure_free_type,
free(e);
break;
+###### declare terminals
+ $TERM struct .
+
###### variable grammar
| Variable . IDENTIFIER ${ {
if (!st)
type_err(c, "error: unknown type for field access", f->left,
NULL, 0, NULL);
- else if (st->prepare != structure_prepare)
+ else if (st->init != structure_init)
type_err(c, "error: field reference attempted on %1, not a struct",
f->left, st, 0, NULL);
else if (f->index == -2) {
case Xfieldref:
{
struct fieldref *f = cast(fieldref, e);
- struct value *lleft = linterp_exec(f->left);
- lrv = &lleft->structure.fields[f->index];
+ struct type *ltype;
+ struct value *lleft = linterp_exec(c, f->left, &ltype);
+ lrv = (void*)lleft->ptr + ltype->structure.fields[f->index].offset;
+ rvtype = ltype->structure.fields[f->index].type;
break;
}
if (!f)
return;
free_fieldlist(f->prev);
- free_value(f->f.init);
+ if (f->f.init) {
+ free_value(f->f.type, f->f.init);
+ free(f->f.init);
+ }
free(f);
}
###### top level grammar
- DeclareStruct -> struct IDENTIFIER FieldBlock ${ {
+ DeclareStruct -> struct IDENTIFIER FieldBlock Newlines ${ {
struct type *t =
add_type(c, $2.txt, &structure_prototype);
int cnt = 0;
t->structure.fields = calloc(cnt, sizeof(struct field));
f = $3;
while (cnt > 0) {
+ int a = f->f.type->align;
cnt -= 1;
t->structure.fields[cnt] = f->f;
- f->f.init = val_prepare(Tnone);
+ if (t->size & (a-1))
+ t->size = (t->size | (a-1)) + 1;
+ t->structure.fields[cnt].offset = t->size;
+ t->size += ((f->f.type->size - 1) | (a-1)) + 1;
+ if (a > t->align)
+ t->align = a;
+ f->f.init = NULL;
f = f->prev;
}
} }$
- | DeclareStruct NEWLINE
- $void
- Newlines -> NEWLINE
- | Newlines NEWLINE
- Open -> {
- | Newlines {
- Close -> }
- | Newlines }
$*fieldlist
- FieldBlock -> Open FieldList } ${ $0 = $<2; }$
- | Open SimpleFieldList } ${ $0 = $<2; }$
- | : FieldList $$NEWLINE ${ $0 = $<2; }$
-
- FieldList -> FieldLines ${ $0 = $<1; }$
- | Newlines FieldLines ${ $0 = $<2; }$
- FieldLines -> SimpleFieldListLine ${ $0 = $<1; }$
- | FieldLines SimpleFieldListLine ${
- $2->prev = $<1;
- $0 = $<2;
+ FieldBlock -> { IN OptNL FieldLines OUT OptNL } ${ $0 = $<FL; }$
+ | { SimpleFieldList } ${ $0 = $<SFL; }$
+ | IN OptNL FieldLines OUT ${ $0 = $<FL; }$
+ | SimpleFieldList EOL ${ $0 = $<SFL; }$
+
+ FieldLines -> SimpleFieldList Newlines ${ $0 = $<SFL; }$
+ | FieldLines SimpleFieldList Newlines ${
+ $SFL->prev = $<FL;
+ $0 = $<SFL;
}$
- SimpleFieldListLine -> SimpleFieldList NEWLINE ${ $0 = $<1; }$
- | SimpleFieldListLine NEWLINE ${ $0 = $<1; }$
- | ERROR NEWLINE ${ tok_err(c, "Syntax error in struct field", &$1); }$
-
- SimpleFieldList -> Field ${ $0 = $<1; }$
+ SimpleFieldList -> Field ${ $0 = $<F; }$
| SimpleFieldList ; Field ${
- $3->prev = $<1;
- $0 = $<3;
+ $F->prev = $<SFL;
+ $0 = $<F;
}$
| SimpleFieldList ; ${
- $0 = $<1;
+ $0 = $<SFL;
}$
+ | ERROR ${ tok_err(c, "Syntax error in struct field", &$1); }$
Field -> IDENTIFIER : Type = Expression ${ {
int ok;
$0 = calloc(1, sizeof(struct fieldlist));
$0->f.name = $1.txt;
$0->f.type = $<3;
- $0->f.init = val_prepare($0->f.type);
+ $0->f.init = NULL;
do {
ok = 1;
propagate_types($<5, c, &ok, $3, 0);
} while (ok == 2);
if (!ok)
c->parse_error = 1;
- else
- $0->f.init = interp_exec($5);
+ else {
+ struct value vl = interp_exec(c, $5, NULL);
+ $0->f.init = global_alloc(c, $0->f.type, NULL, &vl);
+ }
} }$
| IDENTIFIER : Type ${
$0 = calloc(1, sizeof(struct fieldlist));
$0->f.name = $1.txt;
$0->f.type = $<3;
- $0->f.init = val_init($3);
+ if ($0->f.type->prepare_type)
+ $0->f.type->prepare_type(c, $0->f.type, 1);
}$
###### forward decls
{
int i;
- fprintf(f, "struct %.*s:\n", t->name.len, t->name.txt);
+ fprintf(f, "struct %.*s\n", t->name.len, t->name.txt);
for (i = 0; i < t->structure.nfields; i++) {
struct field *fl = t->structure.fields + i;
fprintf(f, " %.*s : ", fl->name.len, fl->name.txt);
type_print(fl->type, f);
- if (fl->init.type->print) {
+ if (fl->type->print && fl->init) {
fprintf(f, " = ");
- if (fl->init.type == Tstr)
+ if (fl->type == Tstr)
fprintf(f, "\"");
- print_value(fl->init);
- if (fl->init.type == Tstr)
+ print_value(fl->type, fl->init);
+ if (fl->type == Tstr)
fprintf(f, "\"");
}
printf("\n");
}
}
+### Functions
+
+A function is a named chunk of code which can be passed parameters and
+can return results. Each function has an implicit type which includes
+the set of parameters and the return value. As yet these types cannot
+be declared separate from the function itself.
+
+In fact, only one function is currently possible - `main`. `main` is
+passed an array of strings together with the size of the array, and
+doesn't return anything. The strings are command line arguments.
+
+The parameters can be specified either in parentheses as a list, such as
+
+##### Example: function 1
+
+ func main(av:[ac::number]string)
+ code block
+
+or as an indented list of one parameter per line
+
+##### Example: function 2
+
+ func main
+ argv:[argc::number]string
+ do
+ code block
+
+###### Binode types
+ Func, List,
+
+###### Grammar
+
+ $TERM func main
+
+ $*binode
+ MainFunction -> func main ( OpenScope Args ) Block Newlines ${
+ $0 = new(binode);
+ $0->op = Func;
+ $0->left = reorder_bilist($<Ar);
+ $0->right = $<Bl;
+ var_block_close(c, CloseSequential);
+ if (c->scope_stack && !c->parse_error) abort();
+ }$
+ | func main IN OpenScope OptNL Args OUT OptNL do Block Newlines ${
+ $0 = new(binode);
+ $0->op = Func;
+ $0->left = reorder_bilist($<Ar);
+ $0->right = $<Bl;
+ var_block_close(c, CloseSequential);
+ if (c->scope_stack && !c->parse_error) abort();
+ }$
+ | func main NEWLINE OpenScope OptNL do Block Newlines ${
+ $0 = new(binode);
+ $0->op = Func;
+ $0->left = NULL;
+ $0->right = $<Bl;
+ var_block_close(c, CloseSequential);
+ if (c->scope_stack && !c->parse_error) abort();
+ }$
+
+ Args -> ${ $0 = NULL; }$
+ | Varlist ${ $0 = $<1; }$
+ | Varlist ; ${ $0 = $<1; }$
+ | Varlist NEWLINE ${ $0 = $<1; }$
+
+ Varlist -> Varlist ; ArgDecl ${
+ $0 = new(binode);
+ $0->op = List;
+ $0->left = $<Vl;
+ $0->right = $<AD;
+ }$
+ | ArgDecl ${
+ $0 = new(binode);
+ $0->op = List;
+ $0->left = NULL;
+ $0->right = $<AD;
+ }$
+
+ $*var
+ ArgDecl -> IDENTIFIER : FormalType ${ {
+ struct variable *v = var_decl(c, $1.txt);
+ $0 = new(var);
+ $0->var = v;
+ v->type = $<FT;
+ } }$
+
## Executables: the elements of code
Each code element needs to be parsed, printed, analysed,
###### ast
struct val {
struct exec;
+ struct type *vtype;
struct value val;
};
+###### ast functions
+ struct val *new_val(struct type *T, struct token tk)
+ {
+ struct val *v = new_pos(val, tk);
+ v->vtype = T;
+ return v;
+ }
+
###### Grammar
+ $TERM True False
+
$*val
Value -> True ${
- $0 = new_pos(val, $1);
- $0->val.type = Tbool;
+ $0 = new_val(Tbool, $1);
$0->val.bool = 1;
}$
| False ${
- $0 = new_pos(val, $1);
- $0->val.type = Tbool;
+ $0 = new_val(Tbool, $1);
$0->val.bool = 0;
}$
| NUMBER ${
- $0 = new_pos(val, $1);
- $0->val.type = Tnum;
+ $0 = new_val(Tnum, $1);
{
char tail[3];
if (number_parse($0->val.num, tail, $1.txt) == 0)
}
}$
| STRING ${
- $0 = new_pos(val, $1);
- $0->val.type = Tstr;
+ $0 = new_val(Tstr, $1);
{
char tail[3];
string_parse(&$1, '\\', &$0->val.str, tail);
}
}$
| MULTI_STRING ${
- $0 = new_pos(val, $1);
- $0->val.type = Tstr;
+ $0 = new_val(Tstr, $1);
{
char tail[3];
string_parse(&$1, '\\', &$0->val.str, tail);
case Xval:
{
struct val *v = cast(val, e);
- if (v->val.type == Tstr)
+ if (v->vtype == Tstr)
printf("\"");
- print_value(v->val);
- if (v->val.type == Tstr)
+ print_value(v->vtype, &v->val);
+ if (v->vtype == Tstr)
printf("\"");
break;
}
case Xval:
{
struct val *val = cast(val, prog);
- if (!type_compat(type, val->val.type, rules))
+ if (!type_compat(type, val->vtype, rules))
type_err(c, "error: expected %1%r found %2",
- prog, type, rules, val->val.type);
- return val->val.type;
+ prog, type, rules, val->vtype);
+ return val->vtype;
}
###### interp exec cases
case Xval:
- rv = dup_value(cast(val, e)->val);
+ rvtype = cast(val, e)->vtype;
+ dup_value(rvtype, &cast(val, e)->val, &rv);
break;
###### ast functions
static void free_val(struct val *v)
{
- if (!v)
- return;
- free_value(v->val);
+ if (v)
+ free_value(v->vtype, &v->val);
free(v);
}
case Xval: free_val(cast(val, e)); break;
###### ast functions
- // Move all nodes from 'b' to 'rv', reversing the order.
+ // Move all nodes from 'b' to 'rv', reversing their order.
// In 'b' 'left' is a list, and 'right' is the last node.
// In 'rv', left' is the first node and 'right' is a list.
static struct binode *reorder_bilist(struct binode *b)
Just as we used a `val` to wrap a value into an `exec`, we similarly
need a `var` to wrap a `variable` into an exec. While each `val`
-contained a copy of the value, each `var` hold a link to the variable
+contained a copy of the value, each `var` holds a link to the variable
because it really is the same variable no matter where it appears.
When a variable is used, we need to remember to follow the `->merged`
link to find the primary instance.
###### Grammar
+ $TERM : ::
+
$*var
VariableDecl -> IDENTIFIER : ${ {
struct variable *v = var_decl(c, $1.txt);
if (v) {
v->where_decl = $0;
v->where_set = $0;
- v->val = val_prepare($<3);
+ v->type = $<Type;
} else {
v = var_ref(c, $1.txt);
$0->var = v;
if (v) {
v->where_decl = $0;
v->where_set = $0;
- v->val = val_prepare($<3);
+ v->type = $<Type;
v->constant = 1;
} else {
v = var_ref(c, $1.txt);
/* This might be a label - allocate a var just in case */
v = var_decl(c, $1.txt);
if (v) {
- v->val = val_prepare(Tnone);
+ v->type = Tnone;
v->where_decl = $0;
v->where_set = $0;
}
} }$
## variable grammar
- $*type
- Type -> IDENTIFIER ${
- $0 = find_type(c, $1.txt);
- if (!$0) {
- tok_err(c,
- "error: undefined type", &$1);
-
- $0 = Tnone;
- }
- }$
- ## type grammar
-
###### print exec cases
case Xvar:
{
###### format cases
case 'v':
- if (loc->type == Xvar) {
+ if (loc && loc->type == Xvar) {
struct var *v = cast(var, loc);
if (v->var) {
struct binding *b = v->var->name;
prog, NULL, 0, NULL);
type_err(c, "info: name was defined as a constant here",
v->where_decl, NULL, 0, NULL);
- return v->val.type;
+ return v->type;
}
- if (v->val.type == Tnone && v->where_decl == prog)
+ if (v->type == Tnone && v->where_decl == prog)
type_err(c, "error: variable used but not declared: %v",
prog, NULL, 0, NULL);
- if (v->val.type == NULL) {
+ if (v->type == NULL) {
if (type && *ok != 0) {
- v->val = val_prepare(type);
+ v->type = type;
v->where_set = prog;
*ok = 2;
}
return type;
}
- if (!type_compat(type, v->val.type, rules)) {
+ if (!type_compat(type, v->type, rules)) {
type_err(c, "error: expected %1%r but variable '%v' is %2", prog,
- type, rules, v->val.type);
+ type, rules, v->type);
type_err(c, "info: this is where '%v' was set to %1", v->where_set,
- v->val.type, rules, NULL);
+ v->type, rules, NULL);
}
if (!type)
- return v->val.type;
+ return v->type;
return type;
}
if (v->merged)
v = v->merged;
- lrv = &v->val;
+ lrv = var_value(c, v);
+ rvtype = v->type;
break;
}
Our first user of the `binode` will be conditional expressions, which
is a bit odd as they actually have three components. That will be
handled by having 2 binodes for each expression. The conditional
-expression is the lowest precedence operatior, so it gets to define
-what an "Expression" is. The next level up is "BoolExpr", which
-comes next.
+expression is the lowest precedence operator which is why we define it
+first - to start the precedence list.
Conditional expressions are of the form "value `if` condition `else`
other_value". They associate to the right, so everything to the right
-of `else` is part of an else value, while only the BoolExpr to the
-left of `if` is the if values. Between `if` and `else` there is no
-room for ambiguity, so a full conditional expression is allowed in there.
+of `else` is part of an else value, while only a higher-precedence
+expression to the left of `if` is the if value. Between `if` and `else`
+there is no room for ambiguity, so a full conditional expression is
+allowed in there.
###### Binode types
CondExpr,
case CondExpr: {
struct binode *b2 = cast(binode, b->right);
- left = interp_exec(b->left);
+ left = interp_exec(c, b->left, &ltype);
if (left.bool)
- rv = interp_exec(b2->left);
+ rv = interp_exec(c, b2->left, &rvtype);
else
- rv = interp_exec(b2->right);
+ rv = interp_exec(c, b2->right, &rvtype);
}
break;
### Expressions: Boolean
The next class of expressions to use the `binode` will be Boolean
-expressions. As I haven't implemented precedence in the parser
-generator yet, we need different names for each precedence level used
-by expressions. The outer most or lowest level precedence after
-conditional expressions are Boolean operators which form an `BoolExpr`
-out of `BTerm`s and `BFact`s. As well as `or` `and`, and `not` we
-have `and then` and `or else` which only evaluate the second operand
-if the result would make a difference.
+expressions. "`and then`" and "`or else`" are similar to `and` and `or`
+and have the same corresponding precedence. The difference is that they
+don't evaluate the second expression if not necessary.
###### Binode types
And,
###### interp binode cases
case And:
- rv = interp_exec(b->left);
- right = interp_exec(b->right);
+ rv = interp_exec(c, b->left, &rvtype);
+ right = interp_exec(c, b->right, &rtype);
rv.bool = rv.bool && right.bool;
break;
case AndThen:
- rv = interp_exec(b->left);
+ rv = interp_exec(c, b->left, &rvtype);
if (rv.bool)
- rv = interp_exec(b->right);
+ rv = interp_exec(c, b->right, NULL);
break;
case Or:
- rv = interp_exec(b->left);
- right = interp_exec(b->right);
+ rv = interp_exec(c, b->left, &rvtype);
+ right = interp_exec(c, b->right, &rtype);
rv.bool = rv.bool || right.bool;
break;
case OrElse:
- rv = interp_exec(b->left);
+ rv = interp_exec(c, b->left, &rvtype);
if (!rv.bool)
- rv = interp_exec(b->right);
+ rv = interp_exec(c, b->right, NULL);
break;
case Not:
- rv = interp_exec(b->right);
+ rv = interp_exec(c, b->right, &rvtype);
rv.bool = !rv.bool;
break;
### Expressions: Comparison
-Of slightly higher precedence that Boolean expressions are
-Comparisons.
-A comparison takes arguments of any comparable type, but the two types must be
-the same.
+Of slightly higher precedence than Boolean expressions are Comparisons.
+A comparison takes arguments of any comparable type, but the two types
+must be the same.
To simplify the parsing we introduce an `eop` which can record an
-expression operator.
+expression operator, and the `CMPop` non-terminal will match one of them.
###### ast
struct eop {
case NEql:
{
int cmp;
- left = interp_exec(b->left);
- right = interp_exec(b->right);
- cmp = value_cmp(left, right);
- rv.type = Tbool;
+ left = interp_exec(c, b->left, &ltype);
+ right = interp_exec(c, b->right, &rtype);
+ cmp = value_cmp(ltype, rtype, &left, &right);
+ rvtype = Tbool;
switch (b->op) {
case Less: rv.bool = cmp < 0; break;
case LessEq: rv.bool = cmp <= 0; break;
case GtrEq: rv.bool = cmp >= 0; break;
case Eql: rv.bool = cmp == 0; break;
case NEql: rv.bool = cmp != 0; break;
- default: rv.bool = 0; break; // NOTEST
+ default: rv.bool = 0; break; // NOTEST
}
break;
}
### Expressions: The rest
-The remaining expressions with the highest precedence are arithmetic
-and string concatenation. They are `Expr`, `Term`, and `Factor`.
-The `Factor` is where the `Value` and `Variable` that we already have
-are included.
+The remaining expressions with the highest precedence are arithmetic,
+string concatenation, and string conversion. String concatenation
+(`++`) has the same precedence as multiplication and division, but lower
+than the unary operators.
+
+String conversion is a temporary feature until I get a better type
+system. `$` is a prefix operator which expects a string and returns
+a number.
`+` and `-` are both infix and prefix operations (where they are
absolute value and negation). These have different operator names.
We also have a 'Bracket' operator which records where parentheses were
-found. This makes it easy to reproduce these when printing. Once
-precedence is handled better I might be able to discard this.
+found. This makes it easy to reproduce these when printing. Possibly I
+should only insert brackets where needed for precedence.
###### Binode types
Plus, Minus,
Times, Divide, Rem,
Concat,
Absolute, Negate,
+ StringConv,
Bracket,
###### expr precedence
$LEFT + - Eop
$LEFT * / % ++ Top
- $LEFT Uop
+ $LEFT Uop $
+ $TERM ( )
###### expression grammar
| Expression Eop Expression ${ {
Uop -> + ${ $0.op = Absolute; }$
| - ${ $0.op = Negate; }$
+ | $ ${ $0.op = StringConv; }$
Top -> * ${ $0.op = Times; }$
| / ${ $0.op = Divide; }$
if (bracket) printf(")");
break;
case Absolute:
- if (bracket) printf("(");
- printf("+");
- print_exec(b->right, indent, bracket);
- if (bracket) printf(")");
- break;
case Negate:
+ case StringConv:
if (bracket) printf("(");
- printf("-");
+ switch (b->op) {
+ case Absolute: fputs("+", stdout); break;
+ case Negate: fputs("-", stdout); break;
+ case StringConv: fputs("$", stdout); break;
+ default: abort(); // NOTEST
+ } // NOTEST
print_exec(b->right, indent, bracket);
if (bracket) printf(")");
break;
Tstr, rules, type);
return Tstr;
+ case StringConv:
+ /* op must be string, result is number */
+ propagate_types(b->left, c, ok, Tstr, 0);
+ if (!type_compat(type, Tnum, 0))
+ type_err(c,
+ "error: Can only convert string to number, not %1",
+ prog, type, 0, NULL);
+ return Tnum;
+
case Bracket:
return propagate_types(b->right, c, ok, type, 0);
###### interp binode cases
case Plus:
- rv = interp_exec(b->left);
- right = interp_exec(b->right);
+ rv = interp_exec(c, b->left, &rvtype);
+ right = interp_exec(c, b->right, &rtype);
mpq_add(rv.num, rv.num, right.num);
break;
case Minus:
- rv = interp_exec(b->left);
- right = interp_exec(b->right);
+ rv = interp_exec(c, b->left, &rvtype);
+ right = interp_exec(c, b->right, &rtype);
mpq_sub(rv.num, rv.num, right.num);
break;
case Times:
- rv = interp_exec(b->left);
- right = interp_exec(b->right);
+ rv = interp_exec(c, b->left, &rvtype);
+ right = interp_exec(c, b->right, &rtype);
mpq_mul(rv.num, rv.num, right.num);
break;
case Divide:
- rv = interp_exec(b->left);
- right = interp_exec(b->right);
+ rv = interp_exec(c, b->left, &rvtype);
+ right = interp_exec(c, b->right, &rtype);
mpq_div(rv.num, rv.num, right.num);
break;
case Rem: {
mpz_t l, r, rem;
- left = interp_exec(b->left);
- right = interp_exec(b->right);
+ left = interp_exec(c, b->left, &ltype);
+ right = interp_exec(c, b->right, &rtype);
mpz_init(l); mpz_init(r); mpz_init(rem);
mpz_tdiv_q(l, mpq_numref(left.num), mpq_denref(left.num));
mpz_tdiv_q(r, mpq_numref(right.num), mpq_denref(right.num));
mpz_tdiv_r(rem, l, r);
- rv = val_init(Tnum);
+ val_init(Tnum, &rv);
mpq_set_z(rv.num, rem);
mpz_clear(r); mpz_clear(l); mpz_clear(rem);
+ rvtype = ltype;
break;
}
case Negate:
- rv = interp_exec(b->right);
+ rv = interp_exec(c, b->right, &rvtype);
mpq_neg(rv.num, rv.num);
break;
case Absolute:
- rv = interp_exec(b->right);
+ rv = interp_exec(c, b->right, &rvtype);
mpq_abs(rv.num, rv.num);
break;
case Bracket:
- rv = interp_exec(b->right);
+ rv = interp_exec(c, b->right, &rvtype);
break;
case Concat:
- left = interp_exec(b->left);
- right = interp_exec(b->right);
- rv.type = Tstr;
+ left = interp_exec(c, b->left, &ltype);
+ right = interp_exec(c, b->right, &rtype);
+ rvtype = Tstr;
rv.str = text_join(left.str, right.str);
break;
+ case StringConv:
+ right = interp_exec(c, b->right, &rvtype);
+ rtype = Tstr;
+ rvtype = Tnum;
+
+ struct text tx = right.str;
+ char tail[3];
+ int neg = 0;
+ if (tx.txt[0] == '-') {
+ neg = 1;
+ tx.txt++;
+ tx.len--;
+ }
+ if (number_parse(rv.num, tail, tx) == 0)
+ mpq_init(rv.num);
+ else if (neg)
+ mpq_neg(rv.num, rv.num);
+ if (tail[0])
+ printf("Unsupported suffix: %.*s\n", tx.len, tx.txt);
+
+ break;
###### value functions
A simple statement list needs no extra syntax. A complex statement
list has two syntactic forms. It can be enclosed in braces (much like
-C blocks), or it can be introduced by a colon and continue until an
+C blocks), or it can be introduced by an indent and continue until an
unindented newline (much like Python blocks). With this extra syntax
it is referred to as a block.
###### Grammar
- $*binode
- Block -> Open Statementlist Close ${ $0 = $<2; }$
- | Open SimpleStatements } ${ $0 = reorder_bilist($<2); }$
- | : SimpleStatements ${ $0 = reorder_bilist($<2); }$
- | : Statementlist $$OUT ${ $0 = $<2; }$
+ $TERM { } ;
- Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<1); }$
- | Newlines ComplexStatements ${ $0 = reorder_bilist($<2); }$
+ $*binode
+ Block -> { IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
+ | { SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
+ | SimpleStatements ; ${ $0 = reorder_bilist($<SS); }$
+ | SimpleStatements EOL ${ $0 = reorder_bilist($<SS); }$
+ | IN OptNL Statementlist OUT ${ $0 = $<Sl; }$
+
+ OpenBlock -> OpenScope { IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
+ | OpenScope { SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
+ | OpenScope SimpleStatements ; ${ $0 = reorder_bilist($<SS); }$
+ | OpenScope SimpleStatements EOL ${ $0 = reorder_bilist($<SS); }$
+ | IN OpenScope OptNL Statementlist OUT ${ $0 = $<Sl; }$
+
+ UseBlock -> { OpenScope IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
+ | { OpenScope SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
+ | IN OpenScope OptNL Statementlist OUT ${ $0 = $<Sl; }$
+
+ ColonBlock -> { IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
+ | { SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
+ | : SimpleStatements ; ${ $0 = reorder_bilist($<SS); }$
+ | : SimpleStatements EOL ${ $0 = reorder_bilist($<SS); }$
+ | : IN OptNL Statementlist OUT ${ $0 = $<Sl; }$
+
+ Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<CS); }$
ComplexStatements -> ComplexStatements ComplexStatement ${
if ($2 == NULL) {
}$
$*exec
- ComplexStatement -> SimpleStatementLine ${
- $0 = reorder_bilist($<1);
+ ComplexStatement -> SimpleStatements Newlines ${
+ $0 = reorder_bilist($<SS);
+ }$
+ | SimpleStatements ; Newlines ${
+ $0 = reorder_bilist($<SS);
}$
- | ComplexStatement NEWLINE ${ $0 = $<1; }$
## ComplexStatement Grammar
$*binode
$0->right = $<1;
}$
- SimpleStatementLine -> SimpleStatements NEWLINE ${ $0 = $<1; }$
- | SimpleStatements ; NEWLINE ${ $0 = $<1; }$
- | SimpleStatementLine NEWLINE ${ $0 = $<1; }$
-
+ $TERM pass
SimpleStatement -> pass ${ $0 = NULL; }$
| ERROR ${ tok_err(c, "Syntax error in statement", &$1); }$
## SimpleStatement Grammar
###### interp binode cases
case Block:
- while (rv.type == Tnone &&
+ while (rvtype == Tnone &&
b) {
if (b->left)
- rv = interp_exec(b->left);
+ rv = interp_exec(c, b->left, &rvtype);
b = cast(binode, b->right);
}
break;
###### Binode types
Print,
+###### expr precedence
+ $TERM print ,
+
###### SimpleStatement Grammar
| print ExpressionList ${
if (b->left) {
if (sep)
putchar(sep);
- left = interp_exec(b->left);
- print_value(left);
- free_value(left);
+ left = interp_exec(c, b->left, &ltype);
+ print_value(ltype, &left);
+ free_value(ltype, &left);
if (b->right)
sep = ' ';
} else if (sep)
eol = 0;
- left.type = Tnone;
+ ltype = Tnone;
if (eol)
printf("\n");
break;
###### Assignment statement
An assignment will assign a value to a variable, providing it hasn't
-be declared as a constant. The analysis phase ensures that the type
+been declared as a constant. The analysis phase ensures that the type
will be correct so the interpreter just needs to perform the
calculation. There is a form of assignment which declares a new
variable as well as assigning a value. If a name is assigned before
Assign,
Declare,
+###### declare terminals
+ $TERM =
+
###### SimpleStatement Grammar
| Variable = Expression ${
$0 = new(binode);
if (cast(var, b->left)->var->constant) {
if (v->where_decl == v->where_set) {
printf("::");
- type_print(v->val.type, stdout);
+ type_print(v->type, stdout);
printf(" ");
} else
printf(" ::");
} else {
if (v->where_decl == v->where_set) {
printf(":");
- type_print(v->val.type, stdout);
+ type_print(v->type, stdout);
printf(" ");
} else
printf(" :");
###### interp binode cases
case Assign:
- lleft = linterp_exec(b->left);
- right = interp_exec(b->right);
+ lleft = linterp_exec(c, b->left, &ltype);
+ right = interp_exec(c, b->right, &rtype);
if (lleft) {
- free_value(*lleft);
- *lleft = right;
- } else
- free_value(right); // NOTEST
- right.type = NULL;
+ free_value(ltype, lleft);
+ dup_value(ltype, &right, lleft);
+ ltype = NULL;
+ }
break;
case Declare:
{
struct variable *v = cast(var, b->left)->var;
+ struct value *val;
if (v->merged)
v = v->merged;
- if (b->right)
- right = interp_exec(b->right);
- else
- right = val_init(v->val.type);
- free_value(v->val);
- v->val = right;
- right.type = NULL;
+ val = var_value(c, v);
+ free_value(v->type, val);
+ if (v->type->prepare_type)
+ v->type->prepare_type(c, v->type, 0);
+ if (b->right) {
+ right = interp_exec(c, b->right, &rtype);
+ memcpy(val, &right, rtype->size);
+ rtype = Tnone;
+ } else {
+ val_init(v->type, val);
+ }
break;
}
###### Binode types
Use,
+###### expr precedence
+ $TERM use
+
###### SimpleStatement Grammar
| use Expression ${
$0 = new_pos(binode, $1);
$0->right = $<2;
if ($0->right->type == Xvar) {
struct var *v = cast(var, $0->right);
- if (v->var->val.type == Tnone) {
+ if (v->var->type == Tnone) {
/* Convert this to a label */
- v->var->val = val_prepare(Tlabel);
- v->var->val.label = &v->var->val;
+ struct value *val;
+
+ v->var->type = Tlabel;
+ val = global_alloc(c, Tlabel, v->var, NULL);
+ val->label = val;
}
}
}$
###### interp binode cases
case Use:
- rv = interp_exec(b->right);
+ rv = interp_exec(c, b->right, &rvtype);
break;
### The Conditional Statement
case Xcond_statement: free_cond_statement(cast(cond_statement, e)); break;
###### ComplexStatement Grammar
- | CondStatement $$NEWLINE ${ $0 = $<1; }$
+ | CondStatement ${ $0 = $<1; }$
+
+###### expr precedence
+ $TERM for then while do
+ $TERM else
+ $TERM switch case
###### Grammar
$*cond_statement
- // both ForThen and Whilepart open scopes, and CondSuffix only
+ // A CondStatement must end with EOL, as do CondSuffix and
+ // IfSuffix.
+ // ForPart, ThenPart, SwitchPart, CasePart are non-empty and
+ // may or may not end with EOL
+ // WhilePart and IfPart include an appropriate Suffix
+
+ // Both ForPart and WhilePart open scopes, and CondSuffix only
// closes one - so in the first branch here we have another to close.
- CondStatement -> ForPart ThenPart WhilePart CondSuffix ${
- $0 = $<4;
- $0->forpart = $<1;
- $0->thenpart = $<2;
- $0->condpart = $3.condpart; $3.condpart = NULL;
- $0->dopart = $3.dopart; $3.dopart = NULL;
+ CondStatement -> ForPart OptNL ThenPart OptNL WhilePart CondSuffix ${
+ $0 = $<CS;
+ $0->forpart = $<FP;
+ $0->thenpart = $<TP;
+ $0->condpart = $WP.condpart; $WP.condpart = NULL;
+ $0->dopart = $WP.dopart; $WP.dopart = NULL;
var_block_close(c, CloseSequential);
}$
- | ForPart WhilePart CondSuffix ${
- $0 = $<3;
- $0->forpart = $<1;
- $0->thenpart = NULL;
- $0->condpart = $2.condpart; $2.condpart = NULL;
- $0->dopart = $2.dopart; $2.dopart = NULL;
+ | ForPart OptNL WhilePart CondSuffix ${
+ $0 = $<CS;
+ $0->forpart = $<FP;
+ $0->condpart = $WP.condpart; $WP.condpart = NULL;
+ $0->dopart = $WP.dopart; $WP.dopart = NULL;
var_block_close(c, CloseSequential);
}$
| WhilePart CondSuffix ${
- $0 = $<2;
- $0->condpart = $1.condpart; $1.condpart = NULL;
- $0->dopart = $1.dopart; $1.dopart = NULL;
+ $0 = $<CS;
+ $0->condpart = $WP.condpart; $WP.condpart = NULL;
+ $0->dopart = $WP.dopart; $WP.dopart = NULL;
+ }$
+ | SwitchPart OptNL CasePart CondSuffix ${
+ $0 = $<CS;
+ $0->condpart = $<SP;
+ $CP->next = $0->casepart;
+ $0->casepart = $<CP;
}$
- | SwitchPart CasePart CondSuffix ${
- $0 = $<3;
- $0->condpart = $<1;
- $2->next = $0->casepart;
- $0->casepart = $<2;
+ | SwitchPart : IN OptNL CasePart CondSuffix OUT Newlines ${
+ $0 = $<CS;
+ $0->condpart = $<SP;
+ $CP->next = $0->casepart;
+ $0->casepart = $<CP;
}$
| IfPart IfSuffix ${
- $0 = $<2;
- $0->condpart = $1.condpart; $1.condpart = NULL;
- $0->thenpart = $1.thenpart; $1.thenpart = NULL;
+ $0 = $<IS;
+ $0->condpart = $IP.condpart; $IP.condpart = NULL;
+ $0->thenpart = $IP.thenpart; $IP.thenpart = NULL;
// This is where we close an "if" statement
var_block_close(c, CloseSequential);
}$
// "for" or "while" statement
var_block_close(c, CloseSequential);
}$
+ | Newlines CasePart CondSuffix ${
+ $0 = $<CS;
+ $CP->next = $0->casepart;
+ $0->casepart = $<CP;
+ }$
| CasePart CondSuffix ${
- $0 = $<2;
- $1->next = $0->casepart;
- $0->casepart = $<1;
+ $0 = $<CS;
+ $CP->next = $0->casepart;
+ $0->casepart = $<CP;
}$
- $*casepart
- CasePart -> case Expression OpenScope Block ${
- $0 = calloc(1,sizeof(struct casepart));
- $0->value = $<2;
- $0->action = $<4;
- var_block_close(c, CloseParallel);
- }$
- | CasePart NEWLINE ${ $0 = $<1; }$
+ IfSuffix -> Newlines ${ $0 = new(cond_statement); }$
+ | Newlines ElsePart ${ $0 = $<EP; }$
+ | ElsePart ${$0 = $<EP; }$
- $*cond_statement
- IfSuffix -> ${ $0 = new(cond_statement); }$
- | IfSuffix NEWLINE ${ $0 = $<1; }$
- | else OpenScope Block ${
+ ElsePart -> else OpenBlock Newlines ${
$0 = new(cond_statement);
- $0->elsepart = $<3;
+ $0->elsepart = $<OB;
var_block_close(c, CloseElse);
}$
| else OpenScope CondStatement ${
$0 = new(cond_statement);
- $0->elsepart = $<3;
+ $0->elsepart = $<CS;
var_block_close(c, CloseElse);
}$
+ $*casepart
+ CasePart -> case Expression OpenScope ColonBlock ${
+ $0 = calloc(1,sizeof(struct casepart));
+ $0->value = $<Ex;
+ $0->action = $<Bl;
+ var_block_close(c, CloseParallel);
+ }$
+
$*exec
// These scopes are closed in CondSuffix
- ForPart -> for OpenScope SimpleStatements ; ${
- $0 = reorder_bilist($<3);
+ ForPart -> for OpenBlock ${
+ $0 = $<Bl;
}$
- | for OpenScope SimpleStatements NEWLINE ${
- $0 = reorder_bilist($<3);
- }$
- | for OpenScope Block ${
- $0 = $<3;
- }$
- | ForPart NEWLINE ${ $0 = $<1; }$
- ThenPart -> then OpenScope SimpleStatements ; ${
- $0 = reorder_bilist($<3);
+ ThenPart -> then OpenBlock ${
+ $0 = $<OB;
var_block_close(c, CloseSequential);
}$
- | then OpenScope SimpleStatements NEWLINE ${
- $0 = reorder_bilist($<3);
- var_block_close(c, CloseSequential);
- }$
- | then OpenScope Block ${
- $0 = $<3;
- var_block_close(c, CloseSequential);
- }$
- | ThenPart NEWLINE ${ $0 = $<1; }$
-
- // This scope is closed in CondSuffix
- WhileHead -> while OpenScope Block ${ $0 = $<3; }$
- | WhileHead NEWLINE ${ $0 = $<1; }$
$cond_statement
// This scope is closed in CondSuffix
- WhilePart -> while OpenScope Expression Block ${
- $0.type = Xcond_statement;
- $0.condpart = $<3;
- $0.dopart = $<4;
+ WhilePart -> while UseBlock OptNL do Block ${
+ $0.condpart = $<UB;
+ $0.dopart = $<Bl;
}$
- | WhileHead do Block ${
- $0.type = Xcond_statement;
- $0.condpart = $<1;
- $0.dopart = $<3;
+ | while OpenScope Expression ColonBlock ${
+ $0.condpart = $<Exp;
+ $0.dopart = $<Bl;
}$
- | WhilePart NEWLINE ${ $0 = $<1; }$
- IfPart -> if OpenScope Expression OpenScope Block ${
- $0.type = Xcond_statement;
- $0.condpart = $<3;
- $0.thenpart = $<5;
- var_block_close(c, CloseParallel);
+ IfPart -> if UseBlock OptNL then OpenBlock ClosePara ${
+ $0.condpart = $<UB;
+ $0.thenpart = $<Bl;
}$
- | if OpenScope Block then OpenScope Block ${
- $0.type = Xcond_statement;
- $0.condpart = $<3;
- $0.thenpart = $<6;
- var_block_close(c, CloseParallel);
+ | if OpenScope Expression OpenScope ColonBlock ClosePara ${
+ $0.condpart = $<Ex;
+ $0.thenpart = $<Bl;
+ }$
+ | if OpenScope Expression OpenScope OptNL then Block ClosePara ${
+ $0.condpart = $<Ex;
+ $0.thenpart = $<Bl;
}$
- | IfPart NEWLINE ${ $0 = $<1; }$
$*exec
// This scope is closed in CondSuffix
SwitchPart -> switch OpenScope Expression ${
- $0 = $<3;
+ $0 = $<Ex;
}$
- | switch OpenScope Block ${
- $0 = $<3;
+ | switch UseBlock ${
+ $0 = $<Bl;
}$
- | SwitchPart NEWLINE ${ $0 = $<1; }$
###### print exec cases
struct casepart *cp;
if (cs->forpart) {
do_indent(indent, "for");
- if (bracket) printf(" {\n"); else printf(":\n");
+ if (bracket) printf(" {\n"); else printf("\n");
print_exec(cs->forpart, indent+1, bracket);
if (cs->thenpart) {
if (bracket)
do_indent(indent, "} then {\n");
else
- do_indent(indent, "then:\n");
+ do_indent(indent, "then\n");
print_exec(cs->thenpart, indent+1, bracket);
}
if (bracket) do_indent(indent, "}\n");
if (bracket)
do_indent(indent, "while {\n");
else
- do_indent(indent, "while:\n");
+ do_indent(indent, "while\n");
print_exec(cs->condpart, indent+1, bracket);
if (bracket)
do_indent(indent, "} do {\n");
else
- do_indent(indent, "do:\n");
+ do_indent(indent, "do\n");
print_exec(cs->dopart, indent+1, bracket);
if (bracket)
do_indent(indent, "}\n");
if (bracket)
printf(" {\n");
else
- printf(":\n");
+ printf("\n");
print_exec(cs->elsepart, indent+1, bracket);
if (bracket)
do_indent(indent, "}\n");
case Xcond_statement:
{
struct value v, cnd;
+ struct type *vtype, *cndtype;
struct casepart *cp;
- struct cond_statement *c = cast(cond_statement, e);
+ struct cond_statement *cs = cast(cond_statement, e);
- if (c->forpart)
- interp_exec(c->forpart);
+ if (cs->forpart)
+ interp_exec(c, cs->forpart, NULL);
do {
- if (c->condpart)
- cnd = interp_exec(c->condpart);
+ if (cs->condpart)
+ cnd = interp_exec(c, cs->condpart, &cndtype);
else
- cnd.type = Tnone;
- if (!(cnd.type == Tnone ||
- (cnd.type == Tbool && cnd.bool != 0)))
+ cndtype = Tnone;
+ if (!(cndtype == Tnone ||
+ (cndtype == Tbool && cnd.bool != 0)))
break;
// cnd is Tnone or Tbool, doesn't need to be freed
- if (c->dopart)
- interp_exec(c->dopart);
+ if (cs->dopart)
+ interp_exec(c, cs->dopart, NULL);
- if (c->thenpart) {
- rv = interp_exec(c->thenpart);
- if (rv.type != Tnone || !c->dopart)
+ if (cs->thenpart) {
+ rv = interp_exec(c, cs->thenpart, &rvtype);
+ if (rvtype != Tnone || !cs->dopart)
goto Xcond_done;
- free_value(rv);
+ free_value(rvtype, &rv);
+ rvtype = Tnone;
}
- } while (c->dopart);
-
- for (cp = c->casepart; cp; cp = cp->next) {
- v = interp_exec(cp->value);
- if (value_cmp(v, cnd) == 0) {
- free_value(v);
- free_value(cnd);
- rv = interp_exec(cp->action);
+ } while (cs->dopart);
+
+ for (cp = cs->casepart; cp; cp = cp->next) {
+ v = interp_exec(c, cp->value, &vtype);
+ if (value_cmp(cndtype, vtype, &v, &cnd) == 0) {
+ free_value(vtype, &v);
+ free_value(cndtype, &cnd);
+ rv = interp_exec(c, cp->action, &rvtype);
goto Xcond_done;
}
- free_value(v);
+ free_value(vtype, &v);
}
- free_value(cnd);
- if (c->elsepart)
- rv = interp_exec(c->elsepart);
+ free_value(cndtype, &cnd);
+ if (cs->elsepart)
+ rv = interp_exec(c, cs->elsepart, &rvtype);
else
- rv.type = Tnone;
+ rvtype = Tnone;
Xcond_done:
break;
}
Many of the things that can be declared haven't been described yet,
such as functions, procedures, imports, and probably more.
For now there are three sorts of things that can appear at the top
-level. They are predefined constants, `struct` types, and the main
-program. While the syntax will allow the main program to appear
+level. They are predefined constants, `struct` types, and the `main`
+function. While the syntax will allow the `main` function to appear
multiple times, that will trigger an error if it is actually attempted.
The various declarations do not return anything. They store the
###### Parser: grammar
$void
- Ocean -> DeclarationList
- | Newlines DeclarationList
+ Ocean -> OptNL DeclarationList
+
+ ## declare terminals
+
+ OptNL ->
+ | OptNL NEWLINE
+ Newlines -> NEWLINE
+ | Newlines NEWLINE
DeclarationList -> Declaration
| DeclarationList Declaration
- Declaration -> Declaration NEWLINE
- | DeclareConstant
- | DeclareProgram
- | DeclareStruct
- | ERROR NEWLINE ${
+ Declaration -> ERROR Newlines ${
tok_err(c,
"error: unhandled parse error", &$1);
}$
+ | DeclareConstant
+ | DeclareFunction
+ | DeclareStruct
## top level grammar
+ ## Grammar
+
### The `const` section
As well as being defined with the code that uses them, constants
###### top level grammar
- DeclareConstant -> const Open ConstList Close
- | const Open SimpleConstList }
- | const : ConstList $$NEWLINE
- | const SimpleConstList NEWLINE
+ $TERM const
- ConstList -> ConstLines
- | Newlines ConstLines
- ConstLines -> ConstLines SimpleConstLine
+ DeclareConstant -> const { IN OptNL ConstList OUT OptNL } Newlines
+ | const { SimpleConstList } Newlines
+ | const IN OptNL ConstList OUT Newlines
+ | const SimpleConstList Newlines
+
+ ConstList -> ConstList SimpleConstLine
| SimpleConstLine
SimpleConstList -> SimpleConstList ; Const
| Const
| SimpleConstList ;
- SimpleConstLine -> SimpleConstList NEWLINE
- | SimpleConstLine NEWLINE
- | ERROR NEWLINE ${ tok_err(c, "Syntax error in constant", &$1); }$
+ SimpleConstLine -> SimpleConstList Newlines
+ | ERROR Newlines ${ tok_err(c, "Syntax error in constant", &$1); }$
$*type
CType -> Type ${ $0 = $<1; }$
if (!ok)
c->parse_error = 1;
else if (v) {
- v->val = interp_exec($5);
+ struct value res = interp_exec(c, $5, &v->type);
+ global_alloc(c, v->type, v, &res);
}
} }$
if (target == -1) {
if (i)
- printf("const:\n");
+ printf("const\n");
target = i;
} else {
+ struct value *val = var_value(&context, v);
printf(" %.*s :: ", v->name->name.len, v->name->name.txt);
- type_print(v->val.type, stdout);
+ type_print(v->type, stdout);
printf(" = ");
- if (v->val.type == Tstr)
+ if (v->type == Tstr)
printf("\"");
- print_value(v->val);
- if (v->val.type == Tstr)
+ print_value(v->type, val);
+ if (v->type == Tstr)
printf("\"");
printf("\n");
target -= 1;
}
}
-### Finally the whole program.
+### Finally the whole `main` function.
-Somewhat reminiscent of Pascal a (current) Ocean program starts with
-the keyword "program" and a list of variable names which are assigned
-values from command line arguments. Following this is a `block` which
-is the code to execute. Unlike Pascal, constants and other
-declarations come *before* the program.
+An Ocean program can currently have only one function - `main` - and
+that must exist. It expects an array of strings with a provided size.
+Following this is a `block` which is the code to execute.
As this is the top level, several things are handled a bit
differently.
-The whole program is not interpreted by `interp_exec` as that isn't
+The function is not interpreted by `interp_exec` as that isn't
passed the argument list which the program requires. Similarly type
analysis is a bit more interesting at this level.
-###### Binode types
- Program,
-
###### top level grammar
- DeclareProgram -> Program ${ {
+ DeclareFunction -> MainFunction ${ {
if (c->prog)
- type_err(c, "Program defined a second time",
+ type_err(c, "\"main\" defined a second time",
$1, NULL, 0, NULL);
else
c->prog = $<1;
} }$
- $*binode
- Program -> program OpenScope Varlist Block ${
- $0 = new(binode);
- $0->op = Program;
- $0->left = reorder_bilist($<3);
- $0->right = $<4;
- var_block_close(c, CloseSequential);
- if (c->scope_stack && !c->parse_error) abort();
- }$
-
- Varlist -> Varlist ArgDecl ${
- $0 = new(binode);
- $0->op = Program;
- $0->left = $<1;
- $0->right = $<2;
- }$
- | ${ $0 = NULL; }$
-
- $*var
- ArgDecl -> IDENTIFIER ${ {
- struct variable *v = var_decl(c, $1.txt);
- $0 = new(var);
- $0->var = v;
- } }$
-
- ## Grammar
-
###### print binode cases
- case Program:
- do_indent(indent, "program");
+ case Func:
+ case List:
+ do_indent(indent, "func main(");
for (b2 = cast(binode, b->left); b2; b2 = cast(binode, b2->right)) {
+ struct variable *v = cast(var, b2->left)->var;
printf(" ");
print_exec(b2->left, 0, 0);
+ printf(":");
+ type_print(v->type, stdout);
}
if (bracket)
- printf(" {\n");
+ printf(") {\n");
else
- printf(":\n");
+ printf(")\n");
print_exec(b->right, indent+1, bracket);
if (bracket)
do_indent(indent, "}\n");
break;
###### propagate binode cases
- case Program: abort(); // NOTEST
+ case List:
+ case Func: abort(); // NOTEST
###### core functions
static int analyse_prog(struct exec *prog, struct parse_context *c)
{
- struct binode *b = cast(binode, prog);
+ struct binode *bp = cast(binode, prog);
+ struct binode *b;
int ok = 1;
+ int arg = 0;
+ struct type *argv_type;
+ struct text argv_type_name = { " argv", 5 };
- if (!b)
+ if (!bp)
return 0; // NOTEST
- do {
- ok = 1;
- propagate_types(b->right, c, &ok, Tnone, 0);
- } while (ok == 2);
- if (!ok)
- return 0;
- for (b = cast(binode, b->left); b; b = cast(binode, b->right)) {
- struct var *v = cast(var, b->left);
- if (!v->var->val.type) {
- v->var->where_set = b;
- v->var->val = val_prepare(Tstr);
+ argv_type = add_type(c, argv_type_name, &array_prototype);
+ argv_type->array.member = Tstr;
+ argv_type->array.unspec = 1;
+
+ for (b = cast(binode, bp->left); b; b = cast(binode, b->right)) {
+ ok = 1;
+ switch (arg++) {
+ case 0: /* argv */
+ propagate_types(b->left, c, &ok, argv_type, 0);
+ break;
+ default: /* invalid */ // NOTEST
+ propagate_types(b->left, c, &ok, Tnone, 0); // NOTEST
}
}
- b = cast(binode, prog);
+
do {
ok = 1;
- propagate_types(b->right, c, &ok, Tnone, 0);
+ propagate_types(bp->right, c, &ok, Tnone, 0);
} while (ok == 2);
if (!ok)
return 0;
/* Make sure everything is still consistent */
- propagate_types(b->right, c, &ok, Tnone, 0);
- return !!ok;
+ propagate_types(bp->right, c, &ok, Tnone, 0);
+ if (!ok)
+ return 0;
+ scope_finalize(c);
+ return 1;
}
- static void interp_prog(struct exec *prog, char **argv)
+ static void interp_prog(struct parse_context *c, struct exec *prog,
+ int argc, char **argv)
{
struct binode *p = cast(binode, prog);
struct binode *al;
+ int anum = 0;
struct value v;
+ struct type *vtype;
if (!prog)
return; // NOTEST
al = cast(binode, p->left);
while (al) {
struct var *v = cast(var, al->left);
- struct value *vl = &v->var->val;
-
- if (argv[0] == NULL) {
- printf("Not enough args\n");
- exit(1);
+ struct value *vl = var_value(c, v->var);
+ struct value arg;
+ struct type *t;
+ mpq_t argcq;
+ int i;
+
+ switch (anum++) {
+ case 0: /* argv */
+ t = v->var->type;
+ mpq_init(argcq);
+ mpq_set_ui(argcq, argc, 1);
+ memcpy(var_value(c, t->array.vsize), &argcq, sizeof(argcq));
+ t->prepare_type(c, t, 0);
+ array_init(v->var->type, vl);
+ for (i = 0; i < argc; i++) {
+ struct value *vl2 = vl->array + i * v->var->type->array.member->size;
+
+
+ arg.str.txt = argv[i];
+ arg.str.len = strlen(argv[i]);
+ free_value(Tstr, vl2);
+ dup_value(Tstr, &arg, vl2);
+ }
+ break;
}
al = cast(binode, al->right);
- free_value(*vl);
- *vl = parse_value(vl->type, argv[0]);
- if (vl->type == NULL)
- exit(1);
- argv++;
}
- v = interp_exec(p->right);
- free_value(v);
+ v = interp_exec(c, p->right, &vtype);
+ free_value(vtype, &v);
}
###### interp binode cases
- case Program: abort(); // NOTEST
+ case List:
+ case Func: abort(); // NOTEST
## And now to test it out.
###### demo: hello
- const:
+ const
pi ::= 3.141_592_6
four ::= 2 + 2 ; five ::= 10/2
const pie ::= "I like Pie";
cake ::= "The cake is"
++ " a lie"
- struct fred:
+ struct fred
size:[four]number
name:string
alive:Boolean
- program A B:
+ func main
+ argv:[argc::]string
+ do
print "Hello World, what lovely oceans you have!"
print "Are there", five, "?"
print pi, pie, "but", cake
+ A := $argv[1]; B := $argv[2]
+
/* When a variable is defined in both branches of an 'if',
* and used afterwards, the variables are merged.
*/
if A > B:
bigger := "yes"
- else:
+ else
bigger := "no"
print "Is", A, "bigger than", B,"? ", bigger
/* If a variable is not used after the 'if', no
if A > B * 2:
double:string = "yes"
print A, "is more than twice", B, "?", double
- else:
+ else
double := B*2
print "double", B, "is", double
while a != b:
if a < b:
b = b - a
- else:
+ else
a = a - b
print "GCD of", A, "and", B,"is", a
else if a <= 0:
print a, "is not positive, cannot calculate GCD"
- else:
+ else
print b, "is not positive, cannot calculate GCD"
- for:
+ for
togo := 10
f1 := 1; f2 := 1
print "Fibonacci:", f1,f2,
print ""
/* Binary search... */
- for:
+ for
lo:= 0; hi := 100
target := 77
- while:
+ while
mid := (lo + hi) / 2
if mid == target:
use Found
if mid < target:
lo = mid
- else:
+ else
hi = mid
if hi - lo < 1:
use GiveUp
use True
- do: pass
+ do pass
case Found:
print "Yay, I found", target
case GiveUp:
print "", list[i],
print
- if 1 == 2: print "yes" else: print "no"
+ if 1 == 2 then print "yes"; else print "no"
bob:fred
bob.name = "Hello"