1 # Ocean Interpreter - Jamison Creek version
3 Ocean is intended to be a compiled language, so this interpreter is
4 not targeted at being the final product. It is, rather, an intermediate
5 stage and fills that role in two distinct ways.
7 Firstly, it exists as a platform to experiment with the early language
8 design. An interpreter is easy to write and easy to get working, so
9 the barrier for entry is lower if I aim to start with an interpreter.
11 Secondly, the plan for the Ocean compiler is to write it in the
12 [Ocean language](http://ocean-lang.org). To achieve this we naturally
13 need some sort of boot-strap process and this interpreter - written in
14 portable C - will fill that role. It will be used to bootstrap the
17 Two features that are not needed to fill either of these roles are
18 performance and completeness. The interpreter only needs to be fast
19 enough to run small test programs and occasionally to run the compiler
20 on itself. It only needs to be complete enough to test aspects of the
21 design which are developed before the compiler is working, and to run
22 the compiler on itself. Any features not used by the compiler when
23 compiling itself are superfluous. They may be included anyway, but
26 Nonetheless, the interpreter should end up being reasonably complete,
27 and any performance bottlenecks which appear and are easily fixed, will
32 This third version of the interpreter exists to test out some initial
33 ideas relating to types. Particularly it adds arrays (indexed from
34 zero) and simple structures. Basic control flow and variable scoping
35 are already fairly well established, as are basic numerical and
38 Some operators that have only recently been added, and so have not
39 generated all that much experience yet are "and then" and "or else" as
40 short-circuit Boolean operators, and the "if ... else" trinary
41 operator which can select between two expressions based on a third
42 (which appears syntactically in the middle).
44 Elements that are present purely to make a usable language, and
45 without any expectation that they will remain, are the "program"
46 clause, which provides a list of variables to receive command-line
47 arguments, and the "print" statement which performs simple output.
49 The current scalar types are "number", "Boolean", and "string".
50 Boolean will likely stay in its current form, the other two might, but
51 could just as easily be changed.
55 Versions of the interpreter which obviously do not support a complete
56 language will be named after creeks and streams. This one is Jamison
59 Once we have something reasonably resembling a complete language, the
60 names of rivers will be used.
61 Early versions of the compiler will be named after seas. Major
62 releases of the compiler will be named after oceans. Hopefully I will
63 be finished once I get to the Pacific Ocean release.
67 As well as parsing and executing a program, the interpreter can print
68 out the program from the parsed internal structure. This is useful
69 for validating the parsing.
70 So the main requirements of the interpreter are:
72 - Parse the program, possibly with tracing,
73 - Analyse the parsed program to ensure consistency,
75 - Execute the program, if no parsing or consistency errors were found.
77 This is all performed by a single C program extracted with
80 There will be two formats for printing the program: a default and one
81 that uses bracketing. So a `--bracket` command line option is needed
82 for that. Normally the first code section found is used, however an
83 alternate section can be requested so that a file (such as this one)
84 can contain multiple programs. This is effected with the `--section`
87 This code must be compiled with `-fplan9-extensions` so that anonymous
88 structures can be used.
90 ###### File: oceani.mk
92 myCFLAGS := -Wall -g -fplan9-extensions
93 CFLAGS := $(filter-out $(myCFLAGS),$(CFLAGS)) $(myCFLAGS)
94 myLDLIBS:= libparser.o libscanner.o libmdcode.o -licuuc
95 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
97 all :: $(LDLIBS) oceani
98 oceani.c oceani.h : oceani.mdc parsergen
99 ./parsergen -o oceani --LALR --tag Parser oceani.mdc
100 oceani.mk: oceani.mdc md2c
103 oceani: oceani.o $(LDLIBS)
104 $(CC) $(CFLAGS) -o oceani oceani.o $(LDLIBS)
106 ###### Parser: header
109 struct parse_context {
110 struct token_config config;
119 #define container_of(ptr, type, member) ({ \
120 const typeof( ((type *)0)->member ) *__mptr = (ptr); \
121 (type *)( (char *)__mptr - offsetof(type,member) );})
123 #define config2context(_conf) container_of(_conf, struct parse_context, \
126 ###### Parser: reduce
127 struct parse_context *c = config2context(config);
135 #include <sys/mman.h>
154 static char Usage[] =
155 "Usage: oceani --trace --print --noexec --brackets --section=SectionName prog.ocn\n";
156 static const struct option long_options[] = {
157 {"trace", 0, NULL, 't'},
158 {"print", 0, NULL, 'p'},
159 {"noexec", 0, NULL, 'n'},
160 {"brackets", 0, NULL, 'b'},
161 {"section", 1, NULL, 's'},
/* Short-option string handed to getopt_long().
 * -t, -p, -n and -b are plain flags.  -s is followed by ':' because it
 * takes a required argument, mirroring the "section" long option which is
 * declared with has_arg = 1; without the ':' getopt leaves optarg NULL
 * for -s and the requested section name is silently ignored.
 */
const char *options = "tpnbs:";
165 int main(int argc, char *argv[])
170 struct section *s, *ss;
171 char *section = NULL;
172 struct parse_context context = {
174 .ignored = (1 << TK_mark),
175 .number_chars = ".,_+- ",
180 int doprint=0, dotrace=0, doexec=1, brackets=0;
182 while ((opt = getopt_long(argc, argv, options, long_options, NULL))
185 case 't': dotrace=1; break;
186 case 'p': doprint=1; break;
187 case 'n': doexec=0; break;
188 case 'b': brackets=1; break;
189 case 's': section = optarg; break;
190 default: fprintf(stderr, Usage);
194 if (optind >= argc) {
195 fprintf(stderr, "oceani: no input file given\n");
198 fd = open(argv[optind], O_RDONLY);
200 fprintf(stderr, "oceani: cannot open %s\n", argv[optind]);
203 context.file_name = argv[optind];
204 len = lseek(fd, 0, 2);
205 file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
206 s = code_extract(file, file+len, NULL);
208 fprintf(stderr, "oceani: could not find any code in %s\n",
213 ## context initialization
216 for (ss = s; ss; ss = ss->next) {
217 struct text sec = ss->section;
218 if (sec.len == strlen(section) &&
219 strncmp(sec.txt, section, sec.len) == 0)
223 fprintf(stderr, "oceani: cannot find section %s\n",
229 parse_oceani(ss->code, &context.config, dotrace ? stderr : NULL);
232 fprintf(stderr, "oceani: no program found.\n");
233 context.parse_error = 1;
235 if (context.prog && doprint) {
238 print_exec(context.prog, 0, brackets);
240 if (context.prog && doexec && !context.parse_error) {
241 if (!analyse_prog(context.prog, &context)) {
242 fprintf(stderr, "oceani: type error in program - not running.\n");
245 interp_prog(context.prog, argv+optind+1);
247 free_exec(context.prog);
250 struct section *t = s->next;
256 ## free context types
257 exit(context.parse_error ? 1 : 0);
262 The four requirements of parse, analyse, print, interpret apply to
263 each language element individually so that is how most of the code
266 Three of the four are fairly self explanatory. The one that requires
267 a little explanation is the analysis step.
269 The current language design does not require the types of variables to
270 be declared, but they must still have a single type. Different
271 operations impose different requirements on the variables, for example
272 addition requires both arguments to be numeric, and assignment
273 requires the variable on the left to have the same type as the
274 expression on the right.
276 Analysis involves propagating these type requirements around and
277 consequently setting the type of each variable. If any requirements
278 are violated (e.g. a string is compared with a number) or if a
279 variable needs to have two different types, then an error is raised
280 and the program will not run.
282 If the same variable is declared in both branches of an 'if/else', or
283 in all cases of a 'switch' then the multiple instances may be merged
284 into just one variable if the variable is referenced after the
285 conditional statement. When this happens, the types must naturally be
286 consistent across all the branches. When the variable is not used
287 outside the if, the variables in the different branches are distinct
288 and can be of different types.
290 Determining the types of all variables early is important for
291 processing command line arguments. These can be assigned to any of
292 several types of variable, but we must first know the correct type so
293 any required conversion can happen. If a variable is associated with
294 a command line argument but no type can be interpreted (e.g. the
295 variable is only ever used in a `print` statement), then the type is
298 Undeclared names may only appear in "use" statements and "case" expressions.
299 These names are given a type of "label" and a unique value.
300 This allows them to fill the role of a name in an enumerated type, which
301 is useful for testing the `switch` statement.
303 As we will see, the condition part of a `while` statement can return
304 either a Boolean or some other type. This requires that the expected
305 type that gets passed around comprises a type and a flag to indicate
306 that `Tbool` is also permitted.
308 As there are, as yet, no distinct types that are compatible, there
309 isn't much subtlety in the analysis. When we have distinct number
310 types, this will become more interesting.
314 When analysis discovers an inconsistency it needs to report an error;
315 just refusing to run the code ensures that the error doesn't cascade,
316 but by itself it isn't very useful. A clear understanding of the sort
317 of error messages that are useful will help guide the process of
320 At a simplistic level, the only sort of error that type analysis can
321 report is that the type of some construct doesn't match a contextual
322 requirement. For example, in `4 + "hello"` the addition provides a
323 contextual requirement for numbers, but `"hello"` is not a number. In
324 this particular example no further information is needed as the types
325 are obvious from local information. When a variable is involved that
326 isn't the case. It may be helpful to explain why the variable has a
327 particular type, by indicating the location where the type was set,
328 whether by declaration or usage.
330 Using a recursive-descent analysis we can easily detect a problem at
331 multiple locations. In "`hello:= "there"; 4 + hello`" the addition
332 will detect that one argument is not a number and the usage of `hello`
333 will detect that a number was wanted, but not provided. In this
334 (early) version of the language, we will generate error reports at
335 multiple locations, so the use of `hello` will report an error and
336 explain where the value was set, and the addition will report an error
337 and say why numbers are needed. To be able to report locations for
338 errors, each language element will need to record a file location
339 (line and column) and each variable will need to record the language
340 element where its type was set. For now we will assume that each line
341 of an error message indicates one location in the file, and up to 2
342 types. So we provide a `printf`-like function which takes a format, a
343 location (a `struct exec` which has not yet been introduced), and 2
344 types. "`%1`" reports the first type, "`%2`" reports the second. We
345 will need a function to print the location, once we know how that is
346 stored. As will be explained later, there are sometimes extra rules for
347 type matching and they might affect error messages, we need to pass those
350 As well as type errors, we sometimes need to report problems with
351 tokens, which might be unexpected or might name a type that has not
352 been defined. For these we have `tok_err()` which reports an error
353 with a given token. Each of the error functions sets the flag in the
354 context to indicate that parsing failed.
358 static void fput_loc(struct exec *loc, FILE *f);
360 ###### core functions
362 static void type_err(struct parse_context *c,
363 char *fmt, struct exec *loc,
364 struct type *t1, int rules, struct type *t2)
366 fprintf(stderr, "%s:", c->file_name);
367 fput_loc(loc, stderr);
368 for (; *fmt ; fmt++) {
375 case '%': fputc(*fmt, stderr); break; // NOTEST
376 default: fputc('?', stderr); break; // NOTEST
378 type_print(t1, stderr);
381 type_print(t2, stderr);
390 static void tok_err(struct parse_context *c, char *fmt, struct token *t)
392 fprintf(stderr, "%s:%d:%d: %s: %.*s\n", c->file_name, t->line, t->col, fmt,
393 t->txt.len, t->txt.txt);
397 ## Entities: declared and predeclared.
399 There are various "things" that the language and/or the interpreter
400 needs to know about to parse and execute a program. These include
401 types, variables, values, and executable code. These are all lumped
402 together under the term "entities" (calling them "objects" would be
403 confusing) and introduced here. The following section will present the
404 different specific code elements which comprise or manipulate these
409 Values come in a wide range of types, with more likely to be added.
410 Each type needs to be able to parse and print its own values (for
411 convenience at least) as well as to compare two values, at least for
412 equality and possibly for order. For now, values might need to be
413 duplicated and freed, though eventually such manipulations will be
414 better integrated into the language.
416 Rather than requiring every numeric type to support all numeric
417 operations (add, multiply, etc), we allow types to be able to present
418 as one of a few standard types: integer, float, and fraction. The
419 existence of these conversion functions will eventually enable types to
420 determine if they are compatible with other types, though such types
421 have not yet been implemented.
423 Named types are stored in a simple linked list. Objects of each type are
424 "values" which are often passed around by value.
431 ## value union fields
439 void (*init)(struct type *type, struct value *val);
440 int (*parse)(struct type *type, char *str, struct value *val);
441 void (*print)(struct type *type, struct value *val);
442 void (*print_type)(struct type *type, FILE *f);
443 int (*cmp_order)(struct type *t1, struct type *t2,
444 struct value *v1, struct value *v2);
445 int (*cmp_eq)(struct type *t1, struct type *t2,
446 struct value *v1, struct value *v2);
447 void (*dup)(struct type *type, struct value *vold, struct value *vnew);
448 void (*free)(struct type *type, struct value *val);
449 void (*free_type)(struct type *t);
450 long long (*to_int)(struct value *v);
451 double (*to_float)(struct value *v);
452 int (*to_mpq)(mpq_t *q, struct value *v);
461 struct type *typelist;
465 static struct type *find_type(struct parse_context *c, struct text s)
467 struct type *l = c->typelist;
470 text_cmp(l->name, s) != 0)
475 static struct type *add_type(struct parse_context *c, struct text s,
480 n = calloc(1, sizeof(*n));
483 n->next = c->typelist;
488 static void free_type(struct type *t)
490 /* The type is always a reference to something in the
491 * context, so we don't need to free anything.
495 static void free_value(struct type *type, struct value *v)
501 static void type_print(struct type *type, FILE *f)
504 fputs("*unknown*type*", f);
505 else if (type->name.len)
506 fprintf(f, "%.*s", type->name.len, type->name.txt);
507 else if (type->print_type)
508 type->print_type(type, f);
510 fputs("*invalid*type*", f); // NOTEST
513 static void val_init(struct type *type, struct value *val)
515 if (type && type->init)
516 type->init(type, val);
519 static void dup_value(struct type *type,
520 struct value *vold, struct value *vnew)
522 if (type && type->dup)
523 type->dup(type, vold, vnew);
526 static int value_cmp(struct type *tl, struct type *tr,
527 struct value *left, struct value *right)
529 if (tl && tl->cmp_order)
530 return tl->cmp_order(tl, tr, left, right);
531 if (tl && tl->cmp_eq)
532 return tl->cmp_eq(tl, tr, left, right);
536 static void print_value(struct type *type, struct value *v)
538 if (type && type->print)
539 type->print(type, v);
541 printf("*Unknown*"); // NOTEST
544 static int parse_value(struct type *type, char *arg,
547 if (type && type->parse)
548 return type->parse(type, arg, val);
552 static struct value *val_alloc(struct type *t, struct value *init)
558 ret = calloc(1, t->size);
560 memcpy(ret, init, t->size);
568 static void free_value(struct type *type, struct value *v);
569 static int type_compat(struct type *require, struct type *have, int rules);
570 static void type_print(struct type *type, FILE *f);
571 static void val_init(struct type *type, struct value *v);
572 static void dup_value(struct type *type,
573 struct value *vold, struct value *vnew);
574 static int value_cmp(struct type *tl, struct type *tr,
575 struct value *left, struct value *right);
576 static void print_value(struct type *type, struct value *v);
577 static int parse_value(struct type *type, char *arg, struct value *val);
579 ###### free context types
581 while (context.typelist) {
582 struct type *t = context.typelist;
584 context.typelist = t->next;
592 Values of the base types can be numbers, which we represent as
593 multi-precision fractions, strings, Booleans and labels. When
594 analysing the program we also need to allow for places where no value
595 is meaningful (type `Tnone`) and where we don't know what type to
596 expect yet (type is `NULL`).
598 Values are never shared, they are always copied when used, and freed
599 when no longer needed.
601 When propagating type information around the program, we need to
602 determine if two types are compatible, where type `NULL` is compatible
603 with anything. There are two special cases with type compatibility,
604 both related to the Conditional Statement which will be described
605 later. In some cases a Boolean can be accepted as well as some other
606 primary type, and in others any type is acceptable except a label (`Vlabel`).
607 A separate function encoding these cases will simplify some code later.
611 int (*compat)(struct type *this, struct type *other);
615 static int type_compat(struct type *require, struct type *have, int rules)
617 if ((rules & Rboolok) && have == Tbool)
619 if ((rules & Rnolabel) && have == Tlabel)
621 if (!require || !have)
625 return require->compat(require, have);
627 return require == have;
630 When assigning command line arguments to variables, we need to be able
631 to parse each type from a string.
635 #include "parse_string.h"
636 #include "parse_number.h"
639 myLDLIBS := libnumber.o libstring.o -lgmp
640 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
642 ###### type union fields
643 enum vtype {Vnone, Vstr, Vnum, Vbool, Vlabel} vtype;
645 ###### value union fields
652 static void _free_value(struct type *type, struct value *v)
656 switch (type->vtype) {
658 case Vstr: free(v->str.txt); break;
659 case Vnum: mpq_clear(v->num); break;
665 ###### value functions
667 static void _val_init(struct type *type, struct value *val)
669 switch(type->vtype) {
670 case Vnone: // NOTEST
673 mpq_init(val->num); break;
675 val->str.txt = malloc(1);
681 case Vlabel: // NOTEST
682 val->label = NULL; // NOTEST
687 static void _dup_value(struct type *type,
688 struct value *vold, struct value *vnew)
690 switch (type->vtype) {
691 case Vnone: // NOTEST
694 vnew->label = vold->label;
697 vnew->bool = vold->bool;
701 mpq_set(vnew->num, vold->num);
704 vnew->str.len = vold->str.len;
705 vnew->str.txt = malloc(vnew->str.len);
706 memcpy(vnew->str.txt, vold->str.txt, vnew->str.len);
711 static int _value_cmp(struct type *tl, struct type *tr,
712 struct value *left, struct value *right)
716 return tl - tr; // NOTEST
718 case Vlabel: cmp = left->label == right->label ? 0 : 1; break;
719 case Vnum: cmp = mpq_cmp(left->num, right->num); break;
720 case Vstr: cmp = text_cmp(left->str, right->str); break;
721 case Vbool: cmp = left->bool - right->bool; break;
722 case Vnone: cmp = 0; // NOTEST
727 static void _print_value(struct type *type, struct value *v)
729 switch (type->vtype) {
730 case Vnone: // NOTEST
731 printf("*no-value*"); break; // NOTEST
732 case Vlabel: // NOTEST
733 printf("*label-%p*", v->label); break; // NOTEST
735 printf("%.*s", v->str.len, v->str.txt); break;
737 printf("%s", v->bool ? "True":"False"); break;
742 mpf_set_q(fl, v->num);
743 gmp_printf("%Fg", fl);
750 static int _parse_value(struct type *type, char *arg, struct value *val)
756 switch(type->vtype) {
757 case Vlabel: // NOTEST
758 case Vnone: // NOTEST
761 val->str.len = strlen(arg);
762 val->str.txt = malloc(val->str.len);
763 memcpy(val->str.txt, arg, val->str.len);
770 tx.txt = arg; tx.len = strlen(tx.txt);
771 if (number_parse(val->num, tail, tx) == 0)
774 mpq_neg(val->num, val->num);
776 printf("Unsupported suffix: %s\n", arg);
781 if (strcasecmp(arg, "true") == 0 ||
782 strcmp(arg, "1") == 0)
784 else if (strcasecmp(arg, "false") == 0 ||
785 strcmp(arg, "0") == 0)
788 printf("Bad bool: %s\n", arg);
796 static void _free_value(struct type *type, struct value *v);
798 static struct type base_prototype = {
800 .parse = _parse_value,
801 .print = _print_value,
802 .cmp_order = _value_cmp,
803 .cmp_eq = _value_cmp,
808 static struct type *Tbool, *Tstr, *Tnum, *Tnone, *Tlabel;
811 static struct type *add_base_type(struct parse_context *c, char *n,
812 enum vtype vt, int size)
814 struct text txt = { n, strlen(n) };
817 t = add_type(c, txt, &base_prototype);
820 t->align = size > sizeof(void*) ? sizeof(void*) : size;
821 if (t->size & (t->align - 1))
822 t->size = (t->size | (t->align - 1)) + 1;
826 ###### context initialization
828 Tbool = add_base_type(&context, "Boolean", Vbool, sizeof(char));
829 Tstr = add_base_type(&context, "string", Vstr, sizeof(struct text));
830 Tnum = add_base_type(&context, "number", Vnum, sizeof(mpq_t));
831 Tnone = add_base_type(&context, "none", Vnone, 0);
832 Tlabel = add_base_type(&context, "label", Vlabel, sizeof(void*));
836 Variables are scoped named values. We store the names in a linked list
837 of "bindings" sorted in lexical order, and use sequential search and
844 struct binding *next; // in lexical order
848 This linked list is stored in the parse context so that "reduce"
849 functions can find or add variables, and so the analysis phase can
850 ensure that every variable gets a type.
854 struct binding *varlist; // In lexical order
858 static struct binding *find_binding(struct parse_context *c, struct text s)
860 struct binding **l = &c->varlist;
865 (cmp = text_cmp((*l)->name, s)) < 0)
869 n = calloc(1, sizeof(*n));
876 Each name can be linked to multiple variables defined in different
877 scopes. Each scope starts where the name is declared and continues
878 until the end of the containing code block. Scopes of a given name
879 cannot nest, so a declaration while a name is in-scope is an error.
881 ###### binding fields
882 struct variable *var;
886 struct variable *previous;
889 struct binding *name;
890 struct exec *where_decl;// where name was declared
891 struct exec *where_set; // where type was set
895 While the naming seems strange, we include local constants in the
896 definition of variables. A name declared `var := value` can
897 subsequently be changed, but a name declared `var ::= value` cannot -
900 ###### variable fields
903 Scopes in parallel branches can be partially merged. More
904 specifically, if a given name is declared in both branches of an
905 if/else then its scope is a candidate for merging. Similarly if
906 every branch of an exhaustive switch (e.g. has an "else" clause)
907 declares a given name, then the scopes from the branches are
908 candidates for merging.
910 Note that names declared inside a loop (which is only parallel to
911 itself) are never visible after the loop. Similarly names defined in
912 scopes which are not parallel, such as those started by `for` and
913 `switch`, are never visible after the scope. Only variables defined in
914 both `then` and `else` (including the implicit then after an `if`, and
915 excluding `then` used with `for`) and in all `case`s and `else` of a
916 `switch` or `while` can be visible beyond the `if`/`switch`/`while`.
918 Labels, which are a bit like variables, follow different rules.
919 Labels are not explicitly declared, but if an undeclared name appears
920 in a context where a label is legal, that effectively declares the
921 name as a label. The declaration remains in force (or in scope) at
922 least to the end of the immediately containing block and conditionally
923 in any larger containing block which does not declare the name in some
924 other way. Importantly, the conditional scope extension happens even
925 if the label is only used in one parallel branch of a conditional --
926 when used in one branch it is treated as having been declared in all
929 Merge candidates are tentatively visible beyond the end of the
930 branching statement which creates them. If the name is used, the
931 merge is affirmed and they become a single variable visible at the
932 outer layer. If not - if it is redeclared first - the merge lapses.
934 To track scopes we have an extra stack, implemented as a linked list,
935 which roughly parallels the parse stack and which is used exclusively
936 for scoping. When a new scope is opened, a new frame is pushed and
937 the child-count of the parent frame is incremented. This child-count
938 is used to distinguish between the first of a set of parallel scopes,
939 in which declared variables must not be in scope, and subsequent
940 branches, where they may already be conditionally scoped.
942 To push a new frame *before* any code in the frame is parsed, we need a
943 grammar reduction. This is most easily achieved with a grammar
944 element which derives the empty string, and creates the new scope when
945 it is recognised. This can be placed, for example, between a keyword
946 like "if" and the code following it.
950 struct scope *parent;
956 struct scope *scope_stack;
959 static void scope_pop(struct parse_context *c)
961 struct scope *s = c->scope_stack;
963 c->scope_stack = s->parent;
968 static void scope_push(struct parse_context *c)
970 struct scope *s = calloc(1, sizeof(*s));
972 c->scope_stack->child_count += 1;
973 s->parent = c->scope_stack;
981 OpenScope -> ${ scope_push(c); }$
982 ClosePara -> ${ var_block_close(c, CloseParallel); }$
984 Each variable records a scope depth and is in one of four states:
986 - "in scope". This is the case between the declaration of the
987 variable and the end of the containing block, and also between
988 the usage which affirms a merge and the end of that block.
990 The scope depth is not greater than the current parse context scope
991 nest depth. When the block of that depth closes, the state will
992 change. To achieve this, all "in scope" variables are linked
993 together as a stack in nesting order.
995 - "pending". The "in scope" block has closed, but other parallel
996 scopes are still being processed. So far, every parallel block at
997 the same level that has closed has declared the name.
999 The scope depth is the depth of the last parallel block that
1000 enclosed the declaration, and that has closed.
1002 - "conditionally in scope". The "in scope" block and all parallel
1003 scopes have closed, and no further mention of the name has been
1004 seen. This state includes a secondary nest depth which records the
1005 outermost scope seen since the variable became conditionally in
1006 scope. If a use of the name is found, the variable becomes "in
1007 scope" and that secondary depth becomes the recorded scope depth.
1008 If the name is declared as a new variable, the old variable becomes
1009 "out of scope" and the recorded scope depth stays unchanged.
1011 - "out of scope". The variable is neither in scope nor conditionally
1012 in scope. It is permanently out of scope now and can be removed from
1013 the "in scope" stack.
1015 ###### variable fields
1016 int depth, min_depth;
1017 enum { OutScope, PendingScope, CondScope, InScope } scope;
1018 struct variable *in_scope;
1020 ###### parse context
1022 struct variable *in_scope;
1024 All variables with the same name are linked together using the
1025 'previous' link. Those variables that have been affirmatively merged all
1026 have a 'merged' pointer that points to one primary variable - the most
1027 recently declared instance. When merging variables, we need to also
1028 adjust the 'merged' pointer on any other variables that had previously
1029 been merged with the one that will no longer be primary.
1031 A variable that is no longer the most recent instance of a name may
1032 still have "pending" scope, if it might still be merged with the most
1033 recent instance. These variables don't really belong in the
1034 "in_scope" list, but are not immediately removed when a new instance
1035 is found. Instead, they are detected and ignored when considering the
1036 list of in_scope names.
1038 ###### variable fields
1039 struct variable *merged;
1041 ###### ast functions
1043 static void variable_merge(struct variable *primary, struct variable *secondary)
1047 if (primary->merged)
1049 primary = primary->merged;
1051 for (v = primary->previous; v; v=v->previous)
1052 if (v == secondary || v == secondary->merged ||
1053 v->merged == secondary ||
1054 (v->merged && v->merged == secondary->merged)) {
1055 v->scope = OutScope;
1056 v->merged = primary;
1060 ###### free context vars
1062 while (context.varlist) {
1063 struct binding *b = context.varlist;
1064 struct variable *v = b->var;
1065 context.varlist = b->next;
1068 struct variable *t = v;
1071 free_value(t->type, t->val);
1074 // This is a global constant
1075 free_exec(t->where_decl);
1080 #### Manipulating Bindings
1082 When a name is conditionally visible, a new declaration discards the
1083 old binding - the condition lapses. Conversely a usage of the name
1084 affirms the visibility and extends it to the end of the containing
1085 block - i.e. the block that contains both the original declaration and
1086 the latest usage. This is determined from `min_depth`. When a
1087 conditionally visible variable gets affirmed like this, it is also
1088 merged with other conditionally visible variables with the same name.
1090 When we parse a variable declaration we either report an error if the
1091 name is currently bound, or create a new variable at the current nest
1092 depth if the name is unbound or bound to a conditionally scoped or
1093 pending-scope variable. If the previous variable was conditionally
1094 scoped, it and its homonyms become out-of-scope.
1096 When we parse a variable reference (including non-declarative assignment
1097 "foo = bar") we report an error if the name is not bound or is bound to
1098 a pending-scope variable; update the scope if the name is bound to a
1099 conditionally scoped variable; or just proceed normally if the named
1100 variable is in scope.
1102 When we exit a scope, any variables bound at this level are either
1103 marked out of scope or pending-scoped, depending on whether the scope
1104 was sequential or parallel. Here a "parallel" scope means the "then"
1105 or "else" part of a conditional, or any "case" or "else" branch of a
1106 switch. Other scopes are "sequential".
1108 When exiting a parallel scope we check if there are any variables that
1109 were previously pending and are still visible. If there are, then
1110 they weren't redeclared in the most recent scope, so they cannot be
1111 merged and must become out-of-scope. If it is not the first of
1112 parallel scopes (based on `child_count`), we check that there was a
1113 previous binding that is still pending-scope. If there isn't, the new
1114 variable must now be out-of-scope.
1116 When exiting a sequential scope that immediately enclosed parallel
1117 scopes, we need to resolve any pending-scope variables. If there was
1118 no `else` clause, and we cannot determine that the `switch` was exhaustive,
1119 we need to mark all pending-scope variables as out-of-scope. Otherwise
1120 all pending-scope variables become conditionally scoped.
1123 enum closetype { CloseSequential, CloseParallel, CloseElse };
1125 ###### ast functions
1127 static struct variable *var_decl(struct parse_context *c, struct text s)
1129 struct binding *b = find_binding(c, s);
1130 struct variable *v = b->var;
1132 switch (v ? v->scope : OutScope) {
1134 /* Caller will report the error */
1138 v && v->scope == CondScope;
1140 v->scope = OutScope;
1144 v = calloc(1, sizeof(*v));
1145 v->previous = b->var;
1148 v->min_depth = v->depth = c->scope_depth;
1150 v->in_scope = c->in_scope;
1156 static struct variable *var_ref(struct parse_context *c, struct text s)
1158 struct binding *b = find_binding(c, s);
1159 struct variable *v = b->var;
1160 struct variable *v2;
1162 switch (v ? v->scope : OutScope) {
1165 /* Caller will report the error */
1168 /* All CondScope variables of this name need to be merged
1169 * and become InScope
1171 v->depth = v->min_depth;
1173 for (v2 = v->previous;
1174 v2 && v2->scope == CondScope;
1176 variable_merge(v, v2);
1184 static void var_block_close(struct parse_context *c, enum closetype ct)
1186 /* Close off all variables that are in_scope */
1187 struct variable *v, **vp, *v2;
1190 for (vp = &c->in_scope;
1191 v = *vp, v && v->depth > c->scope_depth && v->min_depth > c->scope_depth;
1193 if (v->name->var == v) switch (ct) {
1195 case CloseParallel: /* handle PendingScope */
1199 if (c->scope_stack->child_count == 1)
1200 v->scope = PendingScope;
1201 else if (v->previous &&
1202 v->previous->scope == PendingScope)
1203 v->scope = PendingScope;
1204 else if (v->type == Tlabel)
1205 v->scope = PendingScope;
1206 else if (v->name->var == v)
1207 v->scope = OutScope;
1208 if (ct == CloseElse) {
1209 /* All Pending variables with this name
1210 * are now Conditional */
1212 v2 && v2->scope == PendingScope;
1214 v2->scope = CondScope;
1219 v2 && v2->scope == PendingScope;
1221 if (v2->type != Tlabel)
1222 v2->scope = OutScope;
1224 case OutScope: break;
1227 case CloseSequential:
1228 if (v->type == Tlabel)
1229 v->scope = PendingScope;
1232 v->scope = OutScope;
1235 /* There was no 'else', so we can only become
1236 * conditional if we know the cases were exhaustive,
1237 * and that doesn't mean anything yet.
1238 * So only labels become conditional..
1241 v2 && v2->scope == PendingScope;
1243 if (v2->type == Tlabel) {
1244 v2->scope = CondScope;
1245 v2->min_depth = c->scope_depth;
1247 v2->scope = OutScope;
1250 case OutScope: break;
1254 if (v->scope == OutScope || v->name->var != v)
1263 Executables can be lots of different things. In many cases an
1264 executable is just an operation combined with one or two other
1265 executables. This allows for expressions and lists etc. Other times an
1266 executable is something quite specific like a constant or variable name.
1267 So we define a `struct exec` to be a general executable with a type, and
1268 a `struct binode` which is a subclass of `exec`, forms a node in a
1269 binary tree, and holds an operation. There will be other subclasses,
1270 and to access these we need to be able to `cast` the `exec` into the
1271 various other types. The first field in any `struct exec` is the type
1272 from the `exec_types` enum.
/* cast(structname, pointer): checked downcast to `struct structname *`.
 * Takes the address of `pointer->type`; if that address is non-NULL and the
 * tag stored there is not X<structname>, abort() is called.  Otherwise the
 * same address is yielded as a `struct structname *`, which assumes `type`
 * is the first field of the struct.  Written as a GNU statement expression
 * using `typeof`, so the whole macro can be used as an expression.
 */
1275 #define cast(structname, pointer) ({ \
1276 const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \
1277 if (__mptr && *__mptr != X##structname) abort(); \
1278 (struct structname *)( (char *)__mptr);})
1280 #define new(structname) ({ \
1281 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1282 __ptr->type = X##structname; \
1283 __ptr->line = -1; __ptr->column = -1; \
1286 #define new_pos(structname, token) ({ \
1287 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1288 __ptr->type = X##structname; \
1289 __ptr->line = token.line; __ptr->column = token.col; \
1298 enum exec_types type;
1306 struct exec *left, *right;
1309 ###### ast functions
1311 static int __fput_loc(struct exec *loc, FILE *f)
1315 if (loc->line >= 0) {
1316 fprintf(f, "%d:%d: ", loc->line, loc->column);
1319 if (loc->type == Xbinode)
1320 return __fput_loc(cast(binode,loc)->left, f) ||
1321 __fput_loc(cast(binode,loc)->right, f);
1324 static void fput_loc(struct exec *loc, FILE *f)
1326 if (!__fput_loc(loc, f))
1327 fprintf(f, "??:??: "); // NOTEST
1330 Each different type of `exec` node needs a number of functions defined,
1331 a bit like methods. We must be able to free it, print it, analyse it
1332 and execute it. Once we have specific `exec` types we will need to
1333 parse them too. Let's take this a bit more slowly.
1337 The parser generator requires a `free_foo` function for each struct
1338 that stores attributes and they will often be `exec`s and subtypes
1339 there-of. So we need `free_exec` which can handle all the subtypes,
1340 and we need `free_binode`.
1342 ###### ast functions
1344 static void free_binode(struct binode *b)
1349 free_exec(b->right);
1353 ###### core functions
1354 static void free_exec(struct exec *e)
1363 ###### forward decls
1365 static void free_exec(struct exec *e);
1367 ###### free exec cases
1368 case Xbinode: free_binode(cast(binode, e)); break;
1372 Printing an `exec` requires that we know the current indent level for
1373 printing line-oriented components. As will become clear later, we
1374 also want to know what sort of bracketing to use.
1376 ###### ast functions
1378 static void do_indent(int i, char *str)
1385 ###### core functions
1386 static void print_binode(struct binode *b, int indent, int bracket)
1390 ## print binode cases
1394 static void print_exec(struct exec *e, int indent, int bracket)
1400 print_binode(cast(binode, e), indent, bracket); break;
1405 ###### forward decls
1407 static void print_exec(struct exec *e, int indent, int bracket);
1411 As discussed, analysis involves propagating type requirements around the
1412 program and looking for errors.
1414 So `propagate_types` is passed an expected type (being a `struct type`
1415 pointer together with some `val_rules` flags) that the `exec` is
1416 expected to return, and returns the type that it does return, either
1417 of which can be `NULL` signifying "unknown". An `ok` flag is passed
1418 by reference. It is set to `0` when an error is found, and `2` when
1419 any change is made. If it remains unchanged at `1`, then no more
1420 propagation is needed.
1424 enum val_rules {Rnolabel = 1<<0, Rboolok = 1<<1, Rnoconstant = 2<<1};
1428 if (rules & Rnolabel)
1429 fputs(" (labels not permitted)", stderr);
1432 ###### core functions
1434 static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1435 struct type *type, int rules);
1436 static struct type *__propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1437 struct type *type, int rules)
1444 switch (prog->type) {
1447 struct binode *b = cast(binode, prog);
1449 ## propagate binode cases
1453 ## propagate exec cases
1458 static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1459 struct type *type, int rules)
1461 struct type *ret = __propagate_types(prog, c, ok, type, rules);
1470 Interpreting an `exec` doesn't require anything but the `exec`. State
1471 is stored in variables and each variable will be directly linked from
1472 within the `exec` tree. The exception to this is the whole `program`
1473 which needs to look at command line arguments. The `program` will be
1474 interpreted separately.
1476 Each `exec` can return a value combined with a type in `struct lrval`.
1477 The type may be `Tnone` but must be non-NULL. Some `exec`s will return
1478 the location of a value, which can be updated, in `lval`. Others will
1479 set `lval` to NULL indicating that there is a value of appropriate type
1483 ###### core functions
1487 struct value rval, *lval;
1490 static struct lrval _interp_exec(struct exec *e);
1492 static struct value interp_exec(struct exec *e, struct type **typeret)
1494 struct lrval ret = _interp_exec(e);
1496 if (!ret.type) abort();
1498 *typeret = ret.type;
1500 dup_value(ret.type, ret.lval, &ret.rval);
1504 static struct value *linterp_exec(struct exec *e, struct type **typeret)
1506 struct lrval ret = _interp_exec(e);
1509 *typeret = ret.type;
1513 static struct lrval _interp_exec(struct exec *e)
1516 struct value rv = {}, *lrv = NULL;
1517 struct type *rvtype;
1519 rvtype = ret.type = Tnone;
1529 struct binode *b = cast(binode, e);
1530 struct value left, right, *lleft;
1531 struct type *ltype, *rtype;
1532 ltype = rtype = Tnone;
1534 ## interp binode cases
1536 free_value(ltype, &left);
1537 free_value(rtype, &right);
1540 ## interp exec cases
1550 Now that we have the shape of the interpreter in place we can add some
1551 complex types and connect them in to the data structures and the
1552 different phases of parse, analyse, print, interpret.
1554 Thus far we have arrays and structs.
1556 Some complex types do not exist in a name table, so they are kept
1557 on a linked list in the context (`anon_typelist`). This allows them to
1558 be freed when parsing is complete.
1562 Arrays can be declared by giving a size and a type, as `[size]type` so
1563 `freq:[26]number` declares `freq` to be an array of 26 numbers. The
1564 size can be either a literal number, or a named constant. Some day an
1565 arbitrary expression will be supported.
1567 Arrays cannot be assigned. When pointers are introduced we will also
1568 introduce array slices which can refer to part or all of an array -
1569 the assignment syntax will create a slice. For now, an array can only
1570 ever be referenced by the name it is declared with. It is likely that
1571 a "`copy`" primitive will eventually be defined which can be used to
1572 make a copy of an array with controllable recursive depth.
1574 ###### type union fields
1578 struct variable *vsize;
1579 struct type *member;
1582 ###### value union fields
1585 ###### value functions
1587 static void array_init(struct type *type, struct value *val)
1591 if (type->array.vsize) {
1594 mpz_tdiv_q(q, mpq_numref(type->array.vsize->val->num),
1595 mpq_denref(type->array.vsize->val->num));
1596 type->array.size = mpz_get_si(q);
1599 type->size = type->array.size * type->array.member->size;
1600 type->align = type->array.member->align;
1604 for (i = 0; i < type->array.size; i++) {
1606 v = (void*)val->ptr + i * type->array.member->size;
1607 val_init(type->array.member, v);
1611 static void array_free(struct type *type, struct value *val)
1615 for (i = 0; i < type->array.size; i++) {
1617 v = (void*)val->ptr + i * type->array.member->size;
1618 free_value(type->array.member, v);
1622 static int array_compat(struct type *require, struct type *have)
1624 if (have->compat != require->compat)
1626 /* Both are arrays, so we can look at details */
1627 if (!type_compat(require->array.member, have->array.member, 0))
1629 if (require->array.vsize == NULL && have->array.vsize == NULL)
1630 return require->array.size == have->array.size;
1632 return require->array.vsize == have->array.vsize;
1635 static void array_print_type(struct type *type, FILE *f)
1638 if (type->array.vsize) {
1639 struct binding *b = type->array.vsize->name;
1640 fprintf(f, "%.*s]", b->name.len, b->name.txt);
1642 fprintf(f, "%d]", type->array.size);
1643 type_print(type->array.member, f);
1646 static struct type array_prototype = {
1648 .print_type = array_print_type,
1649 .compat = array_compat,
1655 | [ NUMBER ] Type ${
1656 $0 = calloc(1, sizeof(struct type));
1657 *($0) = array_prototype;
1658 $0->array.member = $<4;
1659 $0->array.vsize = NULL;
1663 if (number_parse(num, tail, $2.txt) == 0)
1664 tok_err(c, "error: unrecognised number", &$2);
1666 tok_err(c, "error: unsupported number suffix", &$2);
1668 $0->array.size = mpz_get_ui(mpq_numref(num));
1669 if (mpz_cmp_ui(mpq_denref(num), 1) != 0) {
1670 tok_err(c, "error: array size must be an integer",
1672 } else if (mpz_cmp_ui(mpq_numref(num), 1UL << 30) >= 0)
1673 tok_err(c, "error: array size is too large",
1677 $0->next = c->anon_typelist;
1678 c->anon_typelist = $0;
1682 | [ IDENTIFIER ] Type ${ {
1683 struct variable *v = var_ref(c, $2.txt);
1686 tok_err(c, "error: name undeclared", &$2);
1687 else if (!v->constant)
1688 tok_err(c, "error: array size must be a constant", &$2);
1690 $0 = calloc(1, sizeof(struct type));
1691 *($0) = array_prototype;
1692 $0->array.member = $<4;
1694 $0->array.vsize = v;
1695 $0->next = c->anon_typelist;
1696 c->anon_typelist = $0;
1699 ###### parse context
1701 struct type *anon_typelist;
1703 ###### free context types
1705 while (context.anon_typelist) {
1706 struct type *t = context.anon_typelist;
1708 context.anon_typelist = t->next;
1715 ###### variable grammar
1717 | Variable [ Expression ] ${ {
1718 struct binode *b = new(binode);
1725 ###### print binode cases
1727 print_exec(b->left, -1, bracket);
1729 print_exec(b->right, -1, bracket);
1733 ###### propagate binode cases
1735 /* left must be an array, right must be a number,
1736 * result is the member type of the array
1738 propagate_types(b->right, c, ok, Tnum, 0);
1739 t = propagate_types(b->left, c, ok, NULL, rules & Rnoconstant);
1740 if (!t || t->compat != array_compat) {
1741 type_err(c, "error: %1 cannot be indexed", prog, t, 0, NULL);
1744 if (!type_compat(type, t->array.member, rules)) {
1745 type_err(c, "error: have %1 but need %2", prog,
1746 t->array.member, rules, type);
1748 return t->array.member;
1752 ###### interp binode cases
1757 lleft = linterp_exec(b->left, <ype);
1758 right = interp_exec(b->right, &rtype);
1760 mpz_tdiv_q(q, mpq_numref(right.num), mpq_denref(right.num));
1764 rvtype = ltype->array.member;
1765 if (i >= 0 && i < ltype->array.size)
1766 lrv = (void*)lleft + i * rvtype->size;
1768 val_init(ltype->array.member, &rv);
1775 A `struct` is a data-type that contains one or more other data-types.
1776 It differs from an array in that each member can be of a different
1777 type, and they are accessed by name rather than by number. Thus you
1778 cannot choose an element by calculation, you need to know what you
1781 The language makes no promises about how a given structure will be
1782 stored in memory - it is free to rearrange fields to suit whatever
1783 criteria seem important.
1785 Structs are declared separately from program code - they cannot be
1786 declared in-line in a variable declaration like arrays can. A struct
1787 is given a name and this name is used to identify the type - the name
1788 is not prefixed by the word `struct` as it would be in C.
1790 Structs are only treated as the same if they have the same name.
1791 Simply having the same fields in the same order is not enough. This
1792 might change once we can create structure initializers from a list of
1795 Each component datum is identified much like a variable is declared,
1796 with a name, one or two colons, and a type. The type cannot be omitted
1797 as there is no opportunity to deduce the type from usage. An initial
1798 value can be given following an equals sign, so
1800 ##### Example: a struct type
1806 would declare a type called "complex" which has two number fields,
1807 each initialised to zero.
1809 Structs will need to be declared separately from the code that uses
1810 them, so we will need to be able to print out the declaration of a
1811 struct when reprinting the whole program. So a `print_type_decl` type
1812 function will be needed.
1814 ###### type union fields
1826 ###### type functions
1827 void (*print_type_decl)(struct type *type, FILE *f);
1829 ###### value functions
1831 static void structure_init(struct type *type, struct value *val)
1835 for (i = 0; i < type->structure.nfields; i++) {
1837 v = (void*) val->ptr + type->structure.fields[i].offset;
1838 val_init(type->structure.fields[i].type, v);
1842 static void structure_free(struct type *type, struct value *val)
1846 for (i = 0; i < type->structure.nfields; i++) {
1848 v = (void*)val->ptr + type->structure.fields[i].offset;
1849 free_value(type->structure.fields[i].type, v);
1853 static void structure_free_type(struct type *t)
1856 for (i = 0; i < t->structure.nfields; i++)
1857 if (t->structure.fields[i].init) {
1858 free_value(t->structure.fields[i].type,
1859 t->structure.fields[i].init);
1860 free(t->structure.fields[i].init);
1862 free(t->structure.fields);
1865 static struct type structure_prototype = {
1866 .init = structure_init,
1867 .free = structure_free,
1868 .free_type = structure_free_type,
1869 .print_type_decl = structure_print_type,
1883 ###### free exec cases
1885 free_exec(cast(fieldref, e)->left);
1889 ###### variable grammar
1891 | Variable . IDENTIFIER ${ {
1892 struct fieldref *fr = new_pos(fieldref, $2);
1899 ###### print exec cases
1903 struct fieldref *f = cast(fieldref, e);
1904 print_exec(f->left, -1, bracket);
1905 printf(".%.*s", f->name.len, f->name.txt);
1909 ###### ast functions
1910 static int find_struct_index(struct type *type, struct text field)
1913 for (i = 0; i < type->structure.nfields; i++)
1914 if (text_cmp(type->structure.fields[i].name, field) == 0)
1919 ###### propagate exec cases
1923 struct fieldref *f = cast(fieldref, prog);
1924 struct type *st = propagate_types(f->left, c, ok, NULL, 0);
1927 type_err(c, "error: unknown type for field access", f->left,
1929 else if (st->init != structure_init)
1930 type_err(c, "error: field reference attempted on %1, not a struct",
1931 f->left, st, 0, NULL);
1932 else if (f->index == -2) {
1933 f->index = find_struct_index(st, f->name);
1935 type_err(c, "error: cannot find requested field in %1",
1936 f->left, st, 0, NULL);
1938 if (f->index >= 0) {
1939 struct type *ft = st->structure.fields[f->index].type;
1940 if (!type_compat(type, ft, rules))
1941 type_err(c, "error: have %1 but need %2", prog,
1948 ###### interp exec cases
1951 struct fieldref *f = cast(fieldref, e);
1953 struct value *lleft = linterp_exec(f->left, <ype);
1954 lrv = (void*)lleft->ptr + ltype->structure.fields[f->index].offset;
1955 rvtype = ltype->structure.fields[f->index].type;
1961 struct fieldlist *prev;
1965 ###### ast functions
1966 static void free_fieldlist(struct fieldlist *f)
1970 free_fieldlist(f->prev);
1972 free_value(f->f.type, f->f.init);
1978 ###### top level grammar
1979 DeclareStruct -> struct IDENTIFIER FieldBlock Newlines ${ {
1981 add_type(c, $2.txt, &structure_prototype);
1983 struct fieldlist *f;
1985 for (f = $3; f; f=f->prev)
1988 t->structure.nfields = cnt;
1989 t->structure.fields = calloc(cnt, sizeof(struct field));
1992 int a = f->f.type->align;
1994 t->structure.fields[cnt] = f->f;
1995 if (t->size & (a-1))
1996 t->size = (t->size | (a-1)) + 1;
1997 t->structure.fields[cnt].offset = t->size;
1998 t->size += ((f->f.type->size - 1) | (a-1)) + 1;
2007 FieldBlock -> { IN OptNL FieldLines OUT OptNL } ${ $0 = $<FL; }$
2008 | { SimpleFieldList } ${ $0 = $<SFL; }$
2009 | IN OptNL FieldLines OUT ${ $0 = $<FL; }$
2010 | SimpleFieldList EOL ${ $0 = $<SFL; }$
2012 FieldLines -> SimpleFieldList Newlines ${ $0 = $<SFL; }$
2013 | FieldLines SimpleFieldList Newlines ${
2018 SimpleFieldList -> Field ${ $0 = $<F; }$
2019 | SimpleFieldList ; Field ${
2023 | SimpleFieldList ; ${
2026 | ERROR ${ tok_err(c, "Syntax error in struct field", &$1); }$
2028 Field -> IDENTIFIER : Type = Expression ${ {
2031 $0 = calloc(1, sizeof(struct fieldlist));
2032 $0->f.name = $1.txt;
2037 propagate_types($<5, c, &ok, $3, 0);
2042 struct value vl = interp_exec($5, NULL);
2043 $0->f.init = val_alloc($0->f.type, &vl);
2046 | IDENTIFIER : Type ${
2047 $0 = calloc(1, sizeof(struct fieldlist));
2048 $0->f.name = $1.txt;
2050 $0->f.init = val_alloc($0->f.type, NULL);
2053 ###### forward decls
2054 static void structure_print_type(struct type *t, FILE *f);
2056 ###### value functions
2057 static void structure_print_type(struct type *t, FILE *f)
2061 fprintf(f, "struct %.*s\n", t->name.len, t->name.txt);
2063 for (i = 0; i < t->structure.nfields; i++) {
2064 struct field *fl = t->structure.fields + i;
2065 fprintf(f, " %.*s : ", fl->name.len, fl->name.txt);
2066 type_print(fl->type, f);
2067 if (fl->type->print && fl->init) {
2069 if (fl->type == Tstr)
2071 print_value(fl->type, fl->init);
2072 if (fl->type == Tstr)
2079 ###### print type decls
2084 while (target != 0) {
2086 for (t = context.typelist; t ; t=t->next)
2087 if (t->print_type_decl) {
2096 t->print_type_decl(t, stdout);
2102 ## Executables: the elements of code
2104 Each code element needs to be parsed, printed, analysed,
2105 interpreted, and freed. There are several, so let's just start with
2106 the easy ones and work our way up.
2110 We have already met values as separate objects. When manifest
2111 constants appear in the program text, that must result in an executable
2112 which has a constant value. So the `val` structure embeds a value in
2125 ###### ast functions
2126 struct val *new_val(struct type *T, struct token tk)
2128 struct val *v = new_pos(val, tk);
2137 $0 = new_val(Tbool, $1);
2141 $0 = new_val(Tbool, $1);
2145 $0 = new_val(Tnum, $1);
2148 if (number_parse($0->val.num, tail, $1.txt) == 0)
2149 mpq_init($0->val.num);
2151 tok_err(c, "error: unsupported number suffix",
2156 $0 = new_val(Tstr, $1);
2159 string_parse(&$1, '\\', &$0->val.str, tail);
2161 tok_err(c, "error: unsupported string suffix",
2166 $0 = new_val(Tstr, $1);
2169 string_parse(&$1, '\\', &$0->val.str, tail);
2171 tok_err(c, "error: unsupported string suffix",
2176 ###### print exec cases
2179 struct val *v = cast(val, e);
2180 if (v->vtype == Tstr)
2182 print_value(v->vtype, &v->val);
2183 if (v->vtype == Tstr)
2188 ###### propagate exec cases
2191 struct val *val = cast(val, prog);
2192 if (!type_compat(type, val->vtype, rules))
2193 type_err(c, "error: expected %1%r found %2",
2194 prog, type, rules, val->vtype);
2198 ###### interp exec cases
2200 rvtype = cast(val, e)->vtype;
2201 dup_value(rvtype, &cast(val, e)->val, &rv);
2204 ###### ast functions
2205 static void free_val(struct val *v)
2208 free_value(v->vtype, &v->val);
2212 ###### free exec cases
2213 case Xval: free_val(cast(val, e)); break;
2215 ###### ast functions
2216 // Move all nodes from 'b' to 'rv', reversing their order.
2217 // In 'b' 'left' is a list, and 'right' is the last node.
2218 // In 'rv', left' is the first node and 'right' is a list.
2219 static struct binode *reorder_bilist(struct binode *b)
2221 struct binode *rv = NULL;
2224 struct exec *t = b->right;
2228 b = cast(binode, b->left);
2238 Just as we used a `val` to wrap a value into an `exec`, we similarly
2239 need a `var` to wrap a `variable` into an exec. While each `val`
2240 contained a copy of the value, each `var` holds a link to the variable
2241 because it really is the same variable no matter where it appears.
2242 When a variable is used, we need to remember to follow the `->merged`
2243 link to find the primary instance.
2251 struct variable *var;
2257 VariableDecl -> IDENTIFIER : ${ {
2258 struct variable *v = var_decl(c, $1.txt);
2259 $0 = new_pos(var, $1);
2264 v = var_ref(c, $1.txt);
2266 type_err(c, "error: variable '%v' redeclared",
2268 type_err(c, "info: this is where '%v' was first declared",
2269 v->where_decl, NULL, 0, NULL);
2272 | IDENTIFIER :: ${ {
2273 struct variable *v = var_decl(c, $1.txt);
2274 $0 = new_pos(var, $1);
2280 v = var_ref(c, $1.txt);
2282 type_err(c, "error: variable '%v' redeclared",
2284 type_err(c, "info: this is where '%v' was first declared",
2285 v->where_decl, NULL, 0, NULL);
2288 | IDENTIFIER : Type ${ {
2289 struct variable *v = var_decl(c, $1.txt);
2290 $0 = new_pos(var, $1);
2298 v = var_ref(c, $1.txt);
2300 type_err(c, "error: variable '%v' redeclared",
2302 type_err(c, "info: this is where '%v' was first declared",
2303 v->where_decl, NULL, 0, NULL);
2306 | IDENTIFIER :: Type ${ {
2307 struct variable *v = var_decl(c, $1.txt);
2308 $0 = new_pos(var, $1);
2317 v = var_ref(c, $1.txt);
2319 type_err(c, "error: variable '%v' redeclared",
2321 type_err(c, "info: this is where '%v' was first declared",
2322 v->where_decl, NULL, 0, NULL);
2327 Variable -> IDENTIFIER ${ {
2328 struct variable *v = var_ref(c, $1.txt);
2329 $0 = new_pos(var, $1);
2331 /* This might be a label - allocate a var just in case */
2332 v = var_decl(c, $1.txt);
2340 cast(var, $0)->var = v;
2345 Type -> IDENTIFIER ${
2346 $0 = find_type(c, $1.txt);
2349 "error: undefined type", &$1);
2356 ###### print exec cases
2359 struct var *v = cast(var, e);
2361 struct binding *b = v->var->name;
2362 printf("%.*s", b->name.len, b->name.txt);
2369 if (loc->type == Xvar) {
2370 struct var *v = cast(var, loc);
2372 struct binding *b = v->var->name;
2373 fprintf(stderr, "%.*s", b->name.len, b->name.txt);
2375 fputs("???", stderr); // NOTEST
2377 fputs("NOTVAR", stderr); // NOTEST
2380 ###### propagate exec cases
2384 struct var *var = cast(var, prog);
2385 struct variable *v = var->var;
2387 type_err(c, "%d:BUG: no variable!!", prog, NULL, 0, NULL); // NOTEST
2388 return Tnone; // NOTEST
2392 if (v->constant && (rules & Rnoconstant)) {
2393 type_err(c, "error: Cannot assign to a constant: %v",
2394 prog, NULL, 0, NULL);
2395 type_err(c, "info: name was defined as a constant here",
2396 v->where_decl, NULL, 0, NULL);
2399 if (v->type == Tnone && v->where_decl == prog)
2400 type_err(c, "error: variable used but not declared: %v",
2401 prog, NULL, 0, NULL);
2402 if (v->type == NULL) {
2403 if (type && *ok != 0) {
2406 v->where_set = prog;
2411 if (!type_compat(type, v->type, rules)) {
2412 type_err(c, "error: expected %1%r but variable '%v' is %2", prog,
2413 type, rules, v->type);
2414 type_err(c, "info: this is where '%v' was set to %1", v->where_set,
2415 v->type, rules, NULL);
2422 ###### interp exec cases
2425 struct var *var = cast(var, e);
2426 struct variable *v = var->var;
2435 ###### ast functions
2437 static void free_var(struct var *v)
2442 ###### free exec cases
2443 case Xvar: free_var(cast(var, e)); break;
2445 ### Expressions: Conditional
2447 Our first user of the `binode` will be conditional expressions, which
2448 is a bit odd as they actually have three components. That will be
2449 handled by having 2 binodes for each expression. The conditional
2450 expression is the lowest precedence operator which is why we define it
2451 first - to start the precedence list.
2453 Conditional expressions are of the form "value `if` condition `else`
2454 other_value". They associate to the right, so everything to the right
2455 of `else` is part of an else value, while only a higher-precedence expression to
2456 the left of `if` is the if value. Between `if` and `else` there is no
2457 room for ambiguity, so a full conditional expression is allowed in
2469 Expression -> Expression if Expression else Expression $$ifelse ${ {
2470 struct binode *b1 = new(binode);
2471 struct binode *b2 = new(binode);
2480 ## expression grammar
2482 ###### print binode cases
2485 b2 = cast(binode, b->right);
2486 if (bracket) printf("(");
2487 print_exec(b2->left, -1, bracket);
2489 print_exec(b->left, -1, bracket);
2491 print_exec(b2->right, -1, bracket);
2492 if (bracket) printf(")");
2495 ###### propagate binode cases
2498 /* cond must be Tbool, others must match */
2499 struct binode *b2 = cast(binode, b->right);
2502 propagate_types(b->left, c, ok, Tbool, 0);
2503 t = propagate_types(b2->left, c, ok, type, Rnolabel);
2504 t2 = propagate_types(b2->right, c, ok, type ?: t, Rnolabel);
2508 ###### interp binode cases
2511 struct binode *b2 = cast(binode, b->right);
2512 left = interp_exec(b->left, <ype);
2514 rv = interp_exec(b2->left, &rvtype);
2516 rv = interp_exec(b2->right, &rvtype);
2520 ### Expressions: Boolean
2522 The next class of expressions to use the `binode` will be Boolean
2523 expressions. "`and then`" and "`or else`" are similar to `and` and `or` and
2524 have the same corresponding precedence. The difference is that they don't
2525 evaluate the second expression if not necessary.
2534 ###### expr precedence
2539 ###### expression grammar
2540 | Expression or Expression ${ {
2541 struct binode *b = new(binode);
2547 | Expression or else Expression ${ {
2548 struct binode *b = new(binode);
2555 | Expression and Expression ${ {
2556 struct binode *b = new(binode);
2562 | Expression and then Expression ${ {
2563 struct binode *b = new(binode);
2570 | not Expression ${ {
2571 struct binode *b = new(binode);
2577 ###### print binode cases
2579 if (bracket) printf("(");
2580 print_exec(b->left, -1, bracket);
2582 print_exec(b->right, -1, bracket);
2583 if (bracket) printf(")");
2586 if (bracket) printf("(");
2587 print_exec(b->left, -1, bracket);
2588 printf(" and then ");
2589 print_exec(b->right, -1, bracket);
2590 if (bracket) printf(")");
2593 if (bracket) printf("(");
2594 print_exec(b->left, -1, bracket);
2596 print_exec(b->right, -1, bracket);
2597 if (bracket) printf(")");
2600 if (bracket) printf("(");
2601 print_exec(b->left, -1, bracket);
2602 printf(" or else ");
2603 print_exec(b->right, -1, bracket);
2604 if (bracket) printf(")");
2607 if (bracket) printf("(");
2609 print_exec(b->right, -1, bracket);
2610 if (bracket) printf(")");
2613 ###### propagate binode cases
2619 /* both must be Tbool, result is Tbool */
2620 propagate_types(b->left, c, ok, Tbool, 0);
2621 propagate_types(b->right, c, ok, Tbool, 0);
2622 if (type && type != Tbool)
2623 type_err(c, "error: %1 operation found where %2 expected", prog,
2627 ###### interp binode cases
2629 rv = interp_exec(b->left, &rvtype);
2630 right = interp_exec(b->right, &rtype);
2631 rv.bool = rv.bool && right.bool;
2634 rv = interp_exec(b->left, &rvtype);
2636 rv = interp_exec(b->right, NULL);
2639 rv = interp_exec(b->left, &rvtype);
2640 right = interp_exec(b->right, &rtype);
2641 rv.bool = rv.bool || right.bool;
2644 rv = interp_exec(b->left, &rvtype);
2646 rv = interp_exec(b->right, NULL);
2649 rv = interp_exec(b->right, &rvtype);
2653 ### Expressions: Comparison
2655 Of slightly higher precedence than Boolean expressions are Comparisons.
2656 A comparison takes arguments of any comparable type, but the two types
2659 To simplify the parsing we introduce an `eop` which can record an
2660 expression operator, and the `CMPop` non-terminal will match one of them.
2667 ###### ast functions
2668 static void free_eop(struct eop *e)
2682 ###### expr precedence
2683 $LEFT < > <= >= == != CMPop
2685 ###### expression grammar
2686 | Expression CMPop Expression ${ {
2687 struct binode *b = new(binode);
2697 CMPop -> < ${ $0.op = Less; }$
2698 | > ${ $0.op = Gtr; }$
2699 | <= ${ $0.op = LessEq; }$
2700 | >= ${ $0.op = GtrEq; }$
2701 | == ${ $0.op = Eql; }$
2702 | != ${ $0.op = NEql; }$
2704 ###### print binode cases
2712 if (bracket) printf("(");
2713 print_exec(b->left, -1, bracket);
2715 case Less: printf(" < "); break;
2716 case LessEq: printf(" <= "); break;
2717 case Gtr: printf(" > "); break;
2718 case GtrEq: printf(" >= "); break;
2719 case Eql: printf(" == "); break;
2720 case NEql: printf(" != "); break;
2721 default: abort(); // NOTEST
2723 print_exec(b->right, -1, bracket);
2724 if (bracket) printf(")");
2727 ###### propagate binode cases
2734 /* Both must match but not be labels, result is Tbool */
2735 t = propagate_types(b->left, c, ok, NULL, Rnolabel);
2737 propagate_types(b->right, c, ok, t, 0);
2739 t = propagate_types(b->right, c, ok, NULL, Rnolabel);
2741 t = propagate_types(b->left, c, ok, t, 0);
2743 if (!type_compat(type, Tbool, 0))
2744 type_err(c, "error: Comparison returns %1 but %2 expected", prog,
2745 Tbool, rules, type);
2748 ###### interp binode cases
2757 left = interp_exec(b->left, <ype);
2758 right = interp_exec(b->right, &rtype);
2759 cmp = value_cmp(ltype, rtype, &left, &right);
2762 case Less: rv.bool = cmp < 0; break;
2763 case LessEq: rv.bool = cmp <= 0; break;
2764 case Gtr: rv.bool = cmp > 0; break;
2765 case GtrEq: rv.bool = cmp >= 0; break;
2766 case Eql: rv.bool = cmp == 0; break;
2767 case NEql: rv.bool = cmp != 0; break;
2768 default: rv.bool = 0; break; // NOTEST
2773 ### Expressions: The rest
2775 The remaining expressions with the highest precedence are arithmetic,
2776 string concatenation, and string conversion. String concatenation
2777 (`++`) has the same precedence as multiplication and division, but lower
2780 String conversion is a temporary feature until I get a better type
2781 system. `$` is a prefix operator which expects a string and returns
2784 `+` and `-` are both infix and prefix operations (where they are
2785 absolute value and negation). These have different operator names.
2787 We also have a 'Bracket' operator which records where parentheses were
2788 found. This makes it easy to reproduce these when printing. Possibly I
2789 should only insert brackets where needed for precedence.
2799 ###### expr precedence
2805 ###### expression grammar
2806 | Expression Eop Expression ${ {
2807 struct binode *b = new(binode);
2814 | Expression Top Expression ${ {
2815 struct binode *b = new(binode);
2822 | ( Expression ) ${ {
2823 struct binode *b = new_pos(binode, $1);
2828 | Uop Expression ${ {
2829 struct binode *b = new(binode);
2834 | Value ${ $0 = $<1; }$
2835 | Variable ${ $0 = $<1; }$
2838 Eop -> + ${ $0.op = Plus; }$
2839 | - ${ $0.op = Minus; }$
2841 Uop -> + ${ $0.op = Absolute; }$
2842 | - ${ $0.op = Negate; }$
2843 | $ ${ $0.op = StringConv; }$
2845 Top -> * ${ $0.op = Times; }$
2846 | / ${ $0.op = Divide; }$
2847 | % ${ $0.op = Rem; }$
2848 | ++ ${ $0.op = Concat; }$
2850 ###### print binode cases
2857 if (bracket) printf("(");
2858 print_exec(b->left, indent, bracket);
2860 case Plus: fputs(" + ", stdout); break;
2861 case Minus: fputs(" - ", stdout); break;
2862 case Times: fputs(" * ", stdout); break;
2863 case Divide: fputs(" / ", stdout); break;
2864 case Rem: fputs(" % ", stdout); break;
2865 case Concat: fputs(" ++ ", stdout); break;
2866 default: abort(); // NOTEST
2868 print_exec(b->right, indent, bracket);
2869 if (bracket) printf(")");
2874 if (bracket) printf("(");
2876 case Absolute: fputs("+", stdout); break;
2877 case Negate: fputs("-", stdout); break;
2878 case StringConv: fputs("$", stdout); break;
2879 default: abort(); // NOTEST
2881 print_exec(b->right, indent, bracket);
2882 if (bracket) printf(")");
2886 print_exec(b->right, indent, bracket);
2890 ###### propagate binode cases
2896 /* both must be numbers, result is Tnum */
2899 /* as propagate_types ignores a NULL,
2900 * unary ops fit here too */
2901 propagate_types(b->left, c, ok, Tnum, 0);
2902 propagate_types(b->right, c, ok, Tnum, 0);
2903 if (!type_compat(type, Tnum, 0))
2904 type_err(c, "error: Arithmetic returns %1 but %2 expected", prog,
2909 /* both must be Tstr, result is Tstr */
2910 propagate_types(b->left, c, ok, Tstr, 0);
2911 propagate_types(b->right, c, ok, Tstr, 0);
2912 if (!type_compat(type, Tstr, 0))
2913 type_err(c, "error: Concat returns %1 but %2 expected", prog,
2918 /* op must be string, result is number */
2919 propagate_types(b->left, c, ok, Tstr, 0);
2920 if (!type_compat(type, Tnum, 0))
2922 "error: Can only convert string to number, not %1",
2923 prog, type, 0, NULL);
2927 return propagate_types(b->right, c, ok, type, 0);
2929 ###### interp binode cases
2932 rv = interp_exec(b->left, &rvtype);
2933 right = interp_exec(b->right, &rtype);
2934 mpq_add(rv.num, rv.num, right.num);
2937 rv = interp_exec(b->left, &rvtype);
2938 right = interp_exec(b->right, &rtype);
2939 mpq_sub(rv.num, rv.num, right.num);
2942 rv = interp_exec(b->left, &rvtype);
2943 right = interp_exec(b->right, &rtype);
2944 mpq_mul(rv.num, rv.num, right.num);
2947 rv = interp_exec(b->left, &rvtype);
2948 right = interp_exec(b->right, &rtype);
2949 mpq_div(rv.num, rv.num, right.num);
2954 left = interp_exec(b->left, <ype);
2955 right = interp_exec(b->right, &rtype);
2956 mpz_init(l); mpz_init(r); mpz_init(rem);
2957 mpz_tdiv_q(l, mpq_numref(left.num), mpq_denref(left.num));
2958 mpz_tdiv_q(r, mpq_numref(right.num), mpq_denref(right.num));
2959 mpz_tdiv_r(rem, l, r);
2960 val_init(Tnum, &rv);
2961 mpq_set_z(rv.num, rem);
2962 mpz_clear(r); mpz_clear(l); mpz_clear(rem);
2967 rv = interp_exec(b->right, &rvtype);
2968 mpq_neg(rv.num, rv.num);
2971 rv = interp_exec(b->right, &rvtype);
2972 mpq_abs(rv.num, rv.num);
2975 rv = interp_exec(b->right, &rvtype);
2978 left = interp_exec(b->left, <ype);
2979 right = interp_exec(b->right, &rtype);
2981 rv.str = text_join(left.str, right.str);
2984 right = interp_exec(b->right, &rvtype);
2987 char *str = strndup(right.str.txt, right.str.len);
2988 parse_value(rvtype, str, &rv);
2992 ###### value functions
2994 static struct text text_join(struct text a, struct text b)
2997 rv.len = a.len + b.len;
2998 rv.txt = malloc(rv.len);
2999 memcpy(rv.txt, a.txt, a.len);
3000 memcpy(rv.txt+a.len, b.txt, b.len);
3004 ### Blocks, Statements, and Statement lists.
3006 Now that we have expressions out of the way we need to turn to
3007 statements. There are simple statements and more complex statements.
3008 Simple statements do not contain (syntactic) newlines, complex statements do.
3010 Statements often come in sequences and we have corresponding simple
3011 statement lists and complex statement lists.
3012 The former comprise only simple statements separated by semicolons.
3013 The latter comprise complex statements and simple statement lists. They are
3014 separated by newlines. Thus the semicolon is only used to separate
3015 simple statements on the one line. This may be overly restrictive,
3016 but I'm not sure I ever want a complex statement to share a line with
3019 Note that a simple statement list can still use multiple lines if
3020 subsequent lines are indented, so
3022 ###### Example: wrapped simple statement list
3027 is a single simple statement list. This might allow room for
3028 confusion, so I'm not set on it yet.
3030 A simple statement list needs no extra syntax. A complex statement
3031 list has two syntactic forms. It can be enclosed in braces (much like
3032 C blocks), or it can be introduced by an indent and continue until an
3033 unindented newline (much like Python blocks). With this extra syntax
3034 it is referred to as a block.
3036 Note that a block does not have to include any newlines if it only
3037 contains simple statements. So both of:
3039 if condition: a=b; d=f
3041 if condition { a=b; print f }
3045 In either case the list is constructed from a `binode` list with
3046 `Block` as the operator. When parsing the list it is most convenient
3047 to append to the end, so a list is a list and a statement. When using
3048 the list it is more convenient to consider a list to be a statement
3049 and a list. So we need a function to re-order a list.
3050 `reorder_bilist` serves this purpose.
3052 The only stand-alone statement we introduce at this stage is `pass`
3053 which does nothing and is represented as a `NULL` pointer in a `Block`
3054 list. Other stand-alone statements will follow once the infrastructure
3060 ###### expr precedence
3066 Block -> { IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
3067 | { SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
3068 | SimpleStatements ; ${ $0 = reorder_bilist($<SS); }$
3069 | SimpleStatements EOL ${ $0 = reorder_bilist($<SS); }$
3070 | IN OptNL Statementlist OUT ${ $0 = $<Sl; }$
3072 OpenBlock -> OpenScope { IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
3073 | OpenScope { SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
3074 | OpenScope SimpleStatements ; ${ $0 = reorder_bilist($<SS); }$
3075 | OpenScope SimpleStatements EOL ${ $0 = reorder_bilist($<SS); }$
3076 | IN OpenScope OptNL Statementlist OUT ${ $0 = $<Sl; }$
3078 UseBlock -> { OpenScope IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
3079 | { OpenScope SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
3080 | IN OpenScope OptNL Statementlist OUT ${ $0 = $<Sl; }$
3082 ColonBlock -> { IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
3083 | { SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
3084 | : SimpleStatements ; ${ $0 = reorder_bilist($<SS); }$
3085 | : SimpleStatements EOL ${ $0 = reorder_bilist($<SS); }$
3086 | : IN OptNL Statementlist OUT ${ $0 = $<Sl; }$
3088 Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<CS); }$
3090 ComplexStatements -> ComplexStatements ComplexStatement ${
3100 | ComplexStatement ${
3112 ComplexStatement -> SimpleStatements Newlines ${
3113 $0 = reorder_bilist($<SS);
3115 | SimpleStatements ; Newlines ${
3116 $0 = reorder_bilist($<SS);
3118 ## ComplexStatement Grammar
3121 SimpleStatements -> SimpleStatements ; SimpleStatement ${
3127 | SimpleStatement ${
3134 SimpleStatement -> pass ${ $0 = NULL; }$
3135 | ERROR ${ tok_err(c, "Syntax error in statement", &$1); }$
3136 ## SimpleStatement Grammar
3138 ###### print binode cases
3142 if (b->left == NULL)
3145 print_exec(b->left, indent, bracket);
3148 print_exec(b->right, indent, bracket);
3151 // block, one per line
3152 if (b->left == NULL)
3153 do_indent(indent, "pass\n");
3155 print_exec(b->left, indent, bracket);
3157 print_exec(b->right, indent, bracket);
3161 ###### propagate binode cases
3164 /* If any statement returns something other than Tnone
3165 * or Tbool then all such must return same type.
3166 * As each statement may be Tnone or something else,
3167 * we must always pass NULL (unknown) down, otherwise an incorrect
3168 * error might occur. We never return Tnone unless it is
3173 for (e = b; e; e = cast(binode, e->right)) {
3174 t = propagate_types(e->left, c, ok, NULL, rules);
3175 if ((rules & Rboolok) && t == Tbool)
3177 if (t && t != Tnone && t != Tbool) {
3181 type_err(c, "error: expected %1%r, found %2",
3182 e->left, type, rules, t);
3188 ###### interp binode cases
3190 while (rvtype == Tnone &&
3193 rv = interp_exec(b->left, &rvtype);
3194 b = cast(binode, b->right);
3198 ### The Print statement
3200 `print` is a simple statement that takes a comma-separated list of
3201 expressions and prints the values separated by spaces and terminated
3202 by a newline. No control of formatting is possible.
3204 `print` faces the same list-ordering issue as blocks, and uses the
3210 ##### expr precedence
3213 ###### SimpleStatement Grammar
3215 | print ExpressionList ${
3216 $0 = reorder_bilist($<2);
3218 | print ExpressionList , ${
3223 $0 = reorder_bilist($0);
3234 ExpressionList -> ExpressionList , Expression ${
3247 ###### print binode cases
3250 do_indent(indent, "print");
3254 print_exec(b->left, -1, bracket);
3258 b = cast(binode, b->right);
3264 ###### propagate binode cases
3267 /* don't care but all must be consistent */
3268 propagate_types(b->left, c, ok, NULL, Rnolabel);
3269 propagate_types(b->right, c, ok, NULL, Rnolabel);
3272 ###### interp binode cases
3278 for ( ; b; b = cast(binode, b->right))
3282 left = interp_exec(b->left, <ype);
3283 print_value(ltype, &left);
3284 free_value(ltype, &left);
3295 ###### Assignment statement
3297 An assignment will assign a value to a variable, providing it hasn't
3298 been declared as a constant. The analysis phase ensures that the type
3299 will be correct so the interpreter just needs to perform the
3300 calculation. There is a form of assignment which declares a new
3301 variable as well as assigning a value. If a name is assigned before
3302 it is declared, an error will be raised as the name is created as
3303 `Tlabel` and it is illegal to assign to such names.
3309 ###### SimpleStatement Grammar
3310 | Variable = Expression ${
3316 | VariableDecl = Expression ${
3324 if ($1->var->where_set == NULL) {
3326 "Variable declared with no type or value: %v",
3336 ###### print binode cases
3339 do_indent(indent, "");
3340 print_exec(b->left, indent, bracket);
3342 print_exec(b->right, indent, bracket);
3349 struct variable *v = cast(var, b->left)->var;
3350 do_indent(indent, "");
3351 print_exec(b->left, indent, bracket);
3352 if (cast(var, b->left)->var->constant) {
3353 if (v->where_decl == v->where_set) {
3355 type_print(v->type, stdout);
3360 if (v->where_decl == v->where_set) {
3362 type_print(v->type, stdout);
3369 print_exec(b->right, indent, bracket);
3376 ###### propagate binode cases
3380 /* Both must match and not be labels,
3381 * Type must support 'dup',
3382 * For Assign, left must not be constant.
3385 t = propagate_types(b->left, c, ok, NULL,
3386 Rnolabel | (b->op == Assign ? Rnoconstant : 0));
3391 if (propagate_types(b->right, c, ok, t, 0) != t)
3392 if (b->left->type == Xvar)
3393 type_err(c, "info: variable '%v' was set as %1 here.",
3394 cast(var, b->left)->var->where_set, t, rules, NULL);
3396 t = propagate_types(b->right, c, ok, NULL, Rnolabel);
3398 propagate_types(b->left, c, ok, t,
3399 (b->op == Assign ? Rnoconstant : 0));
3401 if (t && t->dup == NULL)
3402 type_err(c, "error: cannot assign value of type %1", b, t, 0, NULL);
3407 ###### interp binode cases
3410 lleft = linterp_exec(b->left, <ype);
3411 right = interp_exec(b->right, &rtype);
3413 free_value(ltype, lleft);
3414 dup_value(ltype, &right, lleft);
3421 struct variable *v = cast(var, b->left)->var;
3425 right = interp_exec(b->right, &rtype);
3426 free_value(v->type, v->val);
3428 v->val = val_alloc(v->type, &right);
3431 free_value(v->type, v->val);
3432 v->val = val_alloc(v->type, NULL);
3437 ### The `use` statement
3439 The `use` statement is the last "simple" statement. It is needed when
3440 the condition in a conditional statement is a block. `use` works much
3441 like `return` in C, but only completes the `condition`, not the whole
3447 ###### expr precedence
3450 ###### SimpleStatement Grammar
3452 $0 = new_pos(binode, $1);
3455 if ($0->right->type == Xvar) {
3456 struct var *v = cast(var, $0->right);
3457 if (v->var->type == Tnone) {
3458 /* Convert this to a label */
3459 v->var->type = Tlabel;
3460 v->var->val = val_alloc(Tlabel, NULL);
3461 v->var->val->label = v->var->val;
3466 ###### print binode cases
3469 do_indent(indent, "use ");
3470 print_exec(b->right, -1, bracket);
3475 ###### propagate binode cases
3478 /* result matches value */
3479 return propagate_types(b->right, c, ok, type, 0);
3481 ###### interp binode cases
3484 rv = interp_exec(b->right, &rvtype);
3487 ### The Conditional Statement
3489 This is the biggy and currently the only complex statement. This
3490 subsumes `if`, `while`, `do/while`, `switch`, and some parts of `for`.
3491 It is comprised of a number of parts, all of which are optional though
3492 set combinations apply. Each part is (usually) a key word (`then` is
3493 sometimes optional) followed by either an expression or a code block,
3494 except the `casepart` which is a "key word and an expression" followed
3495 by a code block. The code-block option is valid for all parts and,
3496 where an expression is also allowed, the code block can use the `use`
3497 statement to report a value. If the code block does not report a value
3498 the effect is similar to reporting `True`.
3500 The `else` and `case` parts, as well as `then` when combined with
3501 `if`, can contain a `use` statement which will apply to some
3502 containing conditional statement. `for` parts, `do` parts and `then`
3503 parts used with `for` can never contain a `use`, except in some
3504 subordinate conditional statement.
3506 If there is a `forpart`, it is executed first, only once.
3507 If there is a `dopart`, then it is executed repeatedly providing
3508 always that the `condpart` or `cond`, if present, does not return a non-True
3509 value. `condpart` can fail to return any value if it simply executes
3510 to completion. This is treated the same as returning `True`.
3512 If there is a `thenpart` it will be executed whenever the `condpart`
3513 or `cond` returns True (or does not return any value), but this will happen
3514 *after* `dopart` (when present).
3516 If `elsepart` is present it will be executed at most once when the
3517 condition returns `False` or some value that isn't `True` and isn't
3518 matched by any `casepart`. If there are any `casepart`s, they will be
3519 executed when the condition returns a matching value.
3521 The particular sorts of values allowed in case parts have not yet been
3522 determined in the language design, so nothing is prohibited.
3524 The various blocks in this complex statement potentially provide scope
3525 for variables as described earlier. Each such block must include the
3526 "OpenScope" nonterminal before parsing the block, and must call
3527 `var_block_close()` when closing the block.
3529 The code following "`if`", "`switch`" and "`for`" does not get its own
3530 scope, but is in a scope covering the whole statement, so names
3531 declared there cannot be redeclared elsewhere. Similarly the
3532 condition following "`while`" is in a scope that covers the body
3533 ("`do`" part) of the loop, and which does not allow conditional scope
3534 extension. Code following "`then`" (both looping and non-looping),
3535 "`else`" and "`case`" each get their own local scope.
3537 The type requirements on the code block in a `whilepart` are quite
3538 unusual. It is allowed to return a value of some identifiable type, in
3539 which case the loop aborts and an appropriate `casepart` is run, or it
3540 can return a Boolean, in which case the loop either continues to the
3541 `dopart` (on `True`) or aborts and runs the `elsepart` (on `False`).
3542 This is different both from the `ifpart` code block which is expected to
3543 return a Boolean, or the `switchpart` code block which is expected to
3544 return the same type as the casepart values. The correct analysis of
3545 the type of the `whilepart` code block is the reason for the
3546 `Rboolok` flag which is passed to `propagate_types()`.
3548 The `cond_statement` cannot fit into a `binode` so a new `exec` is
3557 struct exec *action;
3558 struct casepart *next;
3560 struct cond_statement {
3562 struct exec *forpart, *condpart, *dopart, *thenpart, *elsepart;
3563 struct casepart *casepart;
3566 ###### ast functions
3568 static void free_casepart(struct casepart *cp)
3572 free_exec(cp->value);
3573 free_exec(cp->action);
3580 static void free_cond_statement(struct cond_statement *s)
3584 free_exec(s->forpart);
3585 free_exec(s->condpart);
3586 free_exec(s->dopart);
3587 free_exec(s->thenpart);
3588 free_exec(s->elsepart);
3589 free_casepart(s->casepart);
3593 ###### free exec cases
3594 case Xcond_statement: free_cond_statement(cast(cond_statement, e)); break;
3596 ###### ComplexStatement Grammar
3597 | CondStatement ${ $0 = $<1; }$
3599 ###### expr precedence
3600 $TERM for then while do
3607 // A CondStatement must end with EOL, as does CondSuffix and
3609 // ForPart, ThenPart, SwitchPart, CasePart are non-empty and
3610 // may or may not end with EOL
3611 // WhilePart and IfPart include an appropriate Suffix
3614 // Both ForPart and Whilepart open scopes, and CondSuffix only
3615 // closes one - so in the first branch here we have another to close.
3616 CondStatement -> ForPart OptNL ThenPart OptNL WhilePart CondSuffix ${
3619 $0->thenpart = $<TP;
3620 $0->condpart = $WP.condpart; $WP.condpart = NULL;
3621 $0->dopart = $WP.dopart; $WP.dopart = NULL;
3622 var_block_close(c, CloseSequential);
3624 | ForPart OptNL WhilePart CondSuffix ${
3627 $0->condpart = $WP.condpart; $WP.condpart = NULL;
3628 $0->dopart = $WP.dopart; $WP.dopart = NULL;
3629 var_block_close(c, CloseSequential);
3631 | WhilePart CondSuffix ${
3633 $0->condpart = $WP.condpart; $WP.condpart = NULL;
3634 $0->dopart = $WP.dopart; $WP.dopart = NULL;
3636 | SwitchPart OptNL CasePart CondSuffix ${
3638 $0->condpart = $<SP;
3639 $CP->next = $0->casepart;
3640 $0->casepart = $<CP;
3642 | SwitchPart : IN OptNL CasePart CondSuffix OUT Newlines ${
3644 $0->condpart = $<SP;
3645 $CP->next = $0->casepart;
3646 $0->casepart = $<CP;
3648 | IfPart IfSuffix ${
3650 $0->condpart = $IP.condpart; $IP.condpart = NULL;
3651 $0->thenpart = $IP.thenpart; $IP.thenpart = NULL;
3652 // This is where we close an "if" statement
3653 var_block_close(c, CloseSequential);
3656 CondSuffix -> IfSuffix ${
3658 // This is where we close scope of the whole
3659 // "for" or "while" statement
3660 var_block_close(c, CloseSequential);
3662 | Newlines CasePart CondSuffix ${
3664 $CP->next = $0->casepart;
3665 $0->casepart = $<CP;
3667 | CasePart CondSuffix ${
3669 $CP->next = $0->casepart;
3670 $0->casepart = $<CP;
3673 IfSuffix -> Newlines ${ $0 = new(cond_statement); }$
3674 | Newlines ElsePart ${ $0 = $<EP; }$
3675 | ElsePart ${$0 = $<EP; }$
3677 ElsePart -> else OpenBlock Newlines ${
3678 $0 = new(cond_statement);
3679 $0->elsepart = $<OB;
3680 var_block_close(c, CloseElse);
3682 | else OpenScope CondStatement ${
3683 $0 = new(cond_statement);
3684 $0->elsepart = $<CS;
3685 var_block_close(c, CloseElse);
3689 CasePart -> case Expression OpenScope ColonBlock ${
3690 $0 = calloc(1,sizeof(struct casepart));
3693 var_block_close(c, CloseParallel);
3697 // These scopes are closed in CondSuffix
3698 ForPart -> for OpenBlock ${
3702 ThenPart -> then OpenBlock ${
3704 var_block_close(c, CloseSequential);
3708 // This scope is closed in CondSuffix
3709 WhilePart -> while UseBlock OptNL do Block ${
3713 | while OpenScope Expression ColonBlock ${
3714 $0.condpart = $<Exp;
3718 IfPart -> if UseBlock OptNL then OpenBlock ClosePara ${
3722 | if OpenScope Expression OpenScope ColonBlock ClosePara ${
3726 | if OpenScope Expression OpenScope OptNL then Block ClosePara ${
3732 // This scope is closed in CondSuffix
3733 SwitchPart -> switch OpenScope Expression ${
3736 | switch UseBlock ${
3740 ###### print exec cases
3742 case Xcond_statement:
3744 struct cond_statement *cs = cast(cond_statement, e);
3745 struct casepart *cp;
3747 do_indent(indent, "for");
3748 if (bracket) printf(" {\n"); else printf("\n");
3749 print_exec(cs->forpart, indent+1, bracket);
3752 do_indent(indent, "} then {\n");
3754 do_indent(indent, "then\n");
3755 print_exec(cs->thenpart, indent+1, bracket);
3757 if (bracket) do_indent(indent, "}\n");
3761 if (cs->condpart && cs->condpart->type == Xbinode &&
3762 cast(binode, cs->condpart)->op == Block) {
3764 do_indent(indent, "while {\n");
3766 do_indent(indent, "while\n");
3767 print_exec(cs->condpart, indent+1, bracket);
3769 do_indent(indent, "} do {\n");
3771 do_indent(indent, "do\n");
3772 print_exec(cs->dopart, indent+1, bracket);
3774 do_indent(indent, "}\n");
3776 do_indent(indent, "while ");
3777 print_exec(cs->condpart, 0, bracket);
3782 print_exec(cs->dopart, indent+1, bracket);
3784 do_indent(indent, "}\n");
3789 do_indent(indent, "switch");
3791 do_indent(indent, "if");
3792 if (cs->condpart && cs->condpart->type == Xbinode &&
3793 cast(binode, cs->condpart)->op == Block) {
3798 print_exec(cs->condpart, indent+1, bracket);
3800 do_indent(indent, "}\n");
3802 do_indent(indent, "then:\n");
3803 print_exec(cs->thenpart, indent+1, bracket);
3807 print_exec(cs->condpart, 0, bracket);
3813 print_exec(cs->thenpart, indent+1, bracket);
3815 do_indent(indent, "}\n");
3820 for (cp = cs->casepart; cp; cp = cp->next) {
3821 do_indent(indent, "case ");
3822 print_exec(cp->value, -1, 0);
3827 print_exec(cp->action, indent+1, bracket);
3829 do_indent(indent, "}\n");
3832 do_indent(indent, "else");
3837 print_exec(cs->elsepart, indent+1, bracket);
3839 do_indent(indent, "}\n");
3844 ###### propagate exec cases
3845 case Xcond_statement:
3847 // forpart and dopart must return Tnone
3848 // thenpart must return Tnone if there is a dopart,
3849 // otherwise it is like elsepart.
3851 // be bool if there is no casepart
3852 // match casepart->values if there is a switchpart
3853 // either be bool or match casepart->value if there
3855 // elsepart and casepart->action must match the return type
3856 // expected of this statement.
3857 struct cond_statement *cs = cast(cond_statement, prog);
3858 struct casepart *cp;
3860 t = propagate_types(cs->forpart, c, ok, Tnone, 0);
3861 if (!type_compat(Tnone, t, 0))
3863 t = propagate_types(cs->dopart, c, ok, Tnone, 0);
3864 if (!type_compat(Tnone, t, 0))
3867 t = propagate_types(cs->thenpart, c, ok, Tnone, 0);
3868 if (!type_compat(Tnone, t, 0))
3871 if (cs->casepart == NULL)
3872 propagate_types(cs->condpart, c, ok, Tbool, 0);
3874 /* Condpart must match case values, with bool permitted */
3876 for (cp = cs->casepart;
3877 cp && !t; cp = cp->next)
3878 t = propagate_types(cp->value, c, ok, NULL, 0);
3879 if (!t && cs->condpart)
3880 t = propagate_types(cs->condpart, c, ok, NULL, Rboolok);
3881 // Now we have a type (I hope) push it down
3883 for (cp = cs->casepart; cp; cp = cp->next)
3884 propagate_types(cp->value, c, ok, t, 0);
3885 propagate_types(cs->condpart, c, ok, t, Rboolok);
3888 // (if)then, else, and case parts must return expected type.
3889 if (!cs->dopart && !type)
3890 type = propagate_types(cs->thenpart, c, ok, NULL, rules);
3892 type = propagate_types(cs->elsepart, c, ok, NULL, rules);
3893 for (cp = cs->casepart;
3896 type = propagate_types(cp->action, c, ok, NULL, rules);
3899 propagate_types(cs->thenpart, c, ok, type, rules);
3900 propagate_types(cs->elsepart, c, ok, type, rules);
3901 for (cp = cs->casepart; cp ; cp = cp->next)
3902 propagate_types(cp->action, c, ok, type, rules);
3908 ###### interp exec cases
3909 case Xcond_statement:
3911 struct value v, cnd;
3912 struct type *vtype, *cndtype;
3913 struct casepart *cp;
3914 struct cond_statement *c = cast(cond_statement, e);
3917 interp_exec(c->forpart, NULL);
3920 cnd = interp_exec(c->condpart, &cndtype);
3923 if (!(cndtype == Tnone ||
3924 (cndtype == Tbool && cnd.bool != 0)))
3926 // cnd is Tnone or Tbool, doesn't need to be freed
3928 interp_exec(c->dopart, NULL);
3931 rv = interp_exec(c->thenpart, &rvtype);
3932 if (rvtype != Tnone || !c->dopart)
3934 free_value(rvtype, &rv);
3937 } while (c->dopart);
3939 for (cp = c->casepart; cp; cp = cp->next) {
3940 v = interp_exec(cp->value, &vtype);
3941 if (value_cmp(cndtype, vtype, &v, &cnd) == 0) {
3942 free_value(vtype, &v);
3943 free_value(cndtype, &cnd);
3944 rv = interp_exec(cp->action, &rvtype);
3947 free_value(vtype, &v);
3949 free_value(cndtype, &cnd);
3951 rv = interp_exec(c->elsepart, &rvtype);
3958 ### Top level structure
3960 All the language elements so far can be used in various places. Now
3961 it is time to clarify what those places are.
3963 At the top level of a file there will be a number of declarations.
3964 Many of the things that can be declared haven't been described yet,
3965 such as functions, procedures, imports, and probably more.
3966 For now there are three sorts of things that can appear at the top
3967 level. They are predefined constants, `struct` types, and the main
3968 program. While the syntax will allow the main program to appear
3969 multiple times, that will trigger an error if it is actually attempted.
3971 The various declarations do not return anything. They store the
3972 various declarations in the parse context.
3974 ###### Parser: grammar
3977 Ocean -> OptNL DeclarationList
3984 DeclarationList -> Declaration
3985 | DeclarationList Declaration
3987 Declaration -> ERROR Newlines ${
3989 "error: unhandled parse error", &$1);
3995 ## top level grammar
3997 ### The `const` section
3999 As well as being defined in with the code that uses them, constants
4000 can be declared at the top level. These have full-file scope, so they
4001 are always `InScope`. The value of a top level constant can be given
4002 as an expression, and this is evaluated immediately rather than in the
4003 later interpretation stage. Once we add functions to the language, we
4004 will need rules concerning which, if any, can be used to define a top
4007 Constants are defined in a section that starts with the reserved word
4008 `const` and then has a block with a list of assignment statements.
4009 For syntactic consistency, these must use the double-colon syntax to
4010 make it clear that they are constants. Type can also be given: if
4011 not, the type will be determined during analysis, as with other
4014 As the constants are inserted at the head of a list, printing
4015 them in the same order that they were read is not straightforward.
4016 We take a quadratic approach here and count the number of constants
4017 (variables of depth 0), then count down from there, each time
4018 searching through for the Nth constant for decreasing N.
4020 ###### top level grammar
4022 DeclareConstant -> const { IN OptNL ConstList OUT OptNL } Newlines
4023 | const { SimpleConstList } Newlines
4024 | const IN OptNL ConstList OUT Newlines
4025 | const SimpleConstList Newlines
4027 ConstList -> ConstList SimpleConstLine
4029 SimpleConstList -> SimpleConstList ; Const
4032 SimpleConstLine -> SimpleConstList Newlines
4033 | ERROR Newlines ${ tok_err(c, "Syntax error in constant", &$1); }$
4036 CType -> Type ${ $0 = $<1; }$
4039 Const -> IDENTIFIER :: CType = Expression ${ {
4043 v = var_decl(c, $1.txt);
4045 struct var *var = new_pos(var, $1);
4046 v->where_decl = var;
4051 v = var_ref(c, $1.txt);
4052 tok_err(c, "error: name already declared", &$1);
4053 type_err(c, "info: this is where '%v' was first declared",
4054 v->where_decl, NULL, 0, NULL);
4058 propagate_types($5, c, &ok, $3, 0);
4063 struct value res = interp_exec($5, &v->type);
4064 v->val = val_alloc(v->type, &res);
4068 ###### print const decls
4073 while (target != 0) {
4075 for (v = context.in_scope; v; v=v->in_scope)
4076 if (v->depth == 0) {
4087 printf(" %.*s :: ", v->name->name.len, v->name->name.txt);
4088 type_print(v->type, stdout);
4090 if (v->type == Tstr)
4092 print_value(v->type, v->val);
4093 if (v->type == Tstr)
4101 ### Finally the whole program.
4103 Somewhat reminiscent of Pascal, a (current) Ocean program starts with
4104 the keyword "program" and a list of variable names which are assigned
4105 values from command line arguments. Following this is a `block` which
4106 is the code to execute. Unlike Pascal, constants and other
4107 declarations come *before* the program.
4109 As this is the top level, several things are handled a bit
4111 The whole program is not interpreted by `interp_exec` as that isn't
4112 passed the argument list which the program requires. Similarly type
4113 analysis is a bit more interesting at this level.
4118 ###### top level grammar
4120 DeclareProgram -> Program ${ {
4122 type_err(c, "Program defined a second time",
4129 Program -> program OpenScope Varlist ColonBlock Newlines ${
4132 $0->left = reorder_bilist($<Vl);
4134 var_block_close(c, CloseSequential);
4135 if (c->scope_stack && !c->parse_error) abort();
4138 Varlist -> Varlist ArgDecl ${
4147 ArgDecl -> IDENTIFIER ${ {
4148 struct variable *v = var_decl(c, $1.txt);
4155 ###### print binode cases
4157 do_indent(indent, "program");
4158 for (b2 = cast(binode, b->left); b2; b2 = cast(binode, b2->right)) {
4160 print_exec(b2->left, 0, 0);
4166 print_exec(b->right, indent+1, bracket);
4168 do_indent(indent, "}\n");
4171 ###### propagate binode cases
4172 case Program: abort(); // NOTEST
4174 ###### core functions
4176 static int analyse_prog(struct exec *prog, struct parse_context *c)
4178 struct binode *b = cast(binode, prog);
4185 propagate_types(b->right, c, &ok, Tnone, 0);
4190 for (b = cast(binode, b->left); b; b = cast(binode, b->right)) {
4191 struct var *v = cast(var, b->left);
4192 if (!v->var->type) {
4193 v->var->where_set = b;
4194 v->var->type = Tstr;
4198 b = cast(binode, prog);
4201 propagate_types(b->right, c, &ok, Tnone, 0);
4206 /* Make sure everything is still consistent */
4207 propagate_types(b->right, c, &ok, Tnone, 0);
4211 static void interp_prog(struct exec *prog, char **argv)
4213 struct binode *p = cast(binode, prog);
4220 al = cast(binode, p->left);
4222 struct var *v = cast(var, al->left);
4223 struct value *vl = v->var->val;
4225 if (argv[0] == NULL) {
4226 printf("Not enough args\n");
4229 al = cast(binode, al->right);
4231 free_value(v->var->type, vl);
4233 vl = val_alloc(v->var->type, NULL);
4236 free_value(v->var->type, vl);
4237 if (!parse_value(v->var->type, argv[0], vl))
4241 v = interp_exec(p->right, &vtype);
4242 free_value(vtype, &v);
4245 ###### interp binode cases
4246 case Program: abort(); // NOTEST
4248 ## And now to test it out.
4250 Having a language requires having a "hello world" program. I'll
4251 provide a little more than that: a program that prints "Hello world",
4252 finds the GCD of two numbers, prints the first few elements of
4253 Fibonacci, performs a binary search for a number, and a few other
4254 things which will likely grow as the language grows.
4256 ###### File: oceani.mk
4259 @echo "===== DEMO ====="
4260 ./oceani --section "demo: hello" oceani.mdc 55 33
4266 four ::= 2 + 2 ; five ::= 10/2
4267 const pie ::= "I like Pie";
4268 cake ::= "The cake is"
4277 print "Hello World, what lovely oceans you have!"
4278 print "Are there", five, "?"
4279 print pi, pie, "but", cake
4281 A := $Astr; B := $Bstr
4283 /* When a variable is defined in both branches of an 'if',
4284 * and used afterwards, the variables are merged.
4290 print "Is", A, "bigger than", B,"? ", bigger
4291 /* If a variable is not used after the 'if', no
4292 * merge happens, so types can be different
4295 double:string = "yes"
4296 print A, "is more than twice", B, "?", double
4299 print "double", B, "is", double
4304 if a > 0 and then b > 0:
4310 print "GCD of", A, "and", B,"is", a
4312 print a, "is not positive, cannot calculate GCD"
4314 print b, "is not positive, cannot calculate GCD"
4319 print "Fibonacci:", f1,f2,
4320 then togo = togo - 1
4328 /* Binary search... */
4333 mid := (lo + hi) / 2
4345 print "Yay, I found", target
4347 print "Closest I found was", mid
4352 // "middle square" PRNG. Not particularly good, but one my
4353 // Dad taught me - the first one I ever heard of.
4354 for i:=1; then i = i + 1; while i < size:
4355 n := list[i-1] * list[i-1]
4356 list[i] = (n / 100) % 10 000
4358 print "Before sort:",
4359 for i:=0; then i = i + 1; while i < size:
4363 for i := 1; then i=i+1; while i < size:
4364 for j:=i-1; then j=j-1; while j >= 0:
4365 if list[j] > list[j+1]:
4369 print " After sort:",
4370 for i:=0; then i = i + 1; while i < size:
4374 if 1 == 2 then print "yes"; else print "no"
4378 bob.alive = (bob.name == "Hello")
4379 print "bob", "is" if bob.alive else "isn't", "alive"