1 # Ocean Interpreter - Jamison Creek version
3 Ocean is intended to be a compiled language, so this interpreter is
4 not targeted at being the final product. It is, rather, an intermediate
5 stage and fills that role in two distinct ways.
7 Firstly, it exists as a platform to experiment with the early language
8 design. An interpreter is easy to write and easy to get working, so
9 the barrier for entry is lower if I aim to start with an interpreter.
11 Secondly, the plan for the Ocean compiler is to write it in the
12 [Ocean language](http://ocean-lang.org). To achieve this we naturally
13 need some sort of boot-strap process and this interpreter - written in
14 portable C - will fill that role. It will be used to bootstrap the
17 Two features that are not needed to fill either of these roles are
18 performance and completeness. The interpreter only needs to be fast
19 enough to run small test programs and occasionally to run the compiler
20 on itself. It only needs to be complete enough to test aspects of the
21 design which are developed before the compiler is working, and to run
22 the compiler on itself. Any features not used by the compiler when
23 compiling itself are superfluous. They may be included anyway, but
26 Nonetheless, the interpreter should end up being reasonably complete,
27 and any performance bottlenecks which appear and are easily fixed, will
32 This third version of the interpreter exists to test out some initial
33 ideas relating to types. Particularly it adds arrays (indexed from
34 zero) and simple structures. Basic control flow and variable scoping
35 are already fairly well established, as are basic numerical and
38 Some operators that have only recently been added, and so have not
39 generated all that much experience yet are "and then" and "or else" as
40 short-circuit Boolean operators, and the "if ... else" trinary
41 operator which can select between two expressions based on a third
42 (which appears syntactically in the middle).
44 Elements that are present purely to make a usable language, and
45 without any expectation that they will remain, are the "program'
46 clause, which provides a list of variables to received command-line
47 arguments, and the "print" statement which performs simple output.
49 The current scalar types are "number", "Boolean", and "string".
50 Boolean will likely stay in its current form, the other two might, but
51 could just as easily be changed.
55 Versions of the interpreter which obviously do not support a complete
56 language will be named after creeks and streams. This one is Jamison
59 Once we have something reasonably resembling a complete language, the
60 names of rivers will be used.
61 Early versions of the compiler will be named after seas. Major
62 releases of the compiler will be named after oceans. Hopefully I will
63 be finished once I get to the Pacific Ocean release.
67 As well as parsing and executing a program, the interpreter can print
68 out the program from the parsed internal structure. This is useful
69 for validating the parsing.
70 So the main requirements of the interpreter are:
72 - Parse the program, possibly with tracing,
73 - Analyse the parsed program to ensure consistency,
75 - Execute the program, if no parsing or consistency errors were found.
77 This is all performed by a single C program extracted with
80 There will be two formats for printing the program: a default and one
81 that uses bracketing. So a `--bracket` command line option is needed
82 for that. Normally the first code section found is used, however an
83 alternate section can be requested so that a file (such as this one)
84 can contain multiple programs. This is effected with the `--section`
87 This code must be compiled with `-fplan9-extensions` so that anonymous
88 structures can be used.
90 ###### File: oceani.mk
92 myCFLAGS := -Wall -g -fplan9-extensions
93 CFLAGS := $(filter-out $(myCFLAGS),$(CFLAGS)) $(myCFLAGS)
94 myLDLIBS:= libparser.o libscanner.o libmdcode.o -licuuc
95 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
97 all :: $(LDLIBS) oceani
98 oceani.c oceani.h : oceani.mdc parsergen
99 ./parsergen -o oceani --LALR --tag Parser oceani.mdc
100 oceani.mk: oceani.mdc md2c
103 oceani: oceani.o $(LDLIBS)
104 $(CC) $(CFLAGS) -o oceani oceani.o $(LDLIBS)
106 ###### Parser: header
109 struct parse_context {
110 struct token_config config;
119 #define container_of(ptr, type, member) ({ \
120 const typeof( ((type *)0)->member ) *__mptr = (ptr); \
121 (type *)( (char *)__mptr - offsetof(type,member) );})
123 #define config2context(_conf) container_of(_conf, struct parse_context, \
126 ###### Parser: reduce
127 struct parse_context *c = config2context(config);
135 #include <sys/mman.h>
154 static char Usage[] =
155 "Usage: oceani --trace --print --noexec --brackets --section=SectionName prog.ocn\n";
156 static const struct option long_options[] = {
157 {"trace", 0, NULL, 't'},
158 {"print", 0, NULL, 'p'},
159 {"noexec", 0, NULL, 'n'},
160 {"brackets", 0, NULL, 'b'},
161 {"section", 1, NULL, 's'},
164 const char *options = "tpnbs";
165 int main(int argc, char *argv[])
170 struct section *s, *ss;
171 char *section = NULL;
172 struct parse_context context = {
174 .ignored = (1 << TK_mark),
175 .number_chars = ".,_+- ",
180 int doprint=0, dotrace=0, doexec=1, brackets=0;
182 while ((opt = getopt_long(argc, argv, options, long_options, NULL))
185 case 't': dotrace=1; break;
186 case 'p': doprint=1; break;
187 case 'n': doexec=0; break;
188 case 'b': brackets=1; break;
189 case 's': section = optarg; break;
190 default: fprintf(stderr, Usage);
194 if (optind >= argc) {
195 fprintf(stderr, "oceani: no input file given\n");
198 fd = open(argv[optind], O_RDONLY);
200 fprintf(stderr, "oceani: cannot open %s\n", argv[optind]);
203 context.file_name = argv[optind];
204 len = lseek(fd, 0, 2);
205 file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
206 s = code_extract(file, file+len, NULL);
208 fprintf(stderr, "oceani: could not find any code in %s\n",
213 ## context initialization
216 for (ss = s; ss; ss = ss->next) {
217 struct text sec = ss->section;
218 if (sec.len == strlen(section) &&
219 strncmp(sec.txt, section, sec.len) == 0)
223 fprintf(stderr, "oceani: cannot find section %s\n",
229 parse_oceani(ss->code, &context.config, dotrace ? stderr : NULL);
232 fprintf(stderr, "oceani: no program found.\n");
233 context.parse_error = 1;
235 if (context.prog && doprint) {
238 print_exec(context.prog, 0, brackets);
240 if (context.prog && doexec && !context.parse_error) {
241 if (!analyse_prog(context.prog, &context)) {
242 fprintf(stderr, "oceani: type error in program - not running.\n");
245 interp_prog(context.prog, argv+optind+1);
247 free_exec(context.prog);
250 struct section *t = s->next;
256 ## free context types
257 exit(context.parse_error ? 1 : 0);
262 The four requirements of parse, analyse, print, interpret apply to
263 each language element individually so that is how most of the code
266 Three of the four are fairly self explanatory. The one that requires
267 a little explanation is the analysis step.
269 The current language design does not require the types of variables to
270 be declared, but they must still have a single type. Different
271 operations impose different requirements on the variables, for example
272 addition requires both arguments to be numeric, and assignment
273 requires the variable on the left to have the same type as the
274 expression on the right.
276 Analysis involves propagating these type requirements around and
277 consequently setting the type of each variable. If any requirements
278 are violated (e.g. a string is compared with a number) or if a
279 variable needs to have two different types, then an error is raised
280 and the program will not run.
282 If the same variable is declared in both branchs of an 'if/else', or
283 in all cases of a 'switch' then the multiple instances may be merged
284 into just one variable if the variable is referenced after the
285 conditional statement. When this happens, the types must naturally be
286 consistent across all the branches. When the variable is not used
287 outside the if, the variables in the different branches are distinct
288 and can be of different types.
290 Undeclared names may only appear in "use" statements and "case" expressions.
291 These names are given a type of "label" and a unique value.
292 This allows them to fill the role of a name in an enumerated type, which
293 is useful for testing the `switch` statement.
295 As we will see, the condition part of a `while` statement can return
296 either a Boolean or some other type. This requires that the expected
297 type that gets passed around comprises a type and a flag to indicate
298 that `Tbool` is also permitted.
300 As there are, as yet, no distinct types that are compatible, there
301 isn't much subtlety in the analysis. When we have distinct number
302 types, this will become more interesting.
306 When analysis discovers an inconsistency it needs to report an error;
307 just refusing to run the code ensures that the error doesn't cascade,
308 but by itself it isn't very useful. A clear understanding of the sort
309 of error message that are useful will help guide the process of
312 At a simplistic level, the only sort of error that type analysis can
313 report is that the type of some construct doesn't match a contextual
314 requirement. For example, in `4 + "hello"` the addition provides a
315 contextual requirement for numbers, but `"hello"` is not a number. In
316 this particular example no further information is needed as the types
317 are obvious from local information. When a variable is involved that
318 isn't the case. It may be helpful to explain why the variable has a
319 particular type, by indicating the location where the type was set,
320 whether by declaration or usage.
322 Using a recursive-descent analysis we can easily detect a problem at
323 multiple locations. In "`hello:= "there"; 4 + hello`" the addition
324 will detect that one argument is not a number and the usage of `hello`
325 will detect that a number was wanted, but not provided. In this
326 (early) version of the language, we will generate error reports at
327 multiple locations, so the use of `hello` will report an error and
328 explain were the value was set, and the addition will report an error
329 and say why numbers are needed. To be able to report locations for
330 errors, each language element will need to record a file location
331 (line and column) and each variable will need to record the language
332 element where its type was set. For now we will assume that each line
333 of an error message indicates one location in the file, and up to 2
334 types. So we provide a `printf`-like function which takes a format, a
335 location (a `struct exec` which has not yet been introduced), and 2
336 types. "`%1`" reports the first type, "`%2`" reports the second. We
337 will need a function to print the location, once we know how that is
338 stored. e As will be explained later, there are sometimes extra rules for
339 type matching and they might affect error messages, we need to pass those
342 As well as type errors, we sometimes need to report problems with
343 tokens, which might be unexpected or might name a type that has not
344 been defined. For these we have `tok_err()` which reports an error
345 with a given token. Each of the error functions sets the flag in the
346 context so indicate that parsing failed.
350 static void fput_loc(struct exec *loc, FILE *f);
352 ###### core functions
354 static void type_err(struct parse_context *c,
355 char *fmt, struct exec *loc,
356 struct type *t1, int rules, struct type *t2)
358 fprintf(stderr, "%s:", c->file_name);
359 fput_loc(loc, stderr);
360 for (; *fmt ; fmt++) {
367 case '%': fputc(*fmt, stderr); break; // NOTEST
368 default: fputc('?', stderr); break; // NOTEST
370 type_print(t1, stderr);
373 type_print(t2, stderr);
382 static void tok_err(struct parse_context *c, char *fmt, struct token *t)
384 fprintf(stderr, "%s:%d:%d: %s: %.*s\n", c->file_name, t->line, t->col, fmt,
385 t->txt.len, t->txt.txt);
389 ## Entities: declared and predeclared.
391 There are various "things" that the language and/or the interpreter
392 needs to know about to parse and execute a program. These include
393 types, variables, values, and executable code. These are all lumped
394 together under the term "entities" (calling them "objects" would be
395 confusing) and introduced here. The following section will present the
396 different specific code elements which comprise or manipulate these
401 Values come in a wide range of types, with more likely to be added.
402 Each type needs to be able to print its own values (for convenience at
403 least) as well as to compare two values, at least for equality and
404 possibly for order. For now, values might need to be duplicated and
405 freed, though eventually such manipulations will be better integrated
408 Rather than requiring every numeric type to support all numeric
409 operations (add, multiple, etc), we allow types to be able to present
410 as one of a few standard types: integer, float, and fraction. The
411 existence of these conversion functions eventually enable types to
412 determine if they are compatible with other types, though such types
413 have not yet been implemented.
415 Named type are stored in a simple linked list. Objects of each type are
416 "values" which are often passed around by value.
423 ## value union fields
431 void (*init)(struct type *type, struct value *val);
432 void (*print)(struct type *type, struct value *val);
433 void (*print_type)(struct type *type, FILE *f);
434 int (*cmp_order)(struct type *t1, struct type *t2,
435 struct value *v1, struct value *v2);
436 int (*cmp_eq)(struct type *t1, struct type *t2,
437 struct value *v1, struct value *v2);
438 void (*dup)(struct type *type, struct value *vold, struct value *vnew);
439 void (*free)(struct type *type, struct value *val);
440 void (*free_type)(struct type *t);
441 long long (*to_int)(struct value *v);
442 double (*to_float)(struct value *v);
443 int (*to_mpq)(mpq_t *q, struct value *v);
452 struct type *typelist;
456 static struct type *find_type(struct parse_context *c, struct text s)
458 struct type *l = c->typelist;
461 text_cmp(l->name, s) != 0)
466 static struct type *add_type(struct parse_context *c, struct text s,
471 n = calloc(1, sizeof(*n));
474 n->next = c->typelist;
479 static void free_type(struct type *t)
481 /* The type is always a reference to something in the
482 * context, so we don't need to free anything.
486 static void free_value(struct type *type, struct value *v)
492 static void type_print(struct type *type, FILE *f)
495 fputs("*unknown*type*", f);
496 else if (type->name.len)
497 fprintf(f, "%.*s", type->name.len, type->name.txt);
498 else if (type->print_type)
499 type->print_type(type, f);
501 fputs("*invalid*type*", f); // NOTEST
504 static void val_init(struct type *type, struct value *val)
506 if (type && type->init)
507 type->init(type, val);
510 static void dup_value(struct type *type,
511 struct value *vold, struct value *vnew)
513 if (type && type->dup)
514 type->dup(type, vold, vnew);
517 static int value_cmp(struct type *tl, struct type *tr,
518 struct value *left, struct value *right)
520 if (tl && tl->cmp_order)
521 return tl->cmp_order(tl, tr, left, right);
522 if (tl && tl->cmp_eq)
523 return tl->cmp_eq(tl, tr, left, right);
527 static void print_value(struct type *type, struct value *v)
529 if (type && type->print)
530 type->print(type, v);
532 printf("*Unknown*"); // NOTEST
535 static struct value *val_alloc(struct type *t, struct value *init)
541 ret = calloc(1, t->size);
543 memcpy(ret, init, t->size);
551 static void free_value(struct type *type, struct value *v);
552 static int type_compat(struct type *require, struct type *have, int rules);
553 static void type_print(struct type *type, FILE *f);
554 static void val_init(struct type *type, struct value *v);
555 static void dup_value(struct type *type,
556 struct value *vold, struct value *vnew);
557 static int value_cmp(struct type *tl, struct type *tr,
558 struct value *left, struct value *right);
559 static void print_value(struct type *type, struct value *v);
561 ###### free context types
563 while (context.typelist) {
564 struct type *t = context.typelist;
566 context.typelist = t->next;
574 Values of the base types can be numbers, which we represent as
575 multi-precision fractions, strings, Booleans and labels. When
576 analysing the program we also need to allow for places where no value
577 is meaningful (type `Tnone`) and where we don't know what type to
578 expect yet (type is `NULL`).
580 Values are never shared, they are always copied when used, and freed
581 when no longer needed.
583 When propagating type information around the program, we need to
584 determine if two types are compatible, where type `NULL` is compatible
585 with anything. There are two special cases with type compatibility,
586 both related to the Conditional Statement which will be described
587 later. In some cases a Boolean can be accepted as well as some other
588 primary type, and in others any type is acceptable except a label (`Vlabel`).
589 A separate function encoding these cases will simplify some code later.
593 int (*compat)(struct type *this, struct type *other);
597 static int type_compat(struct type *require, struct type *have, int rules)
599 if ((rules & Rboolok) && have == Tbool)
601 if ((rules & Rnolabel) && have == Tlabel)
603 if (!require || !have)
607 return require->compat(require, have);
609 return require == have;
614 #include "parse_string.h"
615 #include "parse_number.h"
618 myLDLIBS := libnumber.o libstring.o -lgmp
619 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
621 ###### type union fields
622 enum vtype {Vnone, Vstr, Vnum, Vbool, Vlabel} vtype;
624 ###### value union fields
631 static void _free_value(struct type *type, struct value *v)
635 switch (type->vtype) {
637 case Vstr: free(v->str.txt); break;
638 case Vnum: mpq_clear(v->num); break;
644 ###### value functions
646 static void _val_init(struct type *type, struct value *val)
648 switch(type->vtype) {
649 case Vnone: // NOTEST
652 mpq_init(val->num); break;
654 val->str.txt = malloc(1);
660 case Vlabel: // NOTEST
661 val->label = NULL; // NOTEST
666 static void _dup_value(struct type *type,
667 struct value *vold, struct value *vnew)
669 switch (type->vtype) {
670 case Vnone: // NOTEST
673 vnew->label = vold->label;
676 vnew->bool = vold->bool;
680 mpq_set(vnew->num, vold->num);
683 vnew->str.len = vold->str.len;
684 vnew->str.txt = malloc(vnew->str.len);
685 memcpy(vnew->str.txt, vold->str.txt, vnew->str.len);
690 static int _value_cmp(struct type *tl, struct type *tr,
691 struct value *left, struct value *right)
695 return tl - tr; // NOTEST
697 case Vlabel: cmp = left->label == right->label ? 0 : 1; break;
698 case Vnum: cmp = mpq_cmp(left->num, right->num); break;
699 case Vstr: cmp = text_cmp(left->str, right->str); break;
700 case Vbool: cmp = left->bool - right->bool; break;
701 case Vnone: cmp = 0; // NOTEST
706 static void _print_value(struct type *type, struct value *v)
708 switch (type->vtype) {
709 case Vnone: // NOTEST
710 printf("*no-value*"); break; // NOTEST
711 case Vlabel: // NOTEST
712 printf("*label-%p*", v->label); break; // NOTEST
714 printf("%.*s", v->str.len, v->str.txt); break;
716 printf("%s", v->bool ? "True":"False"); break;
721 mpf_set_q(fl, v->num);
722 gmp_printf("%Fg", fl);
729 static void _free_value(struct type *type, struct value *v);
731 static struct type base_prototype = {
733 .print = _print_value,
734 .cmp_order = _value_cmp,
735 .cmp_eq = _value_cmp,
740 static struct type *Tbool, *Tstr, *Tnum, *Tnone, *Tlabel;
743 static struct type *add_base_type(struct parse_context *c, char *n,
744 enum vtype vt, int size)
746 struct text txt = { n, strlen(n) };
749 t = add_type(c, txt, &base_prototype);
752 t->align = size > sizeof(void*) ? sizeof(void*) : size;
753 if (t->size & (t->align - 1))
754 t->size = (t->size | (t->align - 1)) + 1;
758 ###### context initialization
760 Tbool = add_base_type(&context, "Boolean", Vbool, sizeof(char));
761 Tstr = add_base_type(&context, "string", Vstr, sizeof(struct text));
762 Tnum = add_base_type(&context, "number", Vnum, sizeof(mpq_t));
763 Tnone = add_base_type(&context, "none", Vnone, 0);
764 Tlabel = add_base_type(&context, "label", Vlabel, sizeof(void*));
768 Variables are scoped named values. We store the names in a linked list
769 of "bindings" sorted in lexical order, and use sequential search and
776 struct binding *next; // in lexical order
780 This linked list is stored in the parse context so that "reduce"
781 functions can find or add variables, and so the analysis phase can
782 ensure that every variable gets a type.
786 struct binding *varlist; // In lexical order
790 static struct binding *find_binding(struct parse_context *c, struct text s)
792 struct binding **l = &c->varlist;
797 (cmp = text_cmp((*l)->name, s)) < 0)
801 n = calloc(1, sizeof(*n));
808 Each name can be linked to multiple variables defined in different
809 scopes. Each scope starts where the name is declared and continues
810 until the end of the containing code block. Scopes of a given name
811 cannot nest, so a declaration while a name is in-scope is an error.
813 ###### binding fields
814 struct variable *var;
818 struct variable *previous;
821 struct binding *name;
822 struct exec *where_decl;// where name was declared
823 struct exec *where_set; // where type was set
827 While the naming seems strange, we include local constants in the
828 definition of variables. A name declared `var := value` can
829 subsequently be changed, but a name declared `var ::= value` cannot -
832 ###### variable fields
835 Scopes in parallel branches can be partially merged. More
836 specifically, if a given name is declared in both branches of an
837 if/else then its scope is a candidate for merging. Similarly if
838 every branch of an exhaustive switch (e.g. has an "else" clause)
839 declares a given name, then the scopes from the branches are
840 candidates for merging.
842 Note that names declared inside a loop (which is only parallel to
843 itself) are never visible after the loop. Similarly names defined in
844 scopes which are not parallel, such as those started by `for` and
845 `switch`, are never visible after the scope. Only variables defined in
846 both `then` and `else` (including the implicit then after an `if`, and
847 excluding `then` used with `for`) and in all `case`s and `else` of a
848 `switch` or `while` can be visible beyond the `if`/`switch`/`while`.
850 Labels, which are a bit like variables, follow different rules.
851 Labels are not explicitly declared, but if an undeclared name appears
852 in a context where a label is legal, that effectively declares the
853 name as a label. The declaration remains in force (or in scope) at
854 least to the end of the immediately containing block and conditionally
855 in any larger containing block which does not declare the name in some
856 other way. Importantly, the conditional scope extension happens even
857 if the label is only used in one parallel branch of a conditional --
858 when used in one branch it is treated as having been declared in all
861 Merge candidates are tentatively visible beyond the end of the
862 branching statement which creates them. If the name is used, the
863 merge is affirmed and they become a single variable visible at the
864 outer layer. If not - if it is redeclared first - the merge lapses.
866 To track scopes we have an extra stack, implemented as a linked list,
867 which roughly parallels the parse stack and which is used exclusively
868 for scoping. When a new scope is opened, a new frame is pushed and
869 the child-count of the parent frame is incremented. This child-count
870 is used to distinguish between the first of a set of parallel scopes,
871 in which declared variables must not be in scope, and subsequent
872 branches, whether they may already be conditionally scoped.
874 To push a new frame *before* any code in the frame is parsed, we need a
875 grammar reduction. This is most easily achieved with a grammar
876 element which derives the empty string, and creates the new scope when
877 it is recognised. This can be placed, for example, between a keyword
878 like "if" and the code following it.
882 struct scope *parent;
888 struct scope *scope_stack;
891 static void scope_pop(struct parse_context *c)
893 struct scope *s = c->scope_stack;
895 c->scope_stack = s->parent;
900 static void scope_push(struct parse_context *c)
902 struct scope *s = calloc(1, sizeof(*s));
904 c->scope_stack->child_count += 1;
905 s->parent = c->scope_stack;
913 OpenScope -> ${ scope_push(c); }$
914 ClosePara -> ${ var_block_close(c, CloseParallel); }$
916 Each variable records a scope depth and is in one of four states:
918 - "in scope". This is the case between the declaration of the
919 variable and the end of the containing block, and also between
920 the usage with affirms a merge and the end of that block.
922 The scope depth is not greater than the current parse context scope
923 nest depth. When the block of that depth closes, the state will
924 change. To achieve this, all "in scope" variables are linked
925 together as a stack in nesting order.
927 - "pending". The "in scope" block has closed, but other parallel
928 scopes are still being processed. So far, every parallel block at
929 the same level that has closed has declared the name.
931 The scope depth is the depth of the last parallel block that
932 enclosed the declaration, and that has closed.
934 - "conditionally in scope". The "in scope" block and all parallel
935 scopes have closed, and no further mention of the name has been
936 seen. This state includes a secondary nest depth which records the
937 outermost scope seen since the variable became conditionally in
938 scope. If a use of the name is found, the variable becomes "in
939 scope" and that secondary depth becomes the recorded scope depth.
940 If the name is declared as a new variable, the old variable becomes
941 "out of scope" and the recorded scope depth stays unchanged.
943 - "out of scope". The variable is neither in scope nor conditionally
944 in scope. It is permanently out of scope now and can be removed from
945 the "in scope" stack.
947 ###### variable fields
948 int depth, min_depth;
949 enum { OutScope, PendingScope, CondScope, InScope } scope;
950 struct variable *in_scope;
954 struct variable *in_scope;
956 All variables with the same name are linked together using the
957 'previous' link. Those variable that have been affirmatively merged all
958 have a 'merged' pointer that points to one primary variable - the most
959 recently declared instance. When merging variables, we need to also
960 adjust the 'merged' pointer on any other variables that had previously
961 been merged with the one that will no longer be primary.
963 A variable that is no longer the most recent instance of a name may
964 still have "pending" scope, if it might still be merged with most
965 recent instance. These variables don't really belong in the
966 "in_scope" list, but are not immediately removed when a new instance
967 is found. Instead, they are detected and ignored when considering the
968 list of in_scope names.
970 ###### variable fields
971 struct variable *merged;
975 static void variable_merge(struct variable *primary, struct variable *secondary)
981 primary = primary->merged;
983 for (v = primary->previous; v; v=v->previous)
984 if (v == secondary || v == secondary->merged ||
985 v->merged == secondary ||
986 (v->merged && v->merged == secondary->merged)) {
992 ###### free context vars
994 while (context.varlist) {
995 struct binding *b = context.varlist;
996 struct variable *v = b->var;
997 context.varlist = b->next;
1000 struct variable *t = v;
1003 free_value(t->type, t->val);
1006 // This is a global constant
1007 free_exec(t->where_decl);
1012 #### Manipulating Bindings
1014 When a name is conditionally visible, a new declaration discards the
1015 old binding - the condition lapses. Conversely a usage of the name
1016 affirms the visibility and extends it to the end of the containing
1017 block - i.e. the block that contains both the original declaration and
1018 the latest usage. This is determined from `min_depth`. When a
1019 conditionally visible variable gets affirmed like this, it is also
1020 merged with other conditionally visible variables with the same name.
1022 When we parse a variable declaration we either report an error if the
1023 name is currently bound, or create a new variable at the current nest
1024 depth if the name is unbound or bound to a conditionally scoped or
1025 pending-scope variable. If the previous variable was conditionally
1026 scoped, it and its homonyms becomes out-of-scope.
1028 When we parse a variable reference (including non-declarative assignment
1029 "foo = bar") we report an error if the name is not bound or is bound to
1030 a pending-scope variable; update the scope if the name is bound to a
1031 conditionally scoped variable; or just proceed normally if the named
1032 variable is in scope.
1034 When we exit a scope, any variables bound at this level are either
1035 marked out of scope or pending-scoped, depending on whether the scope
1036 was sequential or parallel. Here a "parallel" scope means the "then"
1037 or "else" part of a conditional, or any "case" or "else" branch of a
1038 switch. Other scopes are "sequential".
1040 When exiting a parallel scope we check if there are any variables that
1041 were previously pending and are still visible. If there are, then
1042 there weren't redeclared in the most recent scope, so they cannot be
1043 merged and must become out-of-scope. If it is not the first of
1044 parallel scopes (based on `child_count`), we check that there was a
1045 previous binding that is still pending-scope. If there isn't, the new
1046 variable must now be out-of-scope.
1048 When exiting a sequential scope that immediately enclosed parallel
1049 scopes, we need to resolve any pending-scope variables. If there was
1050 no `else` clause, and we cannot determine that the `switch` was exhaustive,
1051 we need to mark all pending-scope variable as out-of-scope. Otherwise
1052 all pending-scope variables become conditionally scoped.
1055 enum closetype { CloseSequential, CloseParallel, CloseElse };
1057 ###### ast functions
1059 static struct variable *var_decl(struct parse_context *c, struct text s)
1061 struct binding *b = find_binding(c, s);
1062 struct variable *v = b->var;
1064 switch (v ? v->scope : OutScope) {
1066 /* Caller will report the error */
1070 v && v->scope == CondScope;
1072 v->scope = OutScope;
1076 v = calloc(1, sizeof(*v));
1077 v->previous = b->var;
1080 v->min_depth = v->depth = c->scope_depth;
1082 v->in_scope = c->in_scope;
1088 static struct variable *var_ref(struct parse_context *c, struct text s)
1090 struct binding *b = find_binding(c, s);
1091 struct variable *v = b->var;
1092 struct variable *v2;
1094 switch (v ? v->scope : OutScope) {
1097 /* Caller will report the error */
1100 /* All CondScope variables of this name need to be merged
1101 * and become InScope
1103 v->depth = v->min_depth;
1105 for (v2 = v->previous;
1106 v2 && v2->scope == CondScope;
1108 variable_merge(v, v2);
1116 static void var_block_close(struct parse_context *c, enum closetype ct)
1118 /* Close off all variables that are in_scope */
1119 struct variable *v, **vp, *v2;
1122 for (vp = &c->in_scope;
1123 v = *vp, v && v->depth > c->scope_depth && v->min_depth > c->scope_depth;
1125 if (v->name->var == v) switch (ct) {
1127 case CloseParallel: /* handle PendingScope */
1131 if (c->scope_stack->child_count == 1)
1132 v->scope = PendingScope;
1133 else if (v->previous &&
1134 v->previous->scope == PendingScope)
1135 v->scope = PendingScope;
1136 else if (v->type == Tlabel)
1137 v->scope = PendingScope;
1138 else if (v->name->var == v)
1139 v->scope = OutScope;
1140 if (ct == CloseElse) {
1141 /* All Pending variables with this name
1142 * are now Conditional */
1144 v2 && v2->scope == PendingScope;
1146 v2->scope = CondScope;
1151 v2 && v2->scope == PendingScope;
1153 if (v2->type != Tlabel)
1154 v2->scope = OutScope;
1156 case OutScope: break;
1159 case CloseSequential:
1160 if (v->type == Tlabel)
1161 v->scope = PendingScope;
1164 v->scope = OutScope;
1167 /* There was no 'else', so we can only become
1168 * conditional if we know the cases were exhaustive,
1169 * and that doesn't mean anything yet.
1170 * So only labels become conditional..
1173 v2 && v2->scope == PendingScope;
1175 if (v2->type == Tlabel) {
1176 v2->scope = CondScope;
1177 v2->min_depth = c->scope_depth;
1179 v2->scope = OutScope;
1182 case OutScope: break;
1186 if (v->scope == OutScope || v->name->var != v)
1195 Executables can be lots of different things. In many cases an
1196 executable is just an operation combined with one or two other
1197 executables. This allows for expressions and lists etc. Other times an
1198 executable is something quite specific like a constant or variable name.
1199 So we define a `struct exec` to be a general executable with a type, and
1200 a `struct binode` which is a subclass of `exec`, forms a node in a
1201 binary tree, and holds an operation. There will be other subclasses,
1202 and to access these we need to be able to `cast` the `exec` into the
1203 various other types. The first field in any `struct exec` is the type
1204 from the `exec_types` enum.
1207 #define cast(structname, pointer) ({ \
1208 const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \
1209 if (__mptr && *__mptr != X##structname) abort(); \
1210 (struct structname *)( (char *)__mptr);})
1212 #define new(structname) ({ \
1213 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1214 __ptr->type = X##structname; \
1215 __ptr->line = -1; __ptr->column = -1; \
1218 #define new_pos(structname, token) ({ \
1219 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1220 __ptr->type = X##structname; \
1221 __ptr->line = token.line; __ptr->column = token.col; \
1230 enum exec_types type;
1238 struct exec *left, *right;
1241 ###### ast functions
1243 static int __fput_loc(struct exec *loc, FILE *f)
1247 if (loc->line >= 0) {
1248 fprintf(f, "%d:%d: ", loc->line, loc->column);
1251 if (loc->type == Xbinode)
1252 return __fput_loc(cast(binode,loc)->left, f) ||
1253 __fput_loc(cast(binode,loc)->right, f);
1256 static void fput_loc(struct exec *loc, FILE *f)
1258 if (!__fput_loc(loc, f))
1259 fprintf(f, "??:??: "); // NOTEST
1262 Each different type of `exec` node needs a number of functions defined,
1263 a bit like methods. We must be able to free it, print it, analyse it
1264 and execute it. Once we have specific `exec` types we will need to
1265 parse them too. Let's take this a bit more slowly.
1269 The parser generator requires a `free_foo` function for each struct
1270 that stores attributes and they will often be `exec`s and subtypes
1271 there-of. So we need `free_exec` which can handle all the subtypes,
1272 and we need `free_binode`.
1274 ###### ast functions
1276 static void free_binode(struct binode *b)
1281 free_exec(b->right);
1285 ###### core functions
1286 static void free_exec(struct exec *e)
1295 ###### forward decls
1297 static void free_exec(struct exec *e);
1299 ###### free exec cases
1300 case Xbinode: free_binode(cast(binode, e)); break;
1304 Printing an `exec` requires that we know the current indent level for
1305 printing line-oriented components. As will become clear later, we
1306 also want to know what sort of bracketing to use.
1308 ###### ast functions
1310 static void do_indent(int i, char *str)
1317 ###### core functions
1318 static void print_binode(struct binode *b, int indent, int bracket)
1322 ## print binode cases
1326 static void print_exec(struct exec *e, int indent, int bracket)
1332 print_binode(cast(binode, e), indent, bracket); break;
1337 ###### forward decls
1339 static void print_exec(struct exec *e, int indent, int bracket);
1343 As discussed, analysis involves propagating type requirements around the
1344 program and looking for errors.
1346 So `propagate_types` is passed an expected type (being a `struct type`
1347 pointer together with some `val_rules` flags) that the `exec` is
1348 expected to return, and returns the type that it does return, either
1349 of which can be `NULL` signifying "unknown". An `ok` flag is passed
1350 by reference. It is set to `0` when an error is found, and `2` when
1351 any change is made. If it remains unchanged at `1`, then no more
1352 propagation is needed.
1356 enum val_rules {Rnolabel = 1<<0, Rboolok = 1<<1, Rnoconstant = 2<<1};
1360 if (rules & Rnolabel)
1361 fputs(" (labels not permitted)", stderr);
1364 ###### core functions
1366 static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1367 struct type *type, int rules);
1368 static struct type *__propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1369 struct type *type, int rules)
1376 switch (prog->type) {
1379 struct binode *b = cast(binode, prog);
1381 ## propagate binode cases
1385 ## propagate exec cases
1390 static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1391 struct type *type, int rules)
1393 struct type *ret = __propagate_types(prog, c, ok, type, rules);
1402 Interpreting an `exec` doesn't require anything but the `exec`. State
1403 is stored in variables and each variable will be directly linked from
1404 within the `exec` tree. The exception to this is the whole `program`
1405 which needs to look at command line arguments. The `program` will be
1406 interpreted separately.
1408 Each `exec` can return a value combined with a type in `struct lrval`.
1409 The type may be `Tnone` but must be non-NULL. Some `exec`s will return
1410 the location of a value, which can be updated, in `lval`. Others will
1411 set `lval` to NULL indicating that there is a value of appropriate type
1415 ###### core functions
1419 struct value rval, *lval;
1422 static struct lrval _interp_exec(struct exec *e);
1424 static struct value interp_exec(struct exec *e, struct type **typeret)
1426 struct lrval ret = _interp_exec(e);
1428 if (!ret.type) abort();
1430 *typeret = ret.type;
1432 dup_value(ret.type, ret.lval, &ret.rval);
1436 static struct value *linterp_exec(struct exec *e, struct type **typeret)
1438 struct lrval ret = _interp_exec(e);
1441 *typeret = ret.type;
1445 static struct lrval _interp_exec(struct exec *e)
1448 struct value rv = {}, *lrv = NULL;
1449 struct type *rvtype;
1451 rvtype = ret.type = Tnone;
1461 struct binode *b = cast(binode, e);
1462 struct value left, right, *lleft;
1463 struct type *ltype, *rtype;
1464 ltype = rtype = Tnone;
1466 ## interp binode cases
1468 free_value(ltype, &left);
1469 free_value(rtype, &right);
1472 ## interp exec cases
1482 Now that we have the shape of the interpreter in place we can add some
1483 complex types and connected them in to the data structures and the
1484 different phases of parse, analyse, print, interpret.
1486 Thus far we have arrays and structs.
1488 Some complex types need do not exist in a name table, so they are kept
1489 on a linked list in the context (`anon_typelist`). This allows them to
1490 be freed when parsing is complete.
1494 Arrays can be declared by giving a size and a type, as `[size]type' so
1495 `freq:[26]number` declares `freq` to be an array of 26 numbers. The
1496 size can be either a literal number, or a named constant. Some day an
1497 arbitrary expression will be supported.
1499 Arrays cannot be assigned. When pointers are introduced we will also
1500 introduce array slices which can refer to part or all of an array -
1501 the assignment syntax will create a slice. For now, an array can only
1502 ever be referenced by the name it is declared with. It is likely that
1503 a "`copy`" primitive will eventually be define which can be used to
1504 make a copy of an array with controllable recursive depth.
1506 ###### type union fields
1510 struct variable *vsize;
1511 struct type *member;
1514 ###### value union fields
1517 ###### value functions
1519 static void array_init(struct type *type, struct value *val)
1523 if (type->array.vsize) {
1526 mpz_tdiv_q(q, mpq_numref(type->array.vsize->val->num),
1527 mpq_denref(type->array.vsize->val->num));
1528 type->array.size = mpz_get_si(q);
1531 type->size = type->array.size * type->array.member->size;
1532 type->align = type->array.member->align;
1536 for (i = 0; i < type->array.size; i++) {
1538 v = (void*)val->ptr + i * type->array.member->size;
1539 val_init(type->array.member, v);
1543 static void array_free(struct type *type, struct value *val)
1547 for (i = 0; i < type->array.size; i++) {
1549 v = (void*)val->ptr + i * type->array.member->size;
1550 free_value(type->array.member, v);
1554 static int array_compat(struct type *require, struct type *have)
1556 if (have->compat != require->compat)
1558 /* Both are arrays, so we can look at details */
1559 if (!type_compat(require->array.member, have->array.member, 0))
1561 if (require->array.vsize == NULL && have->array.vsize == NULL)
1562 return require->array.size == have->array.size;
1564 return require->array.vsize == have->array.vsize;
1567 static void array_print_type(struct type *type, FILE *f)
1570 if (type->array.vsize) {
1571 struct binding *b = type->array.vsize->name;
1572 fprintf(f, "%.*s]", b->name.len, b->name.txt);
1574 fprintf(f, "%d]", type->array.size);
1575 type_print(type->array.member, f);
1578 static struct type array_prototype = {
1580 .print_type = array_print_type,
1581 .compat = array_compat,
1585 ###### declare terminals
1590 | [ NUMBER ] Type ${
1591 $0 = calloc(1, sizeof(struct type));
1592 *($0) = array_prototype;
1593 $0->array.member = $<4;
1594 $0->array.vsize = NULL;
1598 if (number_parse(num, tail, $2.txt) == 0)
1599 tok_err(c, "error: unrecognised number", &$2);
1601 tok_err(c, "error: unsupported number suffix", &$2);
1603 $0->array.size = mpz_get_ui(mpq_numref(num));
1604 if (mpz_cmp_ui(mpq_denref(num), 1) != 0) {
1605 tok_err(c, "error: array size must be an integer",
1607 } else if (mpz_cmp_ui(mpq_numref(num), 1UL << 30) >= 0)
1608 tok_err(c, "error: array size is too large",
1612 $0->next = c->anon_typelist;
1613 c->anon_typelist = $0;
1617 | [ IDENTIFIER ] Type ${ {
1618 struct variable *v = var_ref(c, $2.txt);
1621 tok_err(c, "error: name undeclared", &$2);
1622 else if (!v->constant)
1623 tok_err(c, "error: array size must be a constant", &$2);
1625 $0 = calloc(1, sizeof(struct type));
1626 *($0) = array_prototype;
1627 $0->array.member = $<4;
1629 $0->array.vsize = v;
1630 $0->next = c->anon_typelist;
1631 c->anon_typelist = $0;
1634 ###### parse context
1636 struct type *anon_typelist;
1638 ###### free context types
1640 while (context.anon_typelist) {
1641 struct type *t = context.anon_typelist;
1643 context.anon_typelist = t->next;
1650 ###### variable grammar
1652 | Variable [ Expression ] ${ {
1653 struct binode *b = new(binode);
1660 ###### print binode cases
1662 print_exec(b->left, -1, bracket);
1664 print_exec(b->right, -1, bracket);
1668 ###### propagate binode cases
1670 /* left must be an array, right must be a number,
1671 * result is the member type of the array
1673 propagate_types(b->right, c, ok, Tnum, 0);
1674 t = propagate_types(b->left, c, ok, NULL, rules & Rnoconstant);
1675 if (!t || t->compat != array_compat) {
1676 type_err(c, "error: %1 cannot be indexed", prog, t, 0, NULL);
1679 if (!type_compat(type, t->array.member, rules)) {
1680 type_err(c, "error: have %1 but need %2", prog,
1681 t->array.member, rules, type);
1683 return t->array.member;
1687 ###### interp binode cases
1692 lleft = linterp_exec(b->left, <ype);
1693 right = interp_exec(b->right, &rtype);
1695 mpz_tdiv_q(q, mpq_numref(right.num), mpq_denref(right.num));
1699 rvtype = ltype->array.member;
1700 if (i >= 0 && i < ltype->array.size)
1701 lrv = (void*)lleft + i * rvtype->size;
1703 val_init(ltype->array.member, &rv);
1710 A `struct` is a data-type that contains one or more other data-types.
1711 It differs from an array in that each member can be of a different
1712 type, and they are accessed by name rather than by number. Thus you
1713 cannot choose an element by calculation, you need to know what you
1716 The language makes no promises about how a given structure will be
1717 stored in memory - it is free to rearrange fields to suit whatever
1718 criteria seems important.
1720 Structs are declared separately from program code - they cannot be
1721 declared in-line in a variable declaration like arrays can. A struct
1722 is given a name and this name is used to identify the type - the name
1723 is not prefixed by the word `struct` as it would be in C.
1725 Structs are only treated as the same if they have the same name.
1726 Simply having the same fields in the same order is not enough. This
1727 might change once we can create structure initializers from a list of
1730 Each component datum is identified much like a variable is declared,
1731 with a name, one or two colons, and a type. The type cannot be omitted
1732 as there is no opportunity to deduce the type from usage. An initial
1733 value can be given following an equals sign, so
1735 ##### Example: a struct type
1741 would declare a type called "complex" which has two number fields,
1742 each initialised to zero.
1744 Struct will need to be declared separately from the code that uses
1745 them, so we will need to be able to print out the declaration of a
1746 struct when reprinting the whole program. So a `print_type_decl` type
1747 function will be needed.
1749 ###### type union fields
1761 ###### type functions
1762 void (*print_type_decl)(struct type *type, FILE *f);
1764 ###### value functions
1766 static void structure_init(struct type *type, struct value *val)
1770 for (i = 0; i < type->structure.nfields; i++) {
1772 v = (void*) val->ptr + type->structure.fields[i].offset;
1773 val_init(type->structure.fields[i].type, v);
1777 static void structure_free(struct type *type, struct value *val)
1781 for (i = 0; i < type->structure.nfields; i++) {
1783 v = (void*)val->ptr + type->structure.fields[i].offset;
1784 free_value(type->structure.fields[i].type, v);
1788 static void structure_free_type(struct type *t)
1791 for (i = 0; i < t->structure.nfields; i++)
1792 if (t->structure.fields[i].init) {
1793 free_value(t->structure.fields[i].type,
1794 t->structure.fields[i].init);
1795 free(t->structure.fields[i].init);
1797 free(t->structure.fields);
1800 static struct type structure_prototype = {
1801 .init = structure_init,
1802 .free = structure_free,
1803 .free_type = structure_free_type,
1804 .print_type_decl = structure_print_type,
1818 ###### free exec cases
1820 free_exec(cast(fieldref, e)->left);
1824 ###### declare terminals
1827 ###### variable grammar
1829 | Variable . IDENTIFIER ${ {
1830 struct fieldref *fr = new_pos(fieldref, $2);
1837 ###### print exec cases
1841 struct fieldref *f = cast(fieldref, e);
1842 print_exec(f->left, -1, bracket);
1843 printf(".%.*s", f->name.len, f->name.txt);
1847 ###### ast functions
1848 static int find_struct_index(struct type *type, struct text field)
1851 for (i = 0; i < type->structure.nfields; i++)
1852 if (text_cmp(type->structure.fields[i].name, field) == 0)
1857 ###### propagate exec cases
1861 struct fieldref *f = cast(fieldref, prog);
1862 struct type *st = propagate_types(f->left, c, ok, NULL, 0);
1865 type_err(c, "error: unknown type for field access", f->left,
1867 else if (st->init != structure_init)
1868 type_err(c, "error: field reference attempted on %1, not a struct",
1869 f->left, st, 0, NULL);
1870 else if (f->index == -2) {
1871 f->index = find_struct_index(st, f->name);
1873 type_err(c, "error: cannot find requested field in %1",
1874 f->left, st, 0, NULL);
1876 if (f->index >= 0) {
1877 struct type *ft = st->structure.fields[f->index].type;
1878 if (!type_compat(type, ft, rules))
1879 type_err(c, "error: have %1 but need %2", prog,
1886 ###### interp exec cases
1889 struct fieldref *f = cast(fieldref, e);
1891 struct value *lleft = linterp_exec(f->left, <ype);
1892 lrv = (void*)lleft->ptr + ltype->structure.fields[f->index].offset;
1893 rvtype = ltype->structure.fields[f->index].type;
1899 struct fieldlist *prev;
1903 ###### ast functions
1904 static void free_fieldlist(struct fieldlist *f)
1908 free_fieldlist(f->prev);
1910 free_value(f->f.type, f->f.init);
1916 ###### top level grammar
1917 DeclareStruct -> struct IDENTIFIER FieldBlock Newlines ${ {
1919 add_type(c, $2.txt, &structure_prototype);
1921 struct fieldlist *f;
1923 for (f = $3; f; f=f->prev)
1926 t->structure.nfields = cnt;
1927 t->structure.fields = calloc(cnt, sizeof(struct field));
1930 int a = f->f.type->align;
1932 t->structure.fields[cnt] = f->f;
1933 if (t->size & (a-1))
1934 t->size = (t->size | (a-1)) + 1;
1935 t->structure.fields[cnt].offset = t->size;
1936 t->size += ((f->f.type->size - 1) | (a-1)) + 1;
1945 FieldBlock -> { IN OptNL FieldLines OUT OptNL } ${ $0 = $<FL; }$
1946 | { SimpleFieldList } ${ $0 = $<SFL; }$
1947 | IN OptNL FieldLines OUT ${ $0 = $<FL; }$
1948 | SimpleFieldList EOL ${ $0 = $<SFL; }$
1950 FieldLines -> SimpleFieldList Newlines ${ $0 = $<SFL; }$
1951 | FieldLines SimpleFieldList Newlines ${
1956 SimpleFieldList -> Field ${ $0 = $<F; }$
1957 | SimpleFieldList ; Field ${
1961 | SimpleFieldList ; ${
1964 | ERROR ${ tok_err(c, "Syntax error in struct field", &$1); }$
1966 Field -> IDENTIFIER : Type = Expression ${ {
1969 $0 = calloc(1, sizeof(struct fieldlist));
1970 $0->f.name = $1.txt;
1975 propagate_types($<5, c, &ok, $3, 0);
1980 struct value vl = interp_exec($5, NULL);
1981 $0->f.init = val_alloc($0->f.type, &vl);
1984 | IDENTIFIER : Type ${
1985 $0 = calloc(1, sizeof(struct fieldlist));
1986 $0->f.name = $1.txt;
1988 $0->f.init = val_alloc($0->f.type, NULL);
1991 ###### forward decls
1992 static void structure_print_type(struct type *t, FILE *f);
1994 ###### value functions
1995 static void structure_print_type(struct type *t, FILE *f)
1999 fprintf(f, "struct %.*s\n", t->name.len, t->name.txt);
2001 for (i = 0; i < t->structure.nfields; i++) {
2002 struct field *fl = t->structure.fields + i;
2003 fprintf(f, " %.*s : ", fl->name.len, fl->name.txt);
2004 type_print(fl->type, f);
2005 if (fl->type->print && fl->init) {
2007 if (fl->type == Tstr)
2009 print_value(fl->type, fl->init);
2010 if (fl->type == Tstr)
2017 ###### print type decls
2022 while (target != 0) {
2024 for (t = context.typelist; t ; t=t->next)
2025 if (t->print_type_decl) {
2034 t->print_type_decl(t, stdout);
2040 ## Executables: the elements of code
2042 Each code element needs to be parsed, printed, analysed,
2043 interpreted, and freed. There are several, so let's just start with
2044 the easy ones and work our way up.
2048 We have already met values as separate objects. When manifest
2049 constants appear in the program text, that must result in an executable
2050 which has a constant value. So the `val` structure embeds a value in
2063 ###### ast functions
2064 struct val *new_val(struct type *T, struct token tk)
2066 struct val *v = new_pos(val, tk);
2077 $0 = new_val(Tbool, $1);
2081 $0 = new_val(Tbool, $1);
2085 $0 = new_val(Tnum, $1);
2088 if (number_parse($0->val.num, tail, $1.txt) == 0)
2089 mpq_init($0->val.num);
2091 tok_err(c, "error: unsupported number suffix",
2096 $0 = new_val(Tstr, $1);
2099 string_parse(&$1, '\\', &$0->val.str, tail);
2101 tok_err(c, "error: unsupported string suffix",
2106 $0 = new_val(Tstr, $1);
2109 string_parse(&$1, '\\', &$0->val.str, tail);
2111 tok_err(c, "error: unsupported string suffix",
2116 ###### print exec cases
2119 struct val *v = cast(val, e);
2120 if (v->vtype == Tstr)
2122 print_value(v->vtype, &v->val);
2123 if (v->vtype == Tstr)
2128 ###### propagate exec cases
2131 struct val *val = cast(val, prog);
2132 if (!type_compat(type, val->vtype, rules))
2133 type_err(c, "error: expected %1%r found %2",
2134 prog, type, rules, val->vtype);
2138 ###### interp exec cases
2140 rvtype = cast(val, e)->vtype;
2141 dup_value(rvtype, &cast(val, e)->val, &rv);
2144 ###### ast functions
2145 static void free_val(struct val *v)
2148 free_value(v->vtype, &v->val);
2152 ###### free exec cases
2153 case Xval: free_val(cast(val, e)); break;
2155 ###### ast functions
2156 // Move all nodes from 'b' to 'rv', reversing their order.
2157 // In 'b' 'left' is a list, and 'right' is the last node.
2158 // In 'rv', left' is the first node and 'right' is a list.
2159 static struct binode *reorder_bilist(struct binode *b)
2161 struct binode *rv = NULL;
2164 struct exec *t = b->right;
2168 b = cast(binode, b->left);
2178 Just as we used a `val` to wrap a value into an `exec`, we similarly
2179 need a `var` to wrap a `variable` into an exec. While each `val`
2180 contained a copy of the value, each `var` holds a link to the variable
2181 because it really is the same variable no matter where it appears.
2182 When a variable is used, we need to remember to follow the `->merged`
2183 link to find the primary instance.
2191 struct variable *var;
2199 VariableDecl -> IDENTIFIER : ${ {
2200 struct variable *v = var_decl(c, $1.txt);
2201 $0 = new_pos(var, $1);
2206 v = var_ref(c, $1.txt);
2208 type_err(c, "error: variable '%v' redeclared",
2210 type_err(c, "info: this is where '%v' was first declared",
2211 v->where_decl, NULL, 0, NULL);
2214 | IDENTIFIER :: ${ {
2215 struct variable *v = var_decl(c, $1.txt);
2216 $0 = new_pos(var, $1);
2222 v = var_ref(c, $1.txt);
2224 type_err(c, "error: variable '%v' redeclared",
2226 type_err(c, "info: this is where '%v' was first declared",
2227 v->where_decl, NULL, 0, NULL);
2230 | IDENTIFIER : Type ${ {
2231 struct variable *v = var_decl(c, $1.txt);
2232 $0 = new_pos(var, $1);
2240 v = var_ref(c, $1.txt);
2242 type_err(c, "error: variable '%v' redeclared",
2244 type_err(c, "info: this is where '%v' was first declared",
2245 v->where_decl, NULL, 0, NULL);
2248 | IDENTIFIER :: Type ${ {
2249 struct variable *v = var_decl(c, $1.txt);
2250 $0 = new_pos(var, $1);
2259 v = var_ref(c, $1.txt);
2261 type_err(c, "error: variable '%v' redeclared",
2263 type_err(c, "info: this is where '%v' was first declared",
2264 v->where_decl, NULL, 0, NULL);
2269 Variable -> IDENTIFIER ${ {
2270 struct variable *v = var_ref(c, $1.txt);
2271 $0 = new_pos(var, $1);
2273 /* This might be a label - allocate a var just in case */
2274 v = var_decl(c, $1.txt);
2282 cast(var, $0)->var = v;
2287 Type -> IDENTIFIER ${
2288 $0 = find_type(c, $1.txt);
2291 "error: undefined type", &$1);
2298 ###### print exec cases
2301 struct var *v = cast(var, e);
2303 struct binding *b = v->var->name;
2304 printf("%.*s", b->name.len, b->name.txt);
2311 if (loc->type == Xvar) {
2312 struct var *v = cast(var, loc);
2314 struct binding *b = v->var->name;
2315 fprintf(stderr, "%.*s", b->name.len, b->name.txt);
2317 fputs("???", stderr); // NOTEST
2319 fputs("NOTVAR", stderr); // NOTEST
2322 ###### propagate exec cases
2326 struct var *var = cast(var, prog);
2327 struct variable *v = var->var;
2329 type_err(c, "%d:BUG: no variable!!", prog, NULL, 0, NULL); // NOTEST
2330 return Tnone; // NOTEST
2334 if (v->constant && (rules & Rnoconstant)) {
2335 type_err(c, "error: Cannot assign to a constant: %v",
2336 prog, NULL, 0, NULL);
2337 type_err(c, "info: name was defined as a constant here",
2338 v->where_decl, NULL, 0, NULL);
2341 if (v->type == Tnone && v->where_decl == prog)
2342 type_err(c, "error: variable used but not declared: %v",
2343 prog, NULL, 0, NULL);
2344 if (v->type == NULL) {
2345 if (type && *ok != 0) {
2348 v->where_set = prog;
2353 if (!type_compat(type, v->type, rules)) {
2354 type_err(c, "error: expected %1%r but variable '%v' is %2", prog,
2355 type, rules, v->type);
2356 type_err(c, "info: this is where '%v' was set to %1", v->where_set,
2357 v->type, rules, NULL);
2364 ###### interp exec cases
2367 struct var *var = cast(var, e);
2368 struct variable *v = var->var;
2377 ###### ast functions
2379 static void free_var(struct var *v)
2384 ###### free exec cases
2385 case Xvar: free_var(cast(var, e)); break;
2387 ### Expressions: Conditional
2389 Our first user of the `binode` will be conditional expressions, which
2390 is a bit odd as they actually have three components. That will be
2391 handled by having 2 binodes for each expression. The conditional
2392 expression is the lowest precedence operator which is why we define it
2393 first - to start the precedence list.
2395 Conditional expressions are of the form "value `if` condition `else`
2396 other_value". They associate to the right, so everything to the right
2397 of `else` is part of an else value, while only a higher-precedence to
2398 the left of `if` is the if values. Between `if` and `else` there is no
2399 room for ambiguity, so a full conditional expression is allowed in
2411 Expression -> Expression if Expression else Expression $$ifelse ${ {
2412 struct binode *b1 = new(binode);
2413 struct binode *b2 = new(binode);
2422 ## expression grammar
2424 ###### print binode cases
2427 b2 = cast(binode, b->right);
2428 if (bracket) printf("(");
2429 print_exec(b2->left, -1, bracket);
2431 print_exec(b->left, -1, bracket);
2433 print_exec(b2->right, -1, bracket);
2434 if (bracket) printf(")");
2437 ###### propagate binode cases
2440 /* cond must be Tbool, others must match */
2441 struct binode *b2 = cast(binode, b->right);
2444 propagate_types(b->left, c, ok, Tbool, 0);
2445 t = propagate_types(b2->left, c, ok, type, Rnolabel);
2446 t2 = propagate_types(b2->right, c, ok, type ?: t, Rnolabel);
2450 ###### interp binode cases
2453 struct binode *b2 = cast(binode, b->right);
2454 left = interp_exec(b->left, <ype);
2456 rv = interp_exec(b2->left, &rvtype);
2458 rv = interp_exec(b2->right, &rvtype);
2462 ### Expressions: Boolean
2464 The next class of expressions to use the `binode` will be Boolean
2465 expressions. "`and then`" and "`or else`" are similar to `and` and `or`
2466 have same corresponding precendence. The difference is that they don't
2467 evaluate the second expression if not necessary.
2476 ###### expr precedence
2481 ###### expression grammar
2482 | Expression or Expression ${ {
2483 struct binode *b = new(binode);
2489 | Expression or else Expression ${ {
2490 struct binode *b = new(binode);
2497 | Expression and Expression ${ {
2498 struct binode *b = new(binode);
2504 | Expression and then Expression ${ {
2505 struct binode *b = new(binode);
2512 | not Expression ${ {
2513 struct binode *b = new(binode);
2519 ###### print binode cases
2521 if (bracket) printf("(");
2522 print_exec(b->left, -1, bracket);
2524 print_exec(b->right, -1, bracket);
2525 if (bracket) printf(")");
2528 if (bracket) printf("(");
2529 print_exec(b->left, -1, bracket);
2530 printf(" and then ");
2531 print_exec(b->right, -1, bracket);
2532 if (bracket) printf(")");
2535 if (bracket) printf("(");
2536 print_exec(b->left, -1, bracket);
2538 print_exec(b->right, -1, bracket);
2539 if (bracket) printf(")");
2542 if (bracket) printf("(");
2543 print_exec(b->left, -1, bracket);
2544 printf(" or else ");
2545 print_exec(b->right, -1, bracket);
2546 if (bracket) printf(")");
2549 if (bracket) printf("(");
2551 print_exec(b->right, -1, bracket);
2552 if (bracket) printf(")");
2555 ###### propagate binode cases
2561 /* both must be Tbool, result is Tbool */
2562 propagate_types(b->left, c, ok, Tbool, 0);
2563 propagate_types(b->right, c, ok, Tbool, 0);
2564 if (type && type != Tbool)
2565 type_err(c, "error: %1 operation found where %2 expected", prog,
2569 ###### interp binode cases
2571 rv = interp_exec(b->left, &rvtype);
2572 right = interp_exec(b->right, &rtype);
2573 rv.bool = rv.bool && right.bool;
2576 rv = interp_exec(b->left, &rvtype);
2578 rv = interp_exec(b->right, NULL);
2581 rv = interp_exec(b->left, &rvtype);
2582 right = interp_exec(b->right, &rtype);
2583 rv.bool = rv.bool || right.bool;
2586 rv = interp_exec(b->left, &rvtype);
2588 rv = interp_exec(b->right, NULL);
2591 rv = interp_exec(b->right, &rvtype);
2595 ### Expressions: Comparison
2597 Of slightly higher precedence that Boolean expressions are Comparisons.
2598 A comparison takes arguments of any comparable type, but the two types
2601 To simplify the parsing we introduce an `eop` which can record an
2602 expression operator, and the `CMPop` non-terminal will match one of them.
2609 ###### ast functions
2610 static void free_eop(struct eop *e)
2624 ###### expr precedence
2625 $LEFT < > <= >= == != CMPop
2627 ###### expression grammar
2628 | Expression CMPop Expression ${ {
2629 struct binode *b = new(binode);
2639 CMPop -> < ${ $0.op = Less; }$
2640 | > ${ $0.op = Gtr; }$
2641 | <= ${ $0.op = LessEq; }$
2642 | >= ${ $0.op = GtrEq; }$
2643 | == ${ $0.op = Eql; }$
2644 | != ${ $0.op = NEql; }$
2646 ###### print binode cases
2654 if (bracket) printf("(");
2655 print_exec(b->left, -1, bracket);
2657 case Less: printf(" < "); break;
2658 case LessEq: printf(" <= "); break;
2659 case Gtr: printf(" > "); break;
2660 case GtrEq: printf(" >= "); break;
2661 case Eql: printf(" == "); break;
2662 case NEql: printf(" != "); break;
2663 default: abort(); // NOTEST
2665 print_exec(b->right, -1, bracket);
2666 if (bracket) printf(")");
2669 ###### propagate binode cases
2676 /* Both must match but not be labels, result is Tbool */
2677 t = propagate_types(b->left, c, ok, NULL, Rnolabel);
2679 propagate_types(b->right, c, ok, t, 0);
2681 t = propagate_types(b->right, c, ok, NULL, Rnolabel);
2683 t = propagate_types(b->left, c, ok, t, 0);
2685 if (!type_compat(type, Tbool, 0))
2686 type_err(c, "error: Comparison returns %1 but %2 expected", prog,
2687 Tbool, rules, type);
2690 ###### interp binode cases
2699 left = interp_exec(b->left, <ype);
2700 right = interp_exec(b->right, &rtype);
2701 cmp = value_cmp(ltype, rtype, &left, &right);
2704 case Less: rv.bool = cmp < 0; break;
2705 case LessEq: rv.bool = cmp <= 0; break;
2706 case Gtr: rv.bool = cmp > 0; break;
2707 case GtrEq: rv.bool = cmp >= 0; break;
2708 case Eql: rv.bool = cmp == 0; break;
2709 case NEql: rv.bool = cmp != 0; break;
2710 default: rv.bool = 0; break; // NOTEST
2715 ### Expressions: The rest
2717 The remaining expressions with the highest precedence are arithmetic,
2718 string concatenation, and string conversion. String concatenation
2719 (`++`) has the same precedence as multiplication and division, but lower
2722 String conversion is a temporary feature until I get a better type
2723 system. `$` is a prefix operator which expects a string and returns
2726 `+` and `-` are both infix and prefix operations (where they are
2727 absolute value and negation). These have different operator names.
2729 We also have a 'Bracket' operator which records where parentheses were
2730 found. This makes it easy to reproduce these when printing. Possibly I
2731 should only insert brackets were needed for precedence.
2741 ###### expr precedence
2747 ###### expression grammar
2748 | Expression Eop Expression ${ {
2749 struct binode *b = new(binode);
2756 | Expression Top Expression ${ {
2757 struct binode *b = new(binode);
2764 | ( Expression ) ${ {
2765 struct binode *b = new_pos(binode, $1);
2770 | Uop Expression ${ {
2771 struct binode *b = new(binode);
2776 | Value ${ $0 = $<1; }$
2777 | Variable ${ $0 = $<1; }$
2780 Eop -> + ${ $0.op = Plus; }$
2781 | - ${ $0.op = Minus; }$
2783 Uop -> + ${ $0.op = Absolute; }$
2784 | - ${ $0.op = Negate; }$
2785 | $ ${ $0.op = StringConv; }$
2787 Top -> * ${ $0.op = Times; }$
2788 | / ${ $0.op = Divide; }$
2789 | % ${ $0.op = Rem; }$
2790 | ++ ${ $0.op = Concat; }$
2792 ###### print binode cases
2799 if (bracket) printf("(");
2800 print_exec(b->left, indent, bracket);
2802 case Plus: fputs(" + ", stdout); break;
2803 case Minus: fputs(" - ", stdout); break;
2804 case Times: fputs(" * ", stdout); break;
2805 case Divide: fputs(" / ", stdout); break;
2806 case Rem: fputs(" % ", stdout); break;
2807 case Concat: fputs(" ++ ", stdout); break;
2808 default: abort(); // NOTEST
2810 print_exec(b->right, indent, bracket);
2811 if (bracket) printf(")");
2816 if (bracket) printf("(");
2818 case Absolute: fputs("+", stdout); break;
2819 case Negate: fputs("-", stdout); break;
2820 case StringConv: fputs("$", stdout); break;
2821 default: abort(); // NOTEST
2823 print_exec(b->right, indent, bracket);
2824 if (bracket) printf(")");
2828 print_exec(b->right, indent, bracket);
2832 ###### propagate binode cases
2838 /* both must be numbers, result is Tnum */
2841 /* as propagate_types ignores a NULL,
2842 * unary ops fit here too */
2843 propagate_types(b->left, c, ok, Tnum, 0);
2844 propagate_types(b->right, c, ok, Tnum, 0);
2845 if (!type_compat(type, Tnum, 0))
2846 type_err(c, "error: Arithmetic returns %1 but %2 expected", prog,
2851 /* both must be Tstr, result is Tstr */
2852 propagate_types(b->left, c, ok, Tstr, 0);
2853 propagate_types(b->right, c, ok, Tstr, 0);
2854 if (!type_compat(type, Tstr, 0))
2855 type_err(c, "error: Concat returns %1 but %2 expected", prog,
2860 /* op must be string, result is number */
2861 propagate_types(b->left, c, ok, Tstr, 0);
2862 if (!type_compat(type, Tnum, 0))
2864 "error: Can only convert string to number, not %1",
2865 prog, type, 0, NULL);
2869 return propagate_types(b->right, c, ok, type, 0);
2871 ###### interp binode cases
2874 rv = interp_exec(b->left, &rvtype);
2875 right = interp_exec(b->right, &rtype);
2876 mpq_add(rv.num, rv.num, right.num);
2879 rv = interp_exec(b->left, &rvtype);
2880 right = interp_exec(b->right, &rtype);
2881 mpq_sub(rv.num, rv.num, right.num);
2884 rv = interp_exec(b->left, &rvtype);
2885 right = interp_exec(b->right, &rtype);
2886 mpq_mul(rv.num, rv.num, right.num);
2889 rv = interp_exec(b->left, &rvtype);
2890 right = interp_exec(b->right, &rtype);
2891 mpq_div(rv.num, rv.num, right.num);
2896 left = interp_exec(b->left, <ype);
2897 right = interp_exec(b->right, &rtype);
2898 mpz_init(l); mpz_init(r); mpz_init(rem);
2899 mpz_tdiv_q(l, mpq_numref(left.num), mpq_denref(left.num));
2900 mpz_tdiv_q(r, mpq_numref(right.num), mpq_denref(right.num));
2901 mpz_tdiv_r(rem, l, r);
2902 val_init(Tnum, &rv);
2903 mpq_set_z(rv.num, rem);
2904 mpz_clear(r); mpz_clear(l); mpz_clear(rem);
2909 rv = interp_exec(b->right, &rvtype);
2910 mpq_neg(rv.num, rv.num);
2913 rv = interp_exec(b->right, &rvtype);
2914 mpq_abs(rv.num, rv.num);
2917 rv = interp_exec(b->right, &rvtype);
2920 left = interp_exec(b->left, <ype);
2921 right = interp_exec(b->right, &rtype);
2923 rv.str = text_join(left.str, right.str);
2926 right = interp_exec(b->right, &rvtype);
2930 struct text tx = right.str;
2933 if (tx.txt[0] == '-') {
2938 if (number_parse(rv.num, tail, tx) == 0)
2941 mpq_neg(rv.num, rv.num);
2943 printf("Unsupported suffix: %.*s\n", tx.len, tx.txt);
2947 ###### value functions
2949 static struct text text_join(struct text a, struct text b)
2952 rv.len = a.len + b.len;
2953 rv.txt = malloc(rv.len);
2954 memcpy(rv.txt, a.txt, a.len);
2955 memcpy(rv.txt+a.len, b.txt, b.len);
2959 ### Blocks, Statements, and Statement lists.
2961 Now that we have expressions out of the way we need to turn to
2962 statements. There are simple statements and more complex statements.
2963 Simple statements do not contain (syntactic) newlines, complex statements do.
2965 Statements often come in sequences and we have corresponding simple
2966 statement lists and complex statement lists.
2967 The former comprise only simple statements separated by semicolons.
2968 The later comprise complex statements and simple statement lists. They are
2969 separated by newlines. Thus the semicolon is only used to separate
2970 simple statements on the one line. This may be overly restrictive,
2971 but I'm not sure I ever want a complex statement to share a line with
2974 Note that a simple statement list can still use multiple lines if
2975 subsequent lines are indented, so
2977 ###### Example: wrapped simple statement list
2982 is a single simple statement list. This might allow room for
2983 confusion, so I'm not set on it yet.
2985 A simple statement list needs no extra syntax. A complex statement
2986 list has two syntactic forms. It can be enclosed in braces (much like
2987 C blocks), or it can be introduced by an indent and continue until an
2988 unindented newline (much like Python blocks). With this extra syntax
2989 it is referred to as a block.
2991 Note that a block does not have to include any newlines if it only
2992 contains simple statements. So both of:
2994 if condition: a=b; d=f
2996 if condition { a=b; print f }
3000 In either case the list is constructed from a `binode` list with
3001 `Block` as the operator. When parsing the list it is most convenient
3002 to append to the end, so a list is a list and a statement. When using
3003 the list it is more convenient to consider a list to be a statement
3004 and a list. So we need a function to re-order a list.
3005 `reorder_bilist` serves this purpose.
3007 The only stand-alone statement we introduce at this stage is `pass`
3008 which does nothing and is represented as a `NULL` pointer in a `Block`
3009 list. Other stand-alone statements will follow once the infrastructure
3020 Block -> { IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
3021 | { SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
3022 | SimpleStatements ; ${ $0 = reorder_bilist($<SS); }$
3023 | SimpleStatements EOL ${ $0 = reorder_bilist($<SS); }$
3024 | IN OptNL Statementlist OUT ${ $0 = $<Sl; }$
3026 OpenBlock -> OpenScope { IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
3027 | OpenScope { SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
3028 | OpenScope SimpleStatements ; ${ $0 = reorder_bilist($<SS); }$
3029 | OpenScope SimpleStatements EOL ${ $0 = reorder_bilist($<SS); }$
3030 | IN OpenScope OptNL Statementlist OUT ${ $0 = $<Sl; }$
3032 UseBlock -> { OpenScope IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
3033 | { OpenScope SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
3034 | IN OpenScope OptNL Statementlist OUT ${ $0 = $<Sl; }$
3036 ColonBlock -> { IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
3037 | { SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
3038 | : SimpleStatements ; ${ $0 = reorder_bilist($<SS); }$
3039 | : SimpleStatements EOL ${ $0 = reorder_bilist($<SS); }$
3040 | : IN OptNL Statementlist OUT ${ $0 = $<Sl; }$
3042 Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<CS); }$
3044 ComplexStatements -> ComplexStatements ComplexStatement ${
3054 | ComplexStatement ${
3066 ComplexStatement -> SimpleStatements Newlines ${
3067 $0 = reorder_bilist($<SS);
3069 | SimpleStatements ; Newlines ${
3070 $0 = reorder_bilist($<SS);
3072 ## ComplexStatement Grammar
3075 SimpleStatements -> SimpleStatements ; SimpleStatement ${
3081 | SimpleStatement ${
3089 SimpleStatement -> pass ${ $0 = NULL; }$
3090 | ERROR ${ tok_err(c, "Syntax error in statement", &$1); }$
3091 ## SimpleStatement Grammar
3093 ###### print binode cases
3097 if (b->left == NULL)
3100 print_exec(b->left, indent, bracket);
3103 print_exec(b->right, indent, bracket);
3106 // block, one per line
3107 if (b->left == NULL)
3108 do_indent(indent, "pass\n");
3110 print_exec(b->left, indent, bracket);
3112 print_exec(b->right, indent, bracket);
3116 ###### propagate binode cases
3119 /* If any statement returns something other than Tnone
3120 * or Tbool then all such must return same type.
3121 * As each statement may be Tnone or something else,
3122 * we must always pass NULL (unknown) down, otherwise an incorrect
3123 * error might occur. We never return Tnone unless it is
3128 for (e = b; e; e = cast(binode, e->right)) {
3129 t = propagate_types(e->left, c, ok, NULL, rules);
3130 if ((rules & Rboolok) && t == Tbool)
3132 if (t && t != Tnone && t != Tbool) {
3136 type_err(c, "error: expected %1%r, found %2",
3137 e->left, type, rules, t);
3143 ###### interp binode cases
3145 while (rvtype == Tnone &&
3148 rv = interp_exec(b->left, &rvtype);
3149 b = cast(binode, b->right);
3153 ### The Print statement
3155 `print` is a simple statement that takes a comma-separated list of
3156 expressions and prints the values separated by spaces and terminated
3157 by a newline. No control of formatting is possible.
3159 `print` faces the same list-ordering issue as blocks, and uses the
3165 ##### expr precedence
3168 ###### SimpleStatement Grammar
3170 | print ExpressionList ${
3171 $0 = reorder_bilist($<2);
3173 | print ExpressionList , ${
3178 $0 = reorder_bilist($0);
3189 ExpressionList -> ExpressionList , Expression ${
3202 ###### print binode cases
3205 do_indent(indent, "print");
3209 print_exec(b->left, -1, bracket);
3213 b = cast(binode, b->right);
3219 ###### propagate binode cases
3222 /* don't care but all must be consistent */
3223 propagate_types(b->left, c, ok, NULL, Rnolabel);
3224 propagate_types(b->right, c, ok, NULL, Rnolabel);
3227 ###### interp binode cases
3233 for ( ; b; b = cast(binode, b->right))
3237 left = interp_exec(b->left, <ype);
3238 print_value(ltype, &left);
3239 free_value(ltype, &left);
3250 ###### Assignment statement
3252 An assignment will assign a value to a variable, providing it hasn't
3253 been declared as a constant. The analysis phase ensures that the type
3254 will be correct so the interpreter just needs to perform the
3255 calculation. There is a form of assignment which declares a new
3256 variable as well as assigning a value. If a name is assigned before
3257 it is declared, and error will be raised as the name is created as
3258 `Tlabel` and it is illegal to assign to such names.
3264 ###### declare terminals
3267 ###### SimpleStatement Grammar
3268 | Variable = Expression ${
3274 | VariableDecl = Expression ${
3282 if ($1->var->where_set == NULL) {
3284 "Variable declared with no type or value: %v",
3294 ###### print binode cases
3297 do_indent(indent, "");
3298 print_exec(b->left, indent, bracket);
3300 print_exec(b->right, indent, bracket);
3307 struct variable *v = cast(var, b->left)->var;
3308 do_indent(indent, "");
3309 print_exec(b->left, indent, bracket);
3310 if (cast(var, b->left)->var->constant) {
3311 if (v->where_decl == v->where_set) {
3313 type_print(v->type, stdout);
3318 if (v->where_decl == v->where_set) {
3320 type_print(v->type, stdout);
3327 print_exec(b->right, indent, bracket);
3334 ###### propagate binode cases
3338 /* Both must match and not be labels,
3339 * Type must support 'dup',
3340 * For Assign, left must not be constant.
3343 t = propagate_types(b->left, c, ok, NULL,
3344 Rnolabel | (b->op == Assign ? Rnoconstant : 0));
3349 if (propagate_types(b->right, c, ok, t, 0) != t)
3350 if (b->left->type == Xvar)
3351 type_err(c, "info: variable '%v' was set as %1 here.",
3352 cast(var, b->left)->var->where_set, t, rules, NULL);
3354 t = propagate_types(b->right, c, ok, NULL, Rnolabel);
3356 propagate_types(b->left, c, ok, t,
3357 (b->op == Assign ? Rnoconstant : 0));
3359 if (t && t->dup == NULL)
3360 type_err(c, "error: cannot assign value of type %1", b, t, 0, NULL);
3365 ###### interp binode cases
3368 lleft = linterp_exec(b->left, <ype);
3369 right = interp_exec(b->right, &rtype);
3371 free_value(ltype, lleft);
3372 dup_value(ltype, &right, lleft);
3379 struct variable *v = cast(var, b->left)->var;
3383 right = interp_exec(b->right, &rtype);
3384 free_value(v->type, v->val);
3386 v->val = val_alloc(v->type, &right);
3389 free_value(v->type, v->val);
3390 v->val = val_alloc(v->type, NULL);
3395 ### The `use` statement
3397 The `use` statement is the last "simple" statement. It is needed when
3398 the condition in a conditional statement is a block. `use` works much
3399 like `return` in C, but only completes the `condition`, not the whole
3405 ###### expr precedence
3408 ###### SimpleStatement Grammar
3410 $0 = new_pos(binode, $1);
3413 if ($0->right->type == Xvar) {
3414 struct var *v = cast(var, $0->right);
3415 if (v->var->type == Tnone) {
3416 /* Convert this to a label */
3417 v->var->type = Tlabel;
3418 v->var->val = val_alloc(Tlabel, NULL);
3419 v->var->val->label = v->var->val;
3424 ###### print binode cases
3427 do_indent(indent, "use ");
3428 print_exec(b->right, -1, bracket);
3433 ###### propagate binode cases
3436 /* result matches value */
3437 return propagate_types(b->right, c, ok, type, 0);
3439 ###### interp binode cases
3442 rv = interp_exec(b->right, &rvtype);
3445 ### The Conditional Statement
3447 This is the biggy and currently the only complex statement. This
3448 subsumes `if`, `while`, `do/while`, `switch`, and some parts of `for`.
3449 It is comprised of a number of parts, all of which are optional though
3450 set combinations apply. Each part is (usually) a key word (`then` is
3451 sometimes optional) followed by either an expression or a code block,
3452 except the `casepart` which is a "key word and an expression" followed
3453 by a code block. The code-block option is valid for all parts and,
3454 where an expression is also allowed, the code block can use the `use`
3455 statement to report a value. If the code block does not report a value
3456 the effect is similar to reporting `True`.
3458 The `else` and `case` parts, as well as `then` when combined with
3459 `if`, can contain a `use` statement which will apply to some
3460 containing conditional statement. `for` parts, `do` parts and `then`
3461 parts used with `for` can never contain a `use`, except in some
3462 subordinate conditional statement.
3464 If there is a `forpart`, it is executed first, only once.
3465 If there is a `dopart`, then it is executed repeatedly providing
3466 always that the `condpart` or `cond`, if present, does not return a non-True
3467 value. `condpart` can fail to return any value if it simply executes
3468 to completion. This is treated the same as returning `True`.
3470 If there is a `thenpart` it will be executed whenever the `condpart`
3471 or `cond` returns True (or does not return any value), but this will happen
3472 *after* `dopart` (when present).
3474 If `elsepart` is present it will be executed at most once when the
3475 condition returns `False` or some value that isn't `True` and isn't
3476 matched by any `casepart`. If there are any `casepart`s, they will be
3477 executed when the condition returns a matching value.
3479 The particular sorts of values allowed in case parts has not yet been
3480 determined in the language design, so nothing is prohibited.
3482 The various blocks in this complex statement potentially provide scope
3483 for variables as described earlier. Each such block must include the
3484 "OpenScope" nonterminal before parsing the block, and must call
3485 `var_block_close()` when closing the block.
3487 The code following "`if`", "`switch`" and "`for`" does not get its own
3488 scope, but is in a scope covering the whole statement, so names
3489 declared there cannot be redeclared elsewhere. Similarly the
3490 condition following "`while`" is in a scope the covers the body
3491 ("`do`" part) of the loop, and which does not allow conditional scope
3492 extension. Code following "`then`" (both looping and non-looping),
3493 "`else`" and "`case`" each get their own local scope.
3495 The type requirements on the code block in a `whilepart` are quite
3496 unusal. It is allowed to return a value of some identifiable type, in
3497 which case the loop aborts and an appropriate `casepart` is run, or it
3498 can return a Boolean, in which case the loop either continues to the
3499 `dopart` (on `True`) or aborts and runs the `elsepart` (on `False`).
3500 This is different both from the `ifpart` code block which is expected to
3501 return a Boolean, or the `switchpart` code block which is expected to
3502 return the same type as the casepart values. The correct analysis of
3503 the type of the `whilepart` code block is the reason for the
3504 `Rboolok` flag which is passed to `propagate_types()`.
3506 The `cond_statement` cannot fit into a `binode` so a new `exec` is
3515 struct exec *action;
3516 struct casepart *next;
3518 struct cond_statement {
3520 struct exec *forpart, *condpart, *dopart, *thenpart, *elsepart;
3521 struct casepart *casepart;
3524 ###### ast functions
3526 static void free_casepart(struct casepart *cp)
3530 free_exec(cp->value);
3531 free_exec(cp->action);
3538 static void free_cond_statement(struct cond_statement *s)
3542 free_exec(s->forpart);
3543 free_exec(s->condpart);
3544 free_exec(s->dopart);
3545 free_exec(s->thenpart);
3546 free_exec(s->elsepart);
3547 free_casepart(s->casepart);
3551 ###### free exec cases
3552 case Xcond_statement: free_cond_statement(cast(cond_statement, e)); break;
3554 ###### ComplexStatement Grammar
3555 | CondStatement ${ $0 = $<1; }$
3557 ###### expr precedence
3558 $TERM for then while do
3565 // A CondStatement must end with EOL, as does CondSuffix and
3567 // ForPart, ThenPart, SwitchPart, CasePart are non-empty and
3568 // may or may not end with EOL
3569 // WhilePart and IfPart include an appropriate Suffix
3572 // Both ForPart and Whilepart open scopes, and CondSuffix only
3573 // closes one - so in the first branch here we have another to close.
3574 CondStatement -> ForPart OptNL ThenPart OptNL WhilePart CondSuffix ${
3577 $0->thenpart = $<TP;
3578 $0->condpart = $WP.condpart; $WP.condpart = NULL;
3579 $0->dopart = $WP.dopart; $WP.dopart = NULL;
3580 var_block_close(c, CloseSequential);
3582 | ForPart OptNL WhilePart CondSuffix ${
3585 $0->condpart = $WP.condpart; $WP.condpart = NULL;
3586 $0->dopart = $WP.dopart; $WP.dopart = NULL;
3587 var_block_close(c, CloseSequential);
3589 | WhilePart CondSuffix ${
3591 $0->condpart = $WP.condpart; $WP.condpart = NULL;
3592 $0->dopart = $WP.dopart; $WP.dopart = NULL;
3594 | SwitchPart OptNL CasePart CondSuffix ${
3596 $0->condpart = $<SP;
3597 $CP->next = $0->casepart;
3598 $0->casepart = $<CP;
3600 | SwitchPart : IN OptNL CasePart CondSuffix OUT Newlines ${
3602 $0->condpart = $<SP;
3603 $CP->next = $0->casepart;
3604 $0->casepart = $<CP;
3606 | IfPart IfSuffix ${
3608 $0->condpart = $IP.condpart; $IP.condpart = NULL;
3609 $0->thenpart = $IP.thenpart; $IP.thenpart = NULL;
3610 // This is where we close an "if" statement
3611 var_block_close(c, CloseSequential);
3614 CondSuffix -> IfSuffix ${
3616 // This is where we close scope of the whole
3617 // "for" or "while" statement
3618 var_block_close(c, CloseSequential);
3620 | Newlines CasePart CondSuffix ${
3622 $CP->next = $0->casepart;
3623 $0->casepart = $<CP;
3625 | CasePart CondSuffix ${
3627 $CP->next = $0->casepart;
3628 $0->casepart = $<CP;
3631 IfSuffix -> Newlines ${ $0 = new(cond_statement); }$
3632 | Newlines ElsePart ${ $0 = $<EP; }$
3633 | ElsePart ${$0 = $<EP; }$
3635 ElsePart -> else OpenBlock Newlines ${
3636 $0 = new(cond_statement);
3637 $0->elsepart = $<OB;
3638 var_block_close(c, CloseElse);
3640 | else OpenScope CondStatement ${
3641 $0 = new(cond_statement);
3642 $0->elsepart = $<CS;
3643 var_block_close(c, CloseElse);
3647 CasePart -> case Expression OpenScope ColonBlock ${
3648 $0 = calloc(1,sizeof(struct casepart));
3651 var_block_close(c, CloseParallel);
3655 // These scopes are closed in CondSuffix
3656 ForPart -> for OpenBlock ${
3660 ThenPart -> then OpenBlock ${
3662 var_block_close(c, CloseSequential);
3666 // This scope is closed in CondSuffix
3667 WhilePart -> while UseBlock OptNL do Block ${
3671 | while OpenScope Expression ColonBlock ${
3672 $0.condpart = $<Exp;
3676 IfPart -> if UseBlock OptNL then OpenBlock ClosePara ${
3680 | if OpenScope Expression OpenScope ColonBlock ClosePara ${
3684 | if OpenScope Expression OpenScope OptNL then Block ClosePara ${
3690 // This scope is closed in CondSuffix
3691 SwitchPart -> switch OpenScope Expression ${
3694 | switch UseBlock ${
3698 ###### print exec cases
3700 case Xcond_statement:
3702 struct cond_statement *cs = cast(cond_statement, e);
3703 struct casepart *cp;
3705 do_indent(indent, "for");
3706 if (bracket) printf(" {\n"); else printf("\n");
3707 print_exec(cs->forpart, indent+1, bracket);
3710 do_indent(indent, "} then {\n");
3712 do_indent(indent, "then\n");
3713 print_exec(cs->thenpart, indent+1, bracket);
3715 if (bracket) do_indent(indent, "}\n");
3719 if (cs->condpart && cs->condpart->type == Xbinode &&
3720 cast(binode, cs->condpart)->op == Block) {
3722 do_indent(indent, "while {\n");
3724 do_indent(indent, "while\n");
3725 print_exec(cs->condpart, indent+1, bracket);
3727 do_indent(indent, "} do {\n");
3729 do_indent(indent, "do\n");
3730 print_exec(cs->dopart, indent+1, bracket);
3732 do_indent(indent, "}\n");
3734 do_indent(indent, "while ");
3735 print_exec(cs->condpart, 0, bracket);
3740 print_exec(cs->dopart, indent+1, bracket);
3742 do_indent(indent, "}\n");
3747 do_indent(indent, "switch");
3749 do_indent(indent, "if");
3750 if (cs->condpart && cs->condpart->type == Xbinode &&
3751 cast(binode, cs->condpart)->op == Block) {
3756 print_exec(cs->condpart, indent+1, bracket);
3758 do_indent(indent, "}\n");
3760 do_indent(indent, "then:\n");
3761 print_exec(cs->thenpart, indent+1, bracket);
3765 print_exec(cs->condpart, 0, bracket);
3771 print_exec(cs->thenpart, indent+1, bracket);
3773 do_indent(indent, "}\n");
3778 for (cp = cs->casepart; cp; cp = cp->next) {
3779 do_indent(indent, "case ");
3780 print_exec(cp->value, -1, 0);
3785 print_exec(cp->action, indent+1, bracket);
3787 do_indent(indent, "}\n");
3790 do_indent(indent, "else");
3795 print_exec(cs->elsepart, indent+1, bracket);
3797 do_indent(indent, "}\n");
3802 ###### propagate exec cases
3803 case Xcond_statement:
3805 // forpart and dopart must return Tnone
3806 // thenpart must return Tnone if there is a dopart,
3807 // otherwise it is like elsepart.
3809 // be bool if there is no casepart
3810 // match casepart->values if there is a switchpart
3811 // either be bool or match casepart->value if there
3813 // elsepart and casepart->action must match the return type
3814 // expected of this statement.
3815 struct cond_statement *cs = cast(cond_statement, prog);
3816 struct casepart *cp;
3818 t = propagate_types(cs->forpart, c, ok, Tnone, 0);
3819 if (!type_compat(Tnone, t, 0))
3821 t = propagate_types(cs->dopart, c, ok, Tnone, 0);
3822 if (!type_compat(Tnone, t, 0))
3825 t = propagate_types(cs->thenpart, c, ok, Tnone, 0);
3826 if (!type_compat(Tnone, t, 0))
3829 if (cs->casepart == NULL)
3830 propagate_types(cs->condpart, c, ok, Tbool, 0);
3832 /* Condpart must match case values, with bool permitted */
3834 for (cp = cs->casepart;
3835 cp && !t; cp = cp->next)
3836 t = propagate_types(cp->value, c, ok, NULL, 0);
3837 if (!t && cs->condpart)
3838 t = propagate_types(cs->condpart, c, ok, NULL, Rboolok);
3839 // Now we have a type (I hope) push it down
3841 for (cp = cs->casepart; cp; cp = cp->next)
3842 propagate_types(cp->value, c, ok, t, 0);
3843 propagate_types(cs->condpart, c, ok, t, Rboolok);
3846 // (if)then, else, and case parts must return expected type.
3847 if (!cs->dopart && !type)
3848 type = propagate_types(cs->thenpart, c, ok, NULL, rules);
3850 type = propagate_types(cs->elsepart, c, ok, NULL, rules);
3851 for (cp = cs->casepart;
3854 type = propagate_types(cp->action, c, ok, NULL, rules);
3857 propagate_types(cs->thenpart, c, ok, type, rules);
3858 propagate_types(cs->elsepart, c, ok, type, rules);
3859 for (cp = cs->casepart; cp ; cp = cp->next)
3860 propagate_types(cp->action, c, ok, type, rules);
3866 ###### interp exec cases
3867 case Xcond_statement:
3869 struct value v, cnd;
3870 struct type *vtype, *cndtype;
3871 struct casepart *cp;
3872 struct cond_statement *c = cast(cond_statement, e);
3875 interp_exec(c->forpart, NULL);
3878 cnd = interp_exec(c->condpart, &cndtype);
3881 if (!(cndtype == Tnone ||
3882 (cndtype == Tbool && cnd.bool != 0)))
3884 // cnd is Tnone or Tbool, doesn't need to be freed
3886 interp_exec(c->dopart, NULL);
3889 rv = interp_exec(c->thenpart, &rvtype);
3890 if (rvtype != Tnone || !c->dopart)
3892 free_value(rvtype, &rv);
3895 } while (c->dopart);
3897 for (cp = c->casepart; cp; cp = cp->next) {
3898 v = interp_exec(cp->value, &vtype);
3899 if (value_cmp(cndtype, vtype, &v, &cnd) == 0) {
3900 free_value(vtype, &v);
3901 free_value(cndtype, &cnd);
3902 rv = interp_exec(cp->action, &rvtype);
3905 free_value(vtype, &v);
3907 free_value(cndtype, &cnd);
3909 rv = interp_exec(c->elsepart, &rvtype);
3916 ### Top level structure
3918 All the language elements so far can be used in various places. Now
3919 it is time to clarify what those places are.
3921 At the top level of a file there will be a number of declarations.
3922 Many of the things that can be declared haven't been described yet,
3923 such as functions, procedures, imports, and probably more.
3924 For now there are two sorts of things that can appear at the top
3925 level. They are predefined constants, `struct` types, and the main
3926 program. While the syntax will allow the main program to appear
3927 multiple times, that will trigger an error if it is actually attempted.
3929 The various declarations do not return anything. They store the
3930 various declarations in the parse context.
3932 ###### Parser: grammar
3935 Ocean -> OptNL DeclarationList
3937 ## declare terminals
3944 DeclarationList -> Declaration
3945 | DeclarationList Declaration
3947 Declaration -> ERROR Newlines ${
3949 "error: unhandled parse error", &$1);
3955 ## top level grammar
3957 ### The `const` section
3959 As well as being defined in with the code that uses them, constants
3960 can be declared at the top level. These have full-file scope, so they
3961 are always `InScope`. The value of a top level constant can be given
3962 as an expression, and this is evaluated immediately rather than in the
3963 later interpretation stage. Once we add functions to the language, we
3964 will need rules concern which, if any, can be used to define a top
3967 Constants are defined in a section that starts with the reserved word
3968 `const` and then has a block with a list of assignment statements.
3969 For syntactic consistency, these must use the double-colon syntax to
3970 make it clear that they are constants. Type can also be given: if
3971 not, the type will be determined during analysis, as with other
3974 As the types constants are inserted at the head of a list, printing
3975 them in the same order that they were read is not straight forward.
3976 We take a quadratic approach here and count the number of constants
3977 (variables of depth 0), then count down from there, each time
3978 searching through for the Nth constant for decreasing N.
3980 ###### top level grammar
3984 DeclareConstant -> const { IN OptNL ConstList OUT OptNL } Newlines
3985 | const { SimpleConstList } Newlines
3986 | const IN OptNL ConstList OUT Newlines
3987 | const SimpleConstList Newlines
3989 ConstList -> ConstList SimpleConstLine
3991 SimpleConstList -> SimpleConstList ; Const
3994 SimpleConstLine -> SimpleConstList Newlines
3995 | ERROR Newlines ${ tok_err(c, "Syntax error in constant", &$1); }$
3998 CType -> Type ${ $0 = $<1; }$
4001 Const -> IDENTIFIER :: CType = Expression ${ {
4005 v = var_decl(c, $1.txt);
4007 struct var *var = new_pos(var, $1);
4008 v->where_decl = var;
4013 v = var_ref(c, $1.txt);
4014 tok_err(c, "error: name already declared", &$1);
4015 type_err(c, "info: this is where '%v' was first declared",
4016 v->where_decl, NULL, 0, NULL);
4020 propagate_types($5, c, &ok, $3, 0);
4025 struct value res = interp_exec($5, &v->type);
4026 v->val = val_alloc(v->type, &res);
4030 ###### print const decls
4035 while (target != 0) {
4037 for (v = context.in_scope; v; v=v->in_scope)
4038 if (v->depth == 0) {
4049 printf(" %.*s :: ", v->name->name.len, v->name->name.txt);
4050 type_print(v->type, stdout);
4052 if (v->type == Tstr)
4054 print_value(v->type, v->val);
4055 if (v->type == Tstr)
4063 ### Finally the whole program.
4065 Somewhat reminiscent of Pascal a (current) Ocean program starts with
4066 the keyword "program" and a list of variable names which are assigned
4067 values from command line arguments. Following this is a `block` which
4068 is the code to execute. Unlike Pascal, constants and other
4069 declarations come *before* the program.
4071 As this is the top level, several things are handled a bit
4073 The whole program is not interpreted by `interp_exec` as that isn't
4074 passed the argument list which the program requires. Similarly type
4075 analysis is a bit more interesting at this level.
4080 ###### top level grammar
4082 DeclareProgram -> Program ${ {
4084 type_err(c, "Program defined a second time",
4093 Program -> program OpenScope Varlist ColonBlock Newlines ${
4096 $0->left = reorder_bilist($<Vl);
4098 var_block_close(c, CloseSequential);
4099 if (c->scope_stack && !c->parse_error) abort();
4102 Varlist -> Varlist ArgDecl ${
4111 ArgDecl -> IDENTIFIER ${ {
4112 struct variable *v = var_decl(c, $1.txt);
4119 ###### print binode cases
4121 do_indent(indent, "program");
4122 for (b2 = cast(binode, b->left); b2; b2 = cast(binode, b2->right)) {
4124 print_exec(b2->left, 0, 0);
4130 print_exec(b->right, indent+1, bracket);
4132 do_indent(indent, "}\n");
4135 ###### propagate binode cases
4136 case Program: abort(); // NOTEST
4138 ###### core functions
4140 static int analyse_prog(struct exec *prog, struct parse_context *c)
4142 struct binode *b = cast(binode, prog);
4149 propagate_types(b->right, c, &ok, Tnone, 0);
4154 for (b = cast(binode, b->left); b; b = cast(binode, b->right)) {
4155 struct var *v = cast(var, b->left);
4156 if (!v->var->type) {
4157 v->var->where_set = b;
4158 v->var->type = Tstr;
4162 b = cast(binode, prog);
4165 propagate_types(b->right, c, &ok, Tnone, 0);
4170 /* Make sure everything is still consistent */
4171 propagate_types(b->right, c, &ok, Tnone, 0);
4175 static void interp_prog(struct exec *prog, char **argv)
4177 struct binode *p = cast(binode, prog);
4184 al = cast(binode, p->left);
4186 struct var *v = cast(var, al->left);
4187 struct value *vl = v->var->val;
4189 if (argv[0] == NULL) {
4190 printf("Not enough args\n");
4193 al = cast(binode, al->right);
4195 free_value(v->var->type, vl);
4197 vl = val_alloc(v->var->type, NULL);
4200 free_value(v->var->type, vl);
4201 vl->str.len = strlen(argv[0]);
4202 vl->str.txt = malloc(vl->str.len);
4203 memcpy(vl->str.txt, argv[0], vl->str.len);
4206 v = interp_exec(p->right, &vtype);
4207 free_value(vtype, &v);
4210 ###### interp binode cases
4211 case Program: abort(); // NOTEST
4213 ## And now to test it out.
4215 Having a language requires having a "hello world" program. I'll
4216 provide a little more than that: a program that prints "Hello world"
4217 finds the GCD of two numbers, prints the first few elements of
4218 Fibonacci, performs a binary search for a number, and a few other
4219 things which will likely grow as the languages grows.
4221 ###### File: oceani.mk
4224 @echo "===== DEMO ====="
4225 ./oceani --section "demo: hello" oceani.mdc 55 33
4231 four ::= 2 + 2 ; five ::= 10/2
4232 const pie ::= "I like Pie";
4233 cake ::= "The cake is"
4242 print "Hello World, what lovely oceans you have!"
4243 print "Are there", five, "?"
4244 print pi, pie, "but", cake
4246 A := $Astr; B := $Bstr
4248 /* When a variable is defined in both branches of an 'if',
4249 * and used afterwards, the variables are merged.
4255 print "Is", A, "bigger than", B,"? ", bigger
4256 /* If a variable is not used after the 'if', no
4257 * merge happens, so types can be different
4260 double:string = "yes"
4261 print A, "is more than twice", B, "?", double
4264 print "double", B, "is", double
4269 if a > 0 and then b > 0:
4275 print "GCD of", A, "and", B,"is", a
4277 print a, "is not positive, cannot calculate GCD"
4279 print b, "is not positive, cannot calculate GCD"
4284 print "Fibonacci:", f1,f2,
4285 then togo = togo - 1
4293 /* Binary search... */
4298 mid := (lo + hi) / 2
4310 print "Yay, I found", target
4312 print "Closest I found was", mid
4317 // "middle square" PRNG. Not particularly good, but one my
4318 // Dad taught me - the first one I ever heard of.
4319 for i:=1; then i = i + 1; while i < size:
4320 n := list[i-1] * list[i-1]
4321 list[i] = (n / 100) % 10 000
4323 print "Before sort:",
4324 for i:=0; then i = i + 1; while i < size:
4328 for i := 1; then i=i+1; while i < size:
4329 for j:=i-1; then j=j-1; while j >= 0:
4330 if list[j] > list[j+1]:
4334 print " After sort:",
4335 for i:=0; then i = i + 1; while i < size:
4339 if 1 == 2 then print "yes"; else print "no"
4343 bob.alive = (bob.name == "Hello")
4344 print "bob", "is" if bob.alive else "isn't", "alive"