1 # Ocean Interpreter - Jamison Creek version
3 Ocean is intended to be a compiled language, so this interpreter is
4 not targeted at being the final product. It is, rather, an intermediate
5 stage and fills that role in two distinct ways.
7 Firstly, it exists as a platform to experiment with the early language
8 design. An interpreter is easy to write and easy to get working, so
9 the barrier for entry is lower if I aim to start with an interpreter.
11 Secondly, the plan for the Ocean compiler is to write it in the
12 [Ocean language](http://ocean-lang.org). To achieve this we naturally
13 need some sort of boot-strap process and this interpreter - written in
14 portable C - will fill that role. It will be used to bootstrap the
17 Two features that are not needed to fill either of these roles are
18 performance and completeness. The interpreter only needs to be fast
19 enough to run small test programs and occasionally to run the compiler
20 on itself. It only needs to be complete enough to test aspects of the
21 design which are developed before the compiler is working, and to run
22 the compiler on itself. Any features not used by the compiler when
23 compiling itself are superfluous. They may be included anyway, but
26 Nonetheless, the interpreter should end up being reasonably complete,
27 and any performance bottlenecks which appear and are easily fixed, will
32 This third version of the interpreter exists to test out some initial
33 ideas relating to types. Particularly it adds arrays (indexed from
34 zero) and simple structures. Basic control flow and variable scoping
35 are already fairly well established, as are basic numerical and
38 Some operators that have only recently been added, and so have not
39 generated all that much experience yet are "and then" and "or else" as
40 short-circuit Boolean operators, and the "if ... else" trinary
41 operator which can select between two expressions based on a third
42 (which appears syntactically in the middle).
44 Elements that are present purely to make a usable language, and
45 without any expectation that they will remain, are the "program'
46 clause, which provides a list of variables to received command-line
47 arguments, and the "print" statement which performs simple output.
49 The current scalar types are "number", "Boolean", and "string".
50 Boolean will likely stay in its current form, the other two might, but
51 could just as easily be changed.
55 Versions of the interpreter which obviously do not support a complete
56 language will be named after creeks and streams. This one is Jamison
59 Once we have something reasonably resembling a complete language, the
60 names of rivers will be used.
61 Early versions of the compiler will be named after seas. Major
62 releases of the compiler will be named after oceans. Hopefully I will
63 be finished once I get to the Pacific Ocean release.
67 As well as parsing and executing a program, the interpreter can print
68 out the program from the parsed internal structure. This is useful
69 for validating the parsing.
70 So the main requirements of the interpreter are:
72 - Parse the program, possibly with tracing,
73 - Analyse the parsed program to ensure consistency,
75 - Execute the program, if no parsing or consistency errors were found.
77 This is all performed by a single C program extracted with
80 There will be two formats for printing the program: a default and one
81 that uses bracketing. So a `--bracket` command line option is needed
82 for that. Normally the first code section found is used, however an
83 alternate section can be requested so that a file (such as this one)
84 can contain multiple programs. This is effected with the `--section`
87 This code must be compiled with `-fplan9-extensions` so that anonymous
88 structures can be used.
90 ###### File: oceani.mk
92 myCFLAGS := -Wall -g -fplan9-extensions
93 CFLAGS := $(filter-out $(myCFLAGS),$(CFLAGS)) $(myCFLAGS)
94 myLDLIBS:= libparser.o libscanner.o libmdcode.o -licuuc
95 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
97 all :: $(LDLIBS) oceani
98 oceani.c oceani.h : oceani.mdc parsergen
99 ./parsergen -o oceani --LALR --tag Parser oceani.mdc
100 oceani.mk: oceani.mdc md2c
103 oceani: oceani.o $(LDLIBS)
104 $(CC) $(CFLAGS) -o oceani oceani.o $(LDLIBS)
106 ###### Parser: header
109 struct parse_context {
110 struct token_config config;
119 #define container_of(ptr, type, member) ({ \
120 const typeof( ((type *)0)->member ) *__mptr = (ptr); \
121 (type *)( (char *)__mptr - offsetof(type,member) );})
123 #define config2context(_conf) container_of(_conf, struct parse_context, \
126 ###### Parser: reduce
127 struct parse_context *c = config2context(config);
135 #include <sys/mman.h>
154 static char Usage[] =
155 "Usage: oceani --trace --print --noexec --brackets --section=SectionName prog.ocn\n";
156 static const struct option long_options[] = {
157 {"trace", 0, NULL, 't'},
158 {"print", 0, NULL, 'p'},
159 {"noexec", 0, NULL, 'n'},
160 {"brackets", 0, NULL, 'b'},
161 {"section", 1, NULL, 's'},
164 const char *options = "tpnbs";
165 int main(int argc, char *argv[])
170 struct section *s, *ss;
171 char *section = NULL;
172 struct parse_context context = {
174 .ignored = (1 << TK_mark),
175 .number_chars = ".,_+- ",
180 int doprint=0, dotrace=0, doexec=1, brackets=0;
182 while ((opt = getopt_long(argc, argv, options, long_options, NULL))
185 case 't': dotrace=1; break;
186 case 'p': doprint=1; break;
187 case 'n': doexec=0; break;
188 case 'b': brackets=1; break;
189 case 's': section = optarg; break;
190 default: fprintf(stderr, Usage);
194 if (optind >= argc) {
195 fprintf(stderr, "oceani: no input file given\n");
198 fd = open(argv[optind], O_RDONLY);
200 fprintf(stderr, "oceani: cannot open %s\n", argv[optind]);
203 context.file_name = argv[optind];
204 len = lseek(fd, 0, 2);
205 file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
206 s = code_extract(file, file+len, NULL);
208 fprintf(stderr, "oceani: could not find any code in %s\n",
213 ## context initialization
216 for (ss = s; ss; ss = ss->next) {
217 struct text sec = ss->section;
218 if (sec.len == strlen(section) &&
219 strncmp(sec.txt, section, sec.len) == 0)
223 fprintf(stderr, "oceani: cannot find section %s\n",
229 parse_oceani(ss->code, &context.config, dotrace ? stderr : NULL);
232 fprintf(stderr, "oceani: no program found.\n");
233 context.parse_error = 1;
235 if (context.prog && doprint) {
238 print_exec(context.prog, 0, brackets);
240 if (context.prog && doexec && !context.parse_error) {
241 if (!analyse_prog(context.prog, &context)) {
242 fprintf(stderr, "oceani: type error in program - not running.\n");
245 interp_prog(context.prog, argv+optind+1);
247 free_exec(context.prog);
250 struct section *t = s->next;
256 ## free context types
257 exit(context.parse_error ? 1 : 0);
262 The four requirements of parse, analyse, print, interpret apply to
263 each language element individually so that is how most of the code
266 Three of the four are fairly self explanatory. The one that requires
267 a little explanation is the analysis step.
269 The current language design does not require the types of variables to
270 be declared, but they must still have a single type. Different
271 operations impose different requirements on the variables, for example
272 addition requires both arguments to be numeric, and assignment
273 requires the variable on the left to have the same type as the
274 expression on the right.
276 Analysis involves propagating these type requirements around and
277 consequently setting the type of each variable. If any requirements
278 are violated (e.g. a string is compared with a number) or if a
279 variable needs to have two different types, then an error is raised
280 and the program will not run.
282 If the same variable is declared in both branchs of an 'if/else', or
283 in all cases of a 'switch' then the multiple instances may be merged
284 into just one variable if the variable is referenced after the
285 conditional statement. When this happens, the types must naturally be
286 consistent across all the branches. When the variable is not used
287 outside the if, the variables in the different branches are distinct
288 and can be of different types.
290 Undeclared names may only appear in "use" statements and "case" expressions.
291 These names are given a type of "label" and a unique value.
292 This allows them to fill the role of a name in an enumerated type, which
293 is useful for testing the `switch` statement.
295 As we will see, the condition part of a `while` statement can return
296 either a Boolean or some other type. This requires that the expected
297 type that gets passed around comprises a type and a flag to indicate
298 that `Tbool` is also permitted.
300 As there are, as yet, no distinct types that are compatible, there
301 isn't much subtlety in the analysis. When we have distinct number
302 types, this will become more interesting.
306 When analysis discovers an inconsistency it needs to report an error;
307 just refusing to run the code ensures that the error doesn't cascade,
308 but by itself it isn't very useful. A clear understanding of the sort
309 of error message that are useful will help guide the process of
312 At a simplistic level, the only sort of error that type analysis can
313 report is that the type of some construct doesn't match a contextual
314 requirement. For example, in `4 + "hello"` the addition provides a
315 contextual requirement for numbers, but `"hello"` is not a number. In
316 this particular example no further information is needed as the types
317 are obvious from local information. When a variable is involved that
318 isn't the case. It may be helpful to explain why the variable has a
319 particular type, by indicating the location where the type was set,
320 whether by declaration or usage.
322 Using a recursive-descent analysis we can easily detect a problem at
323 multiple locations. In "`hello:= "there"; 4 + hello`" the addition
324 will detect that one argument is not a number and the usage of `hello`
325 will detect that a number was wanted, but not provided. In this
326 (early) version of the language, we will generate error reports at
327 multiple locations, so the use of `hello` will report an error and
328 explain were the value was set, and the addition will report an error
329 and say why numbers are needed. To be able to report locations for
330 errors, each language element will need to record a file location
331 (line and column) and each variable will need to record the language
332 element where its type was set. For now we will assume that each line
333 of an error message indicates one location in the file, and up to 2
334 types. So we provide a `printf`-like function which takes a format, a
335 location (a `struct exec` which has not yet been introduced), and 2
336 types. "`%1`" reports the first type, "`%2`" reports the second. We
337 will need a function to print the location, once we know how that is
338 stored. e As will be explained later, there are sometimes extra rules for
339 type matching and they might affect error messages, we need to pass those
342 As well as type errors, we sometimes need to report problems with
343 tokens, which might be unexpected or might name a type that has not
344 been defined. For these we have `tok_err()` which reports an error
345 with a given token. Each of the error functions sets the flag in the
346 context so indicate that parsing failed.
350 static void fput_loc(struct exec *loc, FILE *f);
352 ###### core functions
354 static void type_err(struct parse_context *c,
355 char *fmt, struct exec *loc,
356 struct type *t1, int rules, struct type *t2)
358 fprintf(stderr, "%s:", c->file_name);
359 fput_loc(loc, stderr);
360 for (; *fmt ; fmt++) {
367 case '%': fputc(*fmt, stderr); break; // NOTEST
368 default: fputc('?', stderr); break; // NOTEST
370 type_print(t1, stderr);
373 type_print(t2, stderr);
382 static void tok_err(struct parse_context *c, char *fmt, struct token *t)
384 fprintf(stderr, "%s:%d:%d: %s: %.*s\n", c->file_name, t->line, t->col, fmt,
385 t->txt.len, t->txt.txt);
389 ## Entities: declared and predeclared.
391 There are various "things" that the language and/or the interpreter
392 needs to know about to parse and execute a program. These include
393 types, variables, values, and executable code. These are all lumped
394 together under the term "entities" (calling them "objects" would be
395 confusing) and introduced here. The following section will present the
396 different specific code elements which comprise or manipulate these
401 Values come in a wide range of types, with more likely to be added.
402 Each type needs to be able to print its own values (for convenience at
403 least) as well as to compare two values, at least for equality and
404 possibly for order. For now, values might need to be duplicated and
405 freed, though eventually such manipulations will be better integrated
408 Rather than requiring every numeric type to support all numeric
409 operations (add, multiple, etc), we allow types to be able to present
410 as one of a few standard types: integer, float, and fraction. The
411 existence of these conversion functions eventually enable types to
412 determine if they are compatible with other types, though such types
413 have not yet been implemented.
415 Named type are stored in a simple linked list. Objects of each type are
416 "values" which are often passed around by value.
423 ## value union fields
431 void (*init)(struct type *type, struct value *val);
432 void (*prepare_type)(struct type *type);
433 void (*print)(struct type *type, struct value *val);
434 void (*print_type)(struct type *type, FILE *f);
435 int (*cmp_order)(struct type *t1, struct type *t2,
436 struct value *v1, struct value *v2);
437 int (*cmp_eq)(struct type *t1, struct type *t2,
438 struct value *v1, struct value *v2);
439 void (*dup)(struct type *type, struct value *vold, struct value *vnew);
440 void (*free)(struct type *type, struct value *val);
441 void (*free_type)(struct type *t);
442 long long (*to_int)(struct value *v);
443 double (*to_float)(struct value *v);
444 int (*to_mpq)(mpq_t *q, struct value *v);
453 struct type *typelist;
457 static struct type *find_type(struct parse_context *c, struct text s)
459 struct type *l = c->typelist;
462 text_cmp(l->name, s) != 0)
467 static struct type *add_type(struct parse_context *c, struct text s,
472 n = calloc(1, sizeof(*n));
475 n->next = c->typelist;
480 static void free_type(struct type *t)
482 /* The type is always a reference to something in the
483 * context, so we don't need to free anything.
487 static void free_value(struct type *type, struct value *v)
493 static void type_print(struct type *type, FILE *f)
496 fputs("*unknown*type*", f);
497 else if (type->name.len)
498 fprintf(f, "%.*s", type->name.len, type->name.txt);
499 else if (type->print_type)
500 type->print_type(type, f);
502 fputs("*invalid*type*", f); // NOTEST
505 static void val_init(struct type *type, struct value *val)
507 if (type && type->init)
508 type->init(type, val);
511 static void dup_value(struct type *type,
512 struct value *vold, struct value *vnew)
514 if (type && type->dup)
515 type->dup(type, vold, vnew);
518 static int value_cmp(struct type *tl, struct type *tr,
519 struct value *left, struct value *right)
521 if (tl && tl->cmp_order)
522 return tl->cmp_order(tl, tr, left, right);
523 if (tl && tl->cmp_eq)
524 return tl->cmp_eq(tl, tr, left, right);
528 static void print_value(struct type *type, struct value *v)
530 if (type && type->print)
531 type->print(type, v);
533 printf("*Unknown*"); // NOTEST
536 static struct value *val_alloc(struct type *t, struct value *init)
540 ret = calloc(1, t->size);
542 memcpy(ret, init, t->size);
550 static void free_value(struct type *type, struct value *v);
551 static int type_compat(struct type *require, struct type *have, int rules);
552 static void type_print(struct type *type, FILE *f);
553 static void val_init(struct type *type, struct value *v);
554 static void dup_value(struct type *type,
555 struct value *vold, struct value *vnew);
556 static int value_cmp(struct type *tl, struct type *tr,
557 struct value *left, struct value *right);
558 static void print_value(struct type *type, struct value *v);
560 ###### free context types
562 while (context.typelist) {
563 struct type *t = context.typelist;
565 context.typelist = t->next;
573 Values of the base types can be numbers, which we represent as
574 multi-precision fractions, strings, Booleans and labels. When
575 analysing the program we also need to allow for places where no value
576 is meaningful (type `Tnone`) and where we don't know what type to
577 expect yet (type is `NULL`).
579 Values are never shared, they are always copied when used, and freed
580 when no longer needed.
582 When propagating type information around the program, we need to
583 determine if two types are compatible, where type `NULL` is compatible
584 with anything. There are two special cases with type compatibility,
585 both related to the Conditional Statement which will be described
586 later. In some cases a Boolean can be accepted as well as some other
587 primary type, and in others any type is acceptable except a label (`Vlabel`).
588 A separate function encoding these cases will simplify some code later.
592 int (*compat)(struct type *this, struct type *other);
596 static int type_compat(struct type *require, struct type *have, int rules)
598 if ((rules & Rboolok) && have == Tbool)
600 if ((rules & Rnolabel) && have == Tlabel)
602 if (!require || !have)
606 return require->compat(require, have);
608 return require == have;
613 #include "parse_string.h"
614 #include "parse_number.h"
617 myLDLIBS := libnumber.o libstring.o -lgmp
618 LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
620 ###### type union fields
621 enum vtype {Vnone, Vstr, Vnum, Vbool, Vlabel} vtype;
623 ###### value union fields
630 static void _free_value(struct type *type, struct value *v)
634 switch (type->vtype) {
636 case Vstr: free(v->str.txt); break;
637 case Vnum: mpq_clear(v->num); break;
643 ###### value functions
645 static void _val_init(struct type *type, struct value *val)
647 switch(type->vtype) {
648 case Vnone: // NOTEST
651 mpq_init(val->num); break;
653 val->str.txt = malloc(1);
659 case Vlabel: // NOTEST
660 val->label = NULL; // NOTEST
665 static void _dup_value(struct type *type,
666 struct value *vold, struct value *vnew)
668 switch (type->vtype) {
669 case Vnone: // NOTEST
672 vnew->label = vold->label;
675 vnew->bool = vold->bool;
679 mpq_set(vnew->num, vold->num);
682 vnew->str.len = vold->str.len;
683 vnew->str.txt = malloc(vnew->str.len);
684 memcpy(vnew->str.txt, vold->str.txt, vnew->str.len);
689 static int _value_cmp(struct type *tl, struct type *tr,
690 struct value *left, struct value *right)
694 return tl - tr; // NOTEST
696 case Vlabel: cmp = left->label == right->label ? 0 : 1; break;
697 case Vnum: cmp = mpq_cmp(left->num, right->num); break;
698 case Vstr: cmp = text_cmp(left->str, right->str); break;
699 case Vbool: cmp = left->bool - right->bool; break;
700 case Vnone: cmp = 0; // NOTEST
705 static void _print_value(struct type *type, struct value *v)
707 switch (type->vtype) {
708 case Vnone: // NOTEST
709 printf("*no-value*"); break; // NOTEST
710 case Vlabel: // NOTEST
711 printf("*label-%p*", v->label); break; // NOTEST
713 printf("%.*s", v->str.len, v->str.txt); break;
715 printf("%s", v->bool ? "True":"False"); break;
720 mpf_set_q(fl, v->num);
721 gmp_printf("%Fg", fl);
728 static void _free_value(struct type *type, struct value *v);
730 static struct type base_prototype = {
732 .print = _print_value,
733 .cmp_order = _value_cmp,
734 .cmp_eq = _value_cmp,
739 static struct type *Tbool, *Tstr, *Tnum, *Tnone, *Tlabel;
742 static struct type *add_base_type(struct parse_context *c, char *n,
743 enum vtype vt, int size)
745 struct text txt = { n, strlen(n) };
748 t = add_type(c, txt, &base_prototype);
751 t->align = size > sizeof(void*) ? sizeof(void*) : size;
752 if (t->size & (t->align - 1))
753 t->size = (t->size | (t->align - 1)) + 1;
757 ###### context initialization
759 Tbool = add_base_type(&context, "Boolean", Vbool, sizeof(char));
760 Tstr = add_base_type(&context, "string", Vstr, sizeof(struct text));
761 Tnum = add_base_type(&context, "number", Vnum, sizeof(mpq_t));
762 Tnone = add_base_type(&context, "none", Vnone, 0);
763 Tlabel = add_base_type(&context, "label", Vlabel, sizeof(void*));
767 Variables are scoped named values. We store the names in a linked list
768 of "bindings" sorted in lexical order, and use sequential search and
775 struct binding *next; // in lexical order
779 This linked list is stored in the parse context so that "reduce"
780 functions can find or add variables, and so the analysis phase can
781 ensure that every variable gets a type.
785 struct binding *varlist; // In lexical order
789 static struct binding *find_binding(struct parse_context *c, struct text s)
791 struct binding **l = &c->varlist;
796 (cmp = text_cmp((*l)->name, s)) < 0)
800 n = calloc(1, sizeof(*n));
807 Each name can be linked to multiple variables defined in different
808 scopes. Each scope starts where the name is declared and continues
809 until the end of the containing code block. Scopes of a given name
810 cannot nest, so a declaration while a name is in-scope is an error.
812 ###### binding fields
813 struct variable *var;
817 struct variable *previous;
820 struct binding *name;
821 struct exec *where_decl;// where name was declared
822 struct exec *where_set; // where type was set
826 While the naming seems strange, we include local constants in the
827 definition of variables. A name declared `var := value` can
828 subsequently be changed, but a name declared `var ::= value` cannot -
831 ###### variable fields
834 Scopes in parallel branches can be partially merged. More
835 specifically, if a given name is declared in both branches of an
836 if/else then its scope is a candidate for merging. Similarly if
837 every branch of an exhaustive switch (e.g. has an "else" clause)
838 declares a given name, then the scopes from the branches are
839 candidates for merging.
841 Note that names declared inside a loop (which is only parallel to
842 itself) are never visible after the loop. Similarly names defined in
843 scopes which are not parallel, such as those started by `for` and
844 `switch`, are never visible after the scope. Only variables defined in
845 both `then` and `else` (including the implicit then after an `if`, and
846 excluding `then` used with `for`) and in all `case`s and `else` of a
847 `switch` or `while` can be visible beyond the `if`/`switch`/`while`.
849 Labels, which are a bit like variables, follow different rules.
850 Labels are not explicitly declared, but if an undeclared name appears
851 in a context where a label is legal, that effectively declares the
852 name as a label. The declaration remains in force (or in scope) at
853 least to the end of the immediately containing block and conditionally
854 in any larger containing block which does not declare the name in some
855 other way. Importantly, the conditional scope extension happens even
856 if the label is only used in one parallel branch of a conditional --
857 when used in one branch it is treated as having been declared in all
860 Merge candidates are tentatively visible beyond the end of the
861 branching statement which creates them. If the name is used, the
862 merge is affirmed and they become a single variable visible at the
863 outer layer. If not - if it is redeclared first - the merge lapses.
865 To track scopes we have an extra stack, implemented as a linked list,
866 which roughly parallels the parse stack and which is used exclusively
867 for scoping. When a new scope is opened, a new frame is pushed and
868 the child-count of the parent frame is incremented. This child-count
869 is used to distinguish between the first of a set of parallel scopes,
870 in which declared variables must not be in scope, and subsequent
871 branches, whether they may already be conditionally scoped.
873 To push a new frame *before* any code in the frame is parsed, we need a
874 grammar reduction. This is most easily achieved with a grammar
875 element which derives the empty string, and creates the new scope when
876 it is recognised. This can be placed, for example, between a keyword
877 like "if" and the code following it.
881 struct scope *parent;
887 struct scope *scope_stack;
890 static void scope_pop(struct parse_context *c)
892 struct scope *s = c->scope_stack;
894 c->scope_stack = s->parent;
899 static void scope_push(struct parse_context *c)
901 struct scope *s = calloc(1, sizeof(*s));
903 c->scope_stack->child_count += 1;
904 s->parent = c->scope_stack;
912 OpenScope -> ${ scope_push(c); }$
913 ClosePara -> ${ var_block_close(c, CloseParallel); }$
915 Each variable records a scope depth and is in one of four states:
917 - "in scope". This is the case between the declaration of the
918 variable and the end of the containing block, and also between
919 the usage with affirms a merge and the end of that block.
921 The scope depth is not greater than the current parse context scope
922 nest depth. When the block of that depth closes, the state will
923 change. To achieve this, all "in scope" variables are linked
924 together as a stack in nesting order.
926 - "pending". The "in scope" block has closed, but other parallel
927 scopes are still being processed. So far, every parallel block at
928 the same level that has closed has declared the name.
930 The scope depth is the depth of the last parallel block that
931 enclosed the declaration, and that has closed.
933 - "conditionally in scope". The "in scope" block and all parallel
934 scopes have closed, and no further mention of the name has been
935 seen. This state includes a secondary nest depth which records the
936 outermost scope seen since the variable became conditionally in
937 scope. If a use of the name is found, the variable becomes "in
938 scope" and that secondary depth becomes the recorded scope depth.
939 If the name is declared as a new variable, the old variable becomes
940 "out of scope" and the recorded scope depth stays unchanged.
942 - "out of scope". The variable is neither in scope nor conditionally
943 in scope. It is permanently out of scope now and can be removed from
944 the "in scope" stack.
946 ###### variable fields
947 int depth, min_depth;
948 enum { OutScope, PendingScope, CondScope, InScope } scope;
949 struct variable *in_scope;
953 struct variable *in_scope;
955 All variables with the same name are linked together using the
956 'previous' link. Those variable that have been affirmatively merged all
957 have a 'merged' pointer that points to one primary variable - the most
958 recently declared instance. When merging variables, we need to also
959 adjust the 'merged' pointer on any other variables that had previously
960 been merged with the one that will no longer be primary.
962 A variable that is no longer the most recent instance of a name may
963 still have "pending" scope, if it might still be merged with most
964 recent instance. These variables don't really belong in the
965 "in_scope" list, but are not immediately removed when a new instance
966 is found. Instead, they are detected and ignored when considering the
967 list of in_scope names.
969 ###### variable fields
970 struct variable *merged;
974 static void variable_merge(struct variable *primary, struct variable *secondary)
980 primary = primary->merged;
982 for (v = primary->previous; v; v=v->previous)
983 if (v == secondary || v == secondary->merged ||
984 v->merged == secondary ||
985 (v->merged && v->merged == secondary->merged)) {
991 ###### free context vars
993 while (context.varlist) {
994 struct binding *b = context.varlist;
995 struct variable *v = b->var;
996 context.varlist = b->next;
999 struct variable *t = v;
1002 free_value(t->type, t->val);
1005 // This is a global constant
1006 free_exec(t->where_decl);
1011 #### Manipulating Bindings
1013 When a name is conditionally visible, a new declaration discards the
1014 old binding - the condition lapses. Conversely a usage of the name
1015 affirms the visibility and extends it to the end of the containing
1016 block - i.e. the block that contains both the original declaration and
1017 the latest usage. This is determined from `min_depth`. When a
1018 conditionally visible variable gets affirmed like this, it is also
1019 merged with other conditionally visible variables with the same name.
1021 When we parse a variable declaration we either report an error if the
1022 name is currently bound, or create a new variable at the current nest
1023 depth if the name is unbound or bound to a conditionally scoped or
1024 pending-scope variable. If the previous variable was conditionally
1025 scoped, it and its homonyms becomes out-of-scope.
1027 When we parse a variable reference (including non-declarative assignment
1028 "foo = bar") we report an error if the name is not bound or is bound to
1029 a pending-scope variable; update the scope if the name is bound to a
1030 conditionally scoped variable; or just proceed normally if the named
1031 variable is in scope.
1033 When we exit a scope, any variables bound at this level are either
1034 marked out of scope or pending-scoped, depending on whether the scope
1035 was sequential or parallel. Here a "parallel" scope means the "then"
1036 or "else" part of a conditional, or any "case" or "else" branch of a
1037 switch. Other scopes are "sequential".
1039 When exiting a parallel scope we check if there are any variables that
1040 were previously pending and are still visible. If there are, then
1041 there weren't redeclared in the most recent scope, so they cannot be
1042 merged and must become out-of-scope. If it is not the first of
1043 parallel scopes (based on `child_count`), we check that there was a
1044 previous binding that is still pending-scope. If there isn't, the new
1045 variable must now be out-of-scope.
1047 When exiting a sequential scope that immediately enclosed parallel
1048 scopes, we need to resolve any pending-scope variables. If there was
1049 no `else` clause, and we cannot determine that the `switch` was exhaustive,
1050 we need to mark all pending-scope variable as out-of-scope. Otherwise
1051 all pending-scope variables become conditionally scoped.
1054 enum closetype { CloseSequential, CloseParallel, CloseElse };
1056 ###### ast functions
1058 static struct variable *var_decl(struct parse_context *c, struct text s)
1060 struct binding *b = find_binding(c, s);
1061 struct variable *v = b->var;
1063 switch (v ? v->scope : OutScope) {
1065 /* Caller will report the error */
1069 v && v->scope == CondScope;
1071 v->scope = OutScope;
1075 v = calloc(1, sizeof(*v));
1076 v->previous = b->var;
1079 v->min_depth = v->depth = c->scope_depth;
1081 v->in_scope = c->in_scope;
1087 static struct variable *var_ref(struct parse_context *c, struct text s)
1089 struct binding *b = find_binding(c, s);
1090 struct variable *v = b->var;
1091 struct variable *v2;
1093 switch (v ? v->scope : OutScope) {
1096 /* Caller will report the error */
1099 /* All CondScope variables of this name need to be merged
1100 * and become InScope
1102 v->depth = v->min_depth;
1104 for (v2 = v->previous;
1105 v2 && v2->scope == CondScope;
1107 variable_merge(v, v2);
1115 static void var_block_close(struct parse_context *c, enum closetype ct)
1117 /* Close off all variables that are in_scope */
1118 struct variable *v, **vp, *v2;
1121 for (vp = &c->in_scope;
1122 v = *vp, v && v->depth > c->scope_depth && v->min_depth > c->scope_depth;
1124 if (v->name->var == v) switch (ct) {
1126 case CloseParallel: /* handle PendingScope */
1130 if (c->scope_stack->child_count == 1)
1131 v->scope = PendingScope;
1132 else if (v->previous &&
1133 v->previous->scope == PendingScope)
1134 v->scope = PendingScope;
1135 else if (v->type == Tlabel)
1136 v->scope = PendingScope;
1137 else if (v->name->var == v)
1138 v->scope = OutScope;
1139 if (ct == CloseElse) {
1140 /* All Pending variables with this name
1141 * are now Conditional */
1143 v2 && v2->scope == PendingScope;
1145 v2->scope = CondScope;
1150 v2 && v2->scope == PendingScope;
1152 if (v2->type != Tlabel)
1153 v2->scope = OutScope;
1155 case OutScope: break;
1158 case CloseSequential:
1159 if (v->type == Tlabel)
1160 v->scope = PendingScope;
1163 v->scope = OutScope;
1166 /* There was no 'else', so we can only become
1167 * conditional if we know the cases were exhaustive,
1168 * and that doesn't mean anything yet.
1169 * So only labels become conditional..
1172 v2 && v2->scope == PendingScope;
1174 if (v2->type == Tlabel) {
1175 v2->scope = CondScope;
1176 v2->min_depth = c->scope_depth;
1178 v2->scope = OutScope;
1181 case OutScope: break;
1185 if (v->scope == OutScope || v->name->var != v)
1194 Executables can be lots of different things. In many cases an
1195 executable is just an operation combined with one or two other
1196 executables. This allows for expressions and lists etc. Other times an
1197 executable is something quite specific like a constant or variable name.
1198 So we define a `struct exec` to be a general executable with a type, and
1199 a `struct binode` which is a subclass of `exec`, forms a node in a
1200 binary tree, and holds an operation. There will be other subclasses,
1201 and to access these we need to be able to `cast` the `exec` into the
1202 various other types. The first field in any `struct exec` is the type
1203 from the `exec_types` enum.
1206 #define cast(structname, pointer) ({ \
1207 const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \
1208 if (__mptr && *__mptr != X##structname) abort(); \
1209 (struct structname *)( (char *)__mptr);})
1211 #define new(structname) ({ \
1212 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1213 __ptr->type = X##structname; \
1214 __ptr->line = -1; __ptr->column = -1; \
1217 #define new_pos(structname, token) ({ \
1218 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1219 __ptr->type = X##structname; \
1220 __ptr->line = token.line; __ptr->column = token.col; \
1229 enum exec_types type;
1237 struct exec *left, *right;
1240 ###### ast functions
1242 static int __fput_loc(struct exec *loc, FILE *f)
1246 if (loc->line >= 0) {
1247 fprintf(f, "%d:%d: ", loc->line, loc->column);
1250 if (loc->type == Xbinode)
1251 return __fput_loc(cast(binode,loc)->left, f) ||
1252 __fput_loc(cast(binode,loc)->right, f);
1255 static void fput_loc(struct exec *loc, FILE *f)
1257 if (!__fput_loc(loc, f))
1258 fprintf(f, "??:??: "); // NOTEST
1261 Each different type of `exec` node needs a number of functions defined,
1262 a bit like methods. We must be able to free it, print it, analyse it
1263 and execute it. Once we have specific `exec` types we will need to
1264 parse them too. Let's take this a bit more slowly.
1268 The parser generator requires a `free_foo` function for each struct
1269 that stores attributes and they will often be `exec`s and subtypes
1270 there-of. So we need `free_exec` which can handle all the subtypes,
1271 and we need `free_binode`.
1273 ###### ast functions
1275 static void free_binode(struct binode *b)
1280 free_exec(b->right);
1284 ###### core functions
1285 static void free_exec(struct exec *e)
1294 ###### forward decls
1296 static void free_exec(struct exec *e);
1298 ###### free exec cases
1299 case Xbinode: free_binode(cast(binode, e)); break;
1303 Printing an `exec` requires that we know the current indent level for
1304 printing line-oriented components. As will become clear later, we
1305 also want to know what sort of bracketing to use.
1307 ###### ast functions
1309 static void do_indent(int i, char *str)
1316 ###### core functions
1317 static void print_binode(struct binode *b, int indent, int bracket)
1321 ## print binode cases
1325 static void print_exec(struct exec *e, int indent, int bracket)
1331 print_binode(cast(binode, e), indent, bracket); break;
1336 ###### forward decls
1338 static void print_exec(struct exec *e, int indent, int bracket);
1342 As discussed, analysis involves propagating type requirements around the
1343 program and looking for errors.
1345 So `propagate_types` is passed an expected type (being a `struct type`
1346 pointer together with some `val_rules` flags) that the `exec` is
1347 expected to return, and returns the type that it does return, either
1348 of which can be `NULL` signifying "unknown". An `ok` flag is passed
1349 by reference. It is set to `0` when an error is found, and `2` when
1350 any change is made. If it remains unchanged at `1`, then no more
1351 propagation is needed.
1355 enum val_rules {Rnolabel = 1<<0, Rboolok = 1<<1, Rnoconstant = 2<<1};
1359 if (rules & Rnolabel)
1360 fputs(" (labels not permitted)", stderr);
1363 ###### core functions
1365 static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1366 struct type *type, int rules);
1367 static struct type *__propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1368 struct type *type, int rules)
1375 switch (prog->type) {
1378 struct binode *b = cast(binode, prog);
1380 ## propagate binode cases
1384 ## propagate exec cases
1389 static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1390 struct type *type, int rules)
1392 struct type *ret = __propagate_types(prog, c, ok, type, rules);
1401 Interpreting an `exec` doesn't require anything but the `exec`. State
1402 is stored in variables and each variable will be directly linked from
1403 within the `exec` tree. The exception to this is the whole `program`
1404 which needs to look at command line arguments. The `program` will be
1405 interpreted separately.
1407 Each `exec` can return a value combined with a type in `struct lrval`.
1408 The type may be `Tnone` but must be non-NULL. Some `exec`s will return
1409 the location of a value, which can be updated, in `lval`. Others will
1410 set `lval` to NULL indicating that there is a value of appropriate type
1414 ###### core functions
1418 struct value rval, *lval;
1421 static struct lrval _interp_exec(struct exec *e);
1423 static struct value interp_exec(struct exec *e, struct type **typeret)
1425 struct lrval ret = _interp_exec(e);
1427 if (!ret.type) abort();
1429 *typeret = ret.type;
1431 dup_value(ret.type, ret.lval, &ret.rval);
1435 static struct value *linterp_exec(struct exec *e, struct type **typeret)
1437 struct lrval ret = _interp_exec(e);
1440 *typeret = ret.type;
1442 free_value(ret.type, &ret.rval);
1446 static struct lrval _interp_exec(struct exec *e)
1449 struct value rv = {}, *lrv = NULL;
1450 struct type *rvtype;
1452 rvtype = ret.type = Tnone;
1462 struct binode *b = cast(binode, e);
1463 struct value left, right, *lleft;
1464 struct type *ltype, *rtype;
1465 ltype = rtype = Tnone;
1467 ## interp binode cases
1469 free_value(ltype, &left);
1470 free_value(rtype, &right);
1473 ## interp exec cases
1483 Now that we have the shape of the interpreter in place we can add some
1484 complex types and connected them in to the data structures and the
1485 different phases of parse, analyse, print, interpret.
1487 Thus far we have arrays and structs.
1491 Arrays can be declared by giving a size and a type, as `[size]type' so
1492 `freq:[26]number` declares `freq` to be an array of 26 numbers. The
1493 size can be either a literal number, or a named constant. Some day an
1494 arbitrary expression will be supported.
1496 Arrays cannot be assigned. When pointers are introduced we will also
1497 introduce array slices which can refer to part or all of an array -
1498 the assignment syntax will create a slice. For now, an array can only
1499 ever be referenced by the name it is declared with. It is likely that
1500 a "`copy`" primitive will eventually be define which can be used to
1501 make a copy of an array with controllable recursive depth.
1503 ###### type union fields
1507 struct variable *vsize;
1508 struct type *member;
1511 ###### value functions
1513 static void array_prepare_type(struct type *type)
1516 if (!type->array.vsize)
1520 mpz_tdiv_q(q, mpq_numref(type->array.vsize->val->num),
1521 mpq_denref(type->array.vsize->val->num));
1522 type->array.size = mpz_get_si(q);
1525 type->size = type->array.size * type->array.member->size;
1526 type->align = type->array.member->align;
1529 static void array_init(struct type *type, struct value *val)
1535 for (i = 0; i < type->array.size; i++) {
1537 v = (void*)val->ptr + i * type->array.member->size;
1538 val_init(type->array.member, v);
1542 static void array_free(struct type *type, struct value *val)
1546 for (i = 0; i < type->array.size; i++) {
1548 v = (void*)val->ptr + i * type->array.member->size;
1549 free_value(type->array.member, v);
1553 static int array_compat(struct type *require, struct type *have)
1555 if (have->compat != require->compat)
1557 /* Both are arrays, so we can look at details */
1558 if (!type_compat(require->array.member, have->array.member, 0))
1560 if (require->array.vsize == NULL && have->array.vsize == NULL)
1561 return require->array.size == have->array.size;
1563 return require->array.vsize == have->array.vsize;
1566 static void array_print_type(struct type *type, FILE *f)
1569 if (type->array.vsize) {
1570 struct binding *b = type->array.vsize->name;
1571 fprintf(f, "%.*s]", b->name.len, b->name.txt);
1573 fprintf(f, "%d]", type->array.size);
1574 type_print(type->array.member, f);
1577 static struct type array_prototype = {
1579 .prepare_type = array_prepare_type,
1580 .print_type = array_print_type,
1581 .compat = array_compat,
1585 ###### declare terminals
1590 | [ NUMBER ] Type ${ {
1593 struct text noname = { "", 0 };
1596 $0 = t = add_type(c, noname, &array_prototype);
1597 t->array.member = $<4;
1598 t->array.vsize = NULL;
1599 if (number_parse(num, tail, $2.txt) == 0)
1600 tok_err(c, "error: unrecognised number", &$2);
1602 tok_err(c, "error: unsupported number suffix", &$2);
1604 t->array.size = mpz_get_ui(mpq_numref(num));
1605 if (mpz_cmp_ui(mpq_denref(num), 1) != 0) {
1606 tok_err(c, "error: array size must be an integer",
1608 } else if (mpz_cmp_ui(mpq_numref(num), 1UL << 30) >= 0)
1609 tok_err(c, "error: array size is too large",
1613 t->size = t->array.size * t->array.member->size;
1614 t->align = t->array.member->align;
1617 | [ IDENTIFIER ] Type ${ {
1618 struct variable *v = var_ref(c, $2.txt);
1619 struct text noname = { "", 0 };
1622 tok_err(c, "error: name undeclared", &$2);
1623 else if (!v->constant)
1624 tok_err(c, "error: array size must be a constant", &$2);
1626 $0 = add_type(c, noname, &array_prototype);
1627 $0->array.member = $<4;
1629 $0->array.vsize = v;
1635 ###### variable grammar
1637 | Variable [ Expression ] ${ {
1638 struct binode *b = new(binode);
1645 ###### print binode cases
1647 print_exec(b->left, -1, bracket);
1649 print_exec(b->right, -1, bracket);
1653 ###### propagate binode cases
1655 /* left must be an array, right must be a number,
1656 * result is the member type of the array
1658 propagate_types(b->right, c, ok, Tnum, 0);
1659 t = propagate_types(b->left, c, ok, NULL, rules & Rnoconstant);
1660 if (!t || t->compat != array_compat) {
1661 type_err(c, "error: %1 cannot be indexed", prog, t, 0, NULL);
1664 if (!type_compat(type, t->array.member, rules)) {
1665 type_err(c, "error: have %1 but need %2", prog,
1666 t->array.member, rules, type);
1668 return t->array.member;
1672 ###### interp binode cases
1677 lleft = linterp_exec(b->left, <ype);
1678 right = interp_exec(b->right, &rtype);
1680 mpz_tdiv_q(q, mpq_numref(right.num), mpq_denref(right.num));
1684 rvtype = ltype->array.member;
1685 if (i >= 0 && i < ltype->array.size)
1686 lrv = (void*)lleft + i * rvtype->size;
1688 val_init(ltype->array.member, &rv);
1695 A `struct` is a data-type that contains one or more other data-types.
1696 It differs from an array in that each member can be of a different
1697 type, and they are accessed by name rather than by number. Thus you
1698 cannot choose an element by calculation, you need to know what you
1701 The language makes no promises about how a given structure will be
1702 stored in memory - it is free to rearrange fields to suit whatever
1703 criteria seems important.
1705 Structs are declared separately from program code - they cannot be
1706 declared in-line in a variable declaration like arrays can. A struct
1707 is given a name and this name is used to identify the type - the name
1708 is not prefixed by the word `struct` as it would be in C.
1710 Structs are only treated as the same if they have the same name.
1711 Simply having the same fields in the same order is not enough. This
1712 might change once we can create structure initializers from a list of
1715 Each component datum is identified much like a variable is declared,
1716 with a name, one or two colons, and a type. The type cannot be omitted
1717 as there is no opportunity to deduce the type from usage. An initial
1718 value can be given following an equals sign, so
1720 ##### Example: a struct type
1726 would declare a type called "complex" which has two number fields,
1727 each initialised to zero.
1729 Struct will need to be declared separately from the code that uses
1730 them, so we will need to be able to print out the declaration of a
1731 struct when reprinting the whole program. So a `print_type_decl` type
1732 function will be needed.
1734 ###### type union fields
1746 ###### type functions
1747 void (*print_type_decl)(struct type *type, FILE *f);
1749 ###### value functions
1751 static void structure_init(struct type *type, struct value *val)
1755 for (i = 0; i < type->structure.nfields; i++) {
1757 v = (void*) val->ptr + type->structure.fields[i].offset;
1758 val_init(type->structure.fields[i].type, v);
1762 static void structure_free(struct type *type, struct value *val)
1766 for (i = 0; i < type->structure.nfields; i++) {
1768 v = (void*)val->ptr + type->structure.fields[i].offset;
1769 free_value(type->structure.fields[i].type, v);
1773 static void structure_free_type(struct type *t)
1776 for (i = 0; i < t->structure.nfields; i++)
1777 if (t->structure.fields[i].init) {
1778 free_value(t->structure.fields[i].type,
1779 t->structure.fields[i].init);
1780 free(t->structure.fields[i].init);
1782 free(t->structure.fields);
1785 static struct type structure_prototype = {
1786 .init = structure_init,
1787 .free = structure_free,
1788 .free_type = structure_free_type,
1789 .print_type_decl = structure_print_type,
1803 ###### free exec cases
1805 free_exec(cast(fieldref, e)->left);
1809 ###### declare terminals
1812 ###### variable grammar
1814 | Variable . IDENTIFIER ${ {
1815 struct fieldref *fr = new_pos(fieldref, $2);
1822 ###### print exec cases
1826 struct fieldref *f = cast(fieldref, e);
1827 print_exec(f->left, -1, bracket);
1828 printf(".%.*s", f->name.len, f->name.txt);
1832 ###### ast functions
1833 static int find_struct_index(struct type *type, struct text field)
1836 for (i = 0; i < type->structure.nfields; i++)
1837 if (text_cmp(type->structure.fields[i].name, field) == 0)
1842 ###### propagate exec cases
1846 struct fieldref *f = cast(fieldref, prog);
1847 struct type *st = propagate_types(f->left, c, ok, NULL, 0);
1850 type_err(c, "error: unknown type for field access", f->left,
1852 else if (st->init != structure_init)
1853 type_err(c, "error: field reference attempted on %1, not a struct",
1854 f->left, st, 0, NULL);
1855 else if (f->index == -2) {
1856 f->index = find_struct_index(st, f->name);
1858 type_err(c, "error: cannot find requested field in %1",
1859 f->left, st, 0, NULL);
1861 if (f->index >= 0) {
1862 struct type *ft = st->structure.fields[f->index].type;
1863 if (!type_compat(type, ft, rules))
1864 type_err(c, "error: have %1 but need %2", prog,
1871 ###### interp exec cases
1874 struct fieldref *f = cast(fieldref, e);
1876 struct value *lleft = linterp_exec(f->left, <ype);
1877 lrv = (void*)lleft->ptr + ltype->structure.fields[f->index].offset;
1878 rvtype = ltype->structure.fields[f->index].type;
1884 struct fieldlist *prev;
1888 ###### ast functions
1889 static void free_fieldlist(struct fieldlist *f)
1893 free_fieldlist(f->prev);
1895 free_value(f->f.type, f->f.init);
1901 ###### top level grammar
1902 DeclareStruct -> struct IDENTIFIER FieldBlock Newlines ${ {
1904 add_type(c, $2.txt, &structure_prototype);
1906 struct fieldlist *f;
1908 for (f = $3; f; f=f->prev)
1911 t->structure.nfields = cnt;
1912 t->structure.fields = calloc(cnt, sizeof(struct field));
1915 int a = f->f.type->align;
1917 t->structure.fields[cnt] = f->f;
1918 if (t->size & (a-1))
1919 t->size = (t->size | (a-1)) + 1;
1920 t->structure.fields[cnt].offset = t->size;
1921 t->size += ((f->f.type->size - 1) | (a-1)) + 1;
1930 FieldBlock -> { IN OptNL FieldLines OUT OptNL } ${ $0 = $<FL; }$
1931 | { SimpleFieldList } ${ $0 = $<SFL; }$
1932 | IN OptNL FieldLines OUT ${ $0 = $<FL; }$
1933 | SimpleFieldList EOL ${ $0 = $<SFL; }$
1935 FieldLines -> SimpleFieldList Newlines ${ $0 = $<SFL; }$
1936 | FieldLines SimpleFieldList Newlines ${
1941 SimpleFieldList -> Field ${ $0 = $<F; }$
1942 | SimpleFieldList ; Field ${
1946 | SimpleFieldList ; ${
1949 | ERROR ${ tok_err(c, "Syntax error in struct field", &$1); }$
1951 Field -> IDENTIFIER : Type = Expression ${ {
1954 $0 = calloc(1, sizeof(struct fieldlist));
1955 $0->f.name = $1.txt;
1960 propagate_types($<5, c, &ok, $3, 0);
1965 struct value vl = interp_exec($5, NULL);
1966 $0->f.init = val_alloc($0->f.type, &vl);
1969 | IDENTIFIER : Type ${
1970 $0 = calloc(1, sizeof(struct fieldlist));
1971 $0->f.name = $1.txt;
1973 $0->f.init = val_alloc($0->f.type, NULL);
1976 ###### forward decls
1977 static void structure_print_type(struct type *t, FILE *f);
1979 ###### value functions
1980 static void structure_print_type(struct type *t, FILE *f)
1984 fprintf(f, "struct %.*s\n", t->name.len, t->name.txt);
1986 for (i = 0; i < t->structure.nfields; i++) {
1987 struct field *fl = t->structure.fields + i;
1988 fprintf(f, " %.*s : ", fl->name.len, fl->name.txt);
1989 type_print(fl->type, f);
1990 if (fl->type->print && fl->init) {
1992 if (fl->type == Tstr)
1994 print_value(fl->type, fl->init);
1995 if (fl->type == Tstr)
2002 ###### print type decls
2007 while (target != 0) {
2009 for (t = context.typelist; t ; t=t->next)
2010 if (t->print_type_decl) {
2019 t->print_type_decl(t, stdout);
2025 ## Executables: the elements of code
2027 Each code element needs to be parsed, printed, analysed,
2028 interpreted, and freed. There are several, so let's just start with
2029 the easy ones and work our way up.
2033 We have already met values as separate objects. When manifest
2034 constants appear in the program text, that must result in an executable
2035 which has a constant value. So the `val` structure embeds a value in
2048 ###### ast functions
2049 struct val *new_val(struct type *T, struct token tk)
2051 struct val *v = new_pos(val, tk);
2062 $0 = new_val(Tbool, $1);
2066 $0 = new_val(Tbool, $1);
2070 $0 = new_val(Tnum, $1);
2073 if (number_parse($0->val.num, tail, $1.txt) == 0)
2074 mpq_init($0->val.num);
2076 tok_err(c, "error: unsupported number suffix",
2081 $0 = new_val(Tstr, $1);
2084 string_parse(&$1, '\\', &$0->val.str, tail);
2086 tok_err(c, "error: unsupported string suffix",
2091 $0 = new_val(Tstr, $1);
2094 string_parse(&$1, '\\', &$0->val.str, tail);
2096 tok_err(c, "error: unsupported string suffix",
2101 ###### print exec cases
2104 struct val *v = cast(val, e);
2105 if (v->vtype == Tstr)
2107 print_value(v->vtype, &v->val);
2108 if (v->vtype == Tstr)
2113 ###### propagate exec cases
2116 struct val *val = cast(val, prog);
2117 if (!type_compat(type, val->vtype, rules))
2118 type_err(c, "error: expected %1%r found %2",
2119 prog, type, rules, val->vtype);
2123 ###### interp exec cases
2125 rvtype = cast(val, e)->vtype;
2126 dup_value(rvtype, &cast(val, e)->val, &rv);
2129 ###### ast functions
2130 static void free_val(struct val *v)
2133 free_value(v->vtype, &v->val);
2137 ###### free exec cases
2138 case Xval: free_val(cast(val, e)); break;
2140 ###### ast functions
2141 // Move all nodes from 'b' to 'rv', reversing their order.
2142 // In 'b' 'left' is a list, and 'right' is the last node.
2143 // In 'rv', left' is the first node and 'right' is a list.
2144 static struct binode *reorder_bilist(struct binode *b)
2146 struct binode *rv = NULL;
2149 struct exec *t = b->right;
2153 b = cast(binode, b->left);
2163 Just as we used a `val` to wrap a value into an `exec`, we similarly
2164 need a `var` to wrap a `variable` into an exec. While each `val`
2165 contained a copy of the value, each `var` holds a link to the variable
2166 because it really is the same variable no matter where it appears.
2167 When a variable is used, we need to remember to follow the `->merged`
2168 link to find the primary instance.
2176 struct variable *var;
2184 VariableDecl -> IDENTIFIER : ${ {
2185 struct variable *v = var_decl(c, $1.txt);
2186 $0 = new_pos(var, $1);
2191 v = var_ref(c, $1.txt);
2193 type_err(c, "error: variable '%v' redeclared",
2195 type_err(c, "info: this is where '%v' was first declared",
2196 v->where_decl, NULL, 0, NULL);
2199 | IDENTIFIER :: ${ {
2200 struct variable *v = var_decl(c, $1.txt);
2201 $0 = new_pos(var, $1);
2207 v = var_ref(c, $1.txt);
2209 type_err(c, "error: variable '%v' redeclared",
2211 type_err(c, "info: this is where '%v' was first declared",
2212 v->where_decl, NULL, 0, NULL);
2215 | IDENTIFIER : Type ${ {
2216 struct variable *v = var_decl(c, $1.txt);
2217 $0 = new_pos(var, $1);
2225 v = var_ref(c, $1.txt);
2227 type_err(c, "error: variable '%v' redeclared",
2229 type_err(c, "info: this is where '%v' was first declared",
2230 v->where_decl, NULL, 0, NULL);
2233 | IDENTIFIER :: Type ${ {
2234 struct variable *v = var_decl(c, $1.txt);
2235 $0 = new_pos(var, $1);
2244 v = var_ref(c, $1.txt);
2246 type_err(c, "error: variable '%v' redeclared",
2248 type_err(c, "info: this is where '%v' was first declared",
2249 v->where_decl, NULL, 0, NULL);
2254 Variable -> IDENTIFIER ${ {
2255 struct variable *v = var_ref(c, $1.txt);
2256 $0 = new_pos(var, $1);
2258 /* This might be a label - allocate a var just in case */
2259 v = var_decl(c, $1.txt);
2267 cast(var, $0)->var = v;
2272 Type -> IDENTIFIER ${
2273 $0 = find_type(c, $1.txt);
2276 "error: undefined type", &$1);
2283 ###### print exec cases
2286 struct var *v = cast(var, e);
2288 struct binding *b = v->var->name;
2289 printf("%.*s", b->name.len, b->name.txt);
2296 if (loc->type == Xvar) {
2297 struct var *v = cast(var, loc);
2299 struct binding *b = v->var->name;
2300 fprintf(stderr, "%.*s", b->name.len, b->name.txt);
2302 fputs("???", stderr); // NOTEST
2304 fputs("NOTVAR", stderr); // NOTEST
2307 ###### propagate exec cases
2311 struct var *var = cast(var, prog);
2312 struct variable *v = var->var;
2314 type_err(c, "%d:BUG: no variable!!", prog, NULL, 0, NULL); // NOTEST
2315 return Tnone; // NOTEST
2319 if (v->constant && (rules & Rnoconstant)) {
2320 type_err(c, "error: Cannot assign to a constant: %v",
2321 prog, NULL, 0, NULL);
2322 type_err(c, "info: name was defined as a constant here",
2323 v->where_decl, NULL, 0, NULL);
2326 if (v->type == Tnone && v->where_decl == prog)
2327 type_err(c, "error: variable used but not declared: %v",
2328 prog, NULL, 0, NULL);
2329 if (v->type == NULL) {
2330 if (type && *ok != 0) {
2333 v->where_set = prog;
2338 if (!type_compat(type, v->type, rules)) {
2339 type_err(c, "error: expected %1%r but variable '%v' is %2", prog,
2340 type, rules, v->type);
2341 type_err(c, "info: this is where '%v' was set to %1", v->where_set,
2342 v->type, rules, NULL);
2349 ###### interp exec cases
2352 struct var *var = cast(var, e);
2353 struct variable *v = var->var;
2362 ###### ast functions
2364 static void free_var(struct var *v)
2369 ###### free exec cases
2370 case Xvar: free_var(cast(var, e)); break;
2372 ### Expressions: Conditional
2374 Our first user of the `binode` will be conditional expressions, which
2375 is a bit odd as they actually have three components. That will be
2376 handled by having 2 binodes for each expression. The conditional
2377 expression is the lowest precedence operator which is why we define it
2378 first - to start the precedence list.
2380 Conditional expressions are of the form "value `if` condition `else`
2381 other_value". They associate to the right, so everything to the right
2382 of `else` is part of an else value, while only a higher-precedence to
2383 the left of `if` is the if values. Between `if` and `else` there is no
2384 room for ambiguity, so a full conditional expression is allowed in
2396 Expression -> Expression if Expression else Expression $$ifelse ${ {
2397 struct binode *b1 = new(binode);
2398 struct binode *b2 = new(binode);
2407 ## expression grammar
2409 ###### print binode cases
2412 b2 = cast(binode, b->right);
2413 if (bracket) printf("(");
2414 print_exec(b2->left, -1, bracket);
2416 print_exec(b->left, -1, bracket);
2418 print_exec(b2->right, -1, bracket);
2419 if (bracket) printf(")");
2422 ###### propagate binode cases
2425 /* cond must be Tbool, others must match */
2426 struct binode *b2 = cast(binode, b->right);
2429 propagate_types(b->left, c, ok, Tbool, 0);
2430 t = propagate_types(b2->left, c, ok, type, Rnolabel);
2431 t2 = propagate_types(b2->right, c, ok, type ?: t, Rnolabel);
2435 ###### interp binode cases
2438 struct binode *b2 = cast(binode, b->right);
2439 left = interp_exec(b->left, <ype);
2441 rv = interp_exec(b2->left, &rvtype);
2443 rv = interp_exec(b2->right, &rvtype);
2447 ### Expressions: Boolean
2449 The next class of expressions to use the `binode` will be Boolean
2450 expressions. "`and then`" and "`or else`" are similar to `and` and `or`
2451 have same corresponding precendence. The difference is that they don't
2452 evaluate the second expression if not necessary.
2461 ###### expr precedence
2466 ###### expression grammar
2467 | Expression or Expression ${ {
2468 struct binode *b = new(binode);
2474 | Expression or else Expression ${ {
2475 struct binode *b = new(binode);
2482 | Expression and Expression ${ {
2483 struct binode *b = new(binode);
2489 | Expression and then Expression ${ {
2490 struct binode *b = new(binode);
2497 | not Expression ${ {
2498 struct binode *b = new(binode);
2504 ###### print binode cases
2506 if (bracket) printf("(");
2507 print_exec(b->left, -1, bracket);
2509 print_exec(b->right, -1, bracket);
2510 if (bracket) printf(")");
2513 if (bracket) printf("(");
2514 print_exec(b->left, -1, bracket);
2515 printf(" and then ");
2516 print_exec(b->right, -1, bracket);
2517 if (bracket) printf(")");
2520 if (bracket) printf("(");
2521 print_exec(b->left, -1, bracket);
2523 print_exec(b->right, -1, bracket);
2524 if (bracket) printf(")");
2527 if (bracket) printf("(");
2528 print_exec(b->left, -1, bracket);
2529 printf(" or else ");
2530 print_exec(b->right, -1, bracket);
2531 if (bracket) printf(")");
2534 if (bracket) printf("(");
2536 print_exec(b->right, -1, bracket);
2537 if (bracket) printf(")");
2540 ###### propagate binode cases
2546 /* both must be Tbool, result is Tbool */
2547 propagate_types(b->left, c, ok, Tbool, 0);
2548 propagate_types(b->right, c, ok, Tbool, 0);
2549 if (type && type != Tbool)
2550 type_err(c, "error: %1 operation found where %2 expected", prog,
2554 ###### interp binode cases
2556 rv = interp_exec(b->left, &rvtype);
2557 right = interp_exec(b->right, &rtype);
2558 rv.bool = rv.bool && right.bool;
2561 rv = interp_exec(b->left, &rvtype);
2563 rv = interp_exec(b->right, NULL);
2566 rv = interp_exec(b->left, &rvtype);
2567 right = interp_exec(b->right, &rtype);
2568 rv.bool = rv.bool || right.bool;
2571 rv = interp_exec(b->left, &rvtype);
2573 rv = interp_exec(b->right, NULL);
2576 rv = interp_exec(b->right, &rvtype);
2580 ### Expressions: Comparison
2582 Of slightly higher precedence that Boolean expressions are Comparisons.
2583 A comparison takes arguments of any comparable type, but the two types
2586 To simplify the parsing we introduce an `eop` which can record an
2587 expression operator, and the `CMPop` non-terminal will match one of them.
2594 ###### ast functions
2595 static void free_eop(struct eop *e)
2609 ###### expr precedence
2610 $LEFT < > <= >= == != CMPop
2612 ###### expression grammar
2613 | Expression CMPop Expression ${ {
2614 struct binode *b = new(binode);
2624 CMPop -> < ${ $0.op = Less; }$
2625 | > ${ $0.op = Gtr; }$
2626 | <= ${ $0.op = LessEq; }$
2627 | >= ${ $0.op = GtrEq; }$
2628 | == ${ $0.op = Eql; }$
2629 | != ${ $0.op = NEql; }$
2631 ###### print binode cases
2639 if (bracket) printf("(");
2640 print_exec(b->left, -1, bracket);
2642 case Less: printf(" < "); break;
2643 case LessEq: printf(" <= "); break;
2644 case Gtr: printf(" > "); break;
2645 case GtrEq: printf(" >= "); break;
2646 case Eql: printf(" == "); break;
2647 case NEql: printf(" != "); break;
2648 default: abort(); // NOTEST
2650 print_exec(b->right, -1, bracket);
2651 if (bracket) printf(")");
2654 ###### propagate binode cases
2661 /* Both must match but not be labels, result is Tbool */
2662 t = propagate_types(b->left, c, ok, NULL, Rnolabel);
2664 propagate_types(b->right, c, ok, t, 0);
2666 t = propagate_types(b->right, c, ok, NULL, Rnolabel);
2668 t = propagate_types(b->left, c, ok, t, 0);
2670 if (!type_compat(type, Tbool, 0))
2671 type_err(c, "error: Comparison returns %1 but %2 expected", prog,
2672 Tbool, rules, type);
2675 ###### interp binode cases
2684 left = interp_exec(b->left, <ype);
2685 right = interp_exec(b->right, &rtype);
2686 cmp = value_cmp(ltype, rtype, &left, &right);
2689 case Less: rv.bool = cmp < 0; break;
2690 case LessEq: rv.bool = cmp <= 0; break;
2691 case Gtr: rv.bool = cmp > 0; break;
2692 case GtrEq: rv.bool = cmp >= 0; break;
2693 case Eql: rv.bool = cmp == 0; break;
2694 case NEql: rv.bool = cmp != 0; break;
2695 default: rv.bool = 0; break; // NOTEST
2700 ### Expressions: The rest
2702 The remaining expressions with the highest precedence are arithmetic,
2703 string concatenation, and string conversion. String concatenation
2704 (`++`) has the same precedence as multiplication and division, but lower
2707 String conversion is a temporary feature until I get a better type
2708 system. `$` is a prefix operator which expects a string and returns
2711 `+` and `-` are both infix and prefix operations (where they are
2712 absolute value and negation). These have different operator names.
2714 We also have a 'Bracket' operator which records where parentheses were
2715 found. This makes it easy to reproduce these when printing. Possibly I
2716 should only insert brackets were needed for precedence.
2726 ###### expr precedence
2732 ###### expression grammar
2733 | Expression Eop Expression ${ {
2734 struct binode *b = new(binode);
2741 | Expression Top Expression ${ {
2742 struct binode *b = new(binode);
2749 | ( Expression ) ${ {
2750 struct binode *b = new_pos(binode, $1);
2755 | Uop Expression ${ {
2756 struct binode *b = new(binode);
2761 | Value ${ $0 = $<1; }$
2762 | Variable ${ $0 = $<1; }$
2765 Eop -> + ${ $0.op = Plus; }$
2766 | - ${ $0.op = Minus; }$
2768 Uop -> + ${ $0.op = Absolute; }$
2769 | - ${ $0.op = Negate; }$
2770 | $ ${ $0.op = StringConv; }$
2772 Top -> * ${ $0.op = Times; }$
2773 | / ${ $0.op = Divide; }$
2774 | % ${ $0.op = Rem; }$
2775 | ++ ${ $0.op = Concat; }$
2777 ###### print binode cases
2784 if (bracket) printf("(");
2785 print_exec(b->left, indent, bracket);
2787 case Plus: fputs(" + ", stdout); break;
2788 case Minus: fputs(" - ", stdout); break;
2789 case Times: fputs(" * ", stdout); break;
2790 case Divide: fputs(" / ", stdout); break;
2791 case Rem: fputs(" % ", stdout); break;
2792 case Concat: fputs(" ++ ", stdout); break;
2793 default: abort(); // NOTEST
2795 print_exec(b->right, indent, bracket);
2796 if (bracket) printf(")");
2801 if (bracket) printf("(");
2803 case Absolute: fputs("+", stdout); break;
2804 case Negate: fputs("-", stdout); break;
2805 case StringConv: fputs("$", stdout); break;
2806 default: abort(); // NOTEST
2808 print_exec(b->right, indent, bracket);
2809 if (bracket) printf(")");
2813 print_exec(b->right, indent, bracket);
2817 ###### propagate binode cases
2823 /* both must be numbers, result is Tnum */
2826 /* as propagate_types ignores a NULL,
2827 * unary ops fit here too */
2828 propagate_types(b->left, c, ok, Tnum, 0);
2829 propagate_types(b->right, c, ok, Tnum, 0);
2830 if (!type_compat(type, Tnum, 0))
2831 type_err(c, "error: Arithmetic returns %1 but %2 expected", prog,
2836 /* both must be Tstr, result is Tstr */
2837 propagate_types(b->left, c, ok, Tstr, 0);
2838 propagate_types(b->right, c, ok, Tstr, 0);
2839 if (!type_compat(type, Tstr, 0))
2840 type_err(c, "error: Concat returns %1 but %2 expected", prog,
2845 /* op must be string, result is number */
2846 propagate_types(b->left, c, ok, Tstr, 0);
2847 if (!type_compat(type, Tnum, 0))
2849 "error: Can only convert string to number, not %1",
2850 prog, type, 0, NULL);
2854 return propagate_types(b->right, c, ok, type, 0);
2856 ###### interp binode cases
2859 rv = interp_exec(b->left, &rvtype);
2860 right = interp_exec(b->right, &rtype);
2861 mpq_add(rv.num, rv.num, right.num);
2864 rv = interp_exec(b->left, &rvtype);
2865 right = interp_exec(b->right, &rtype);
2866 mpq_sub(rv.num, rv.num, right.num);
2869 rv = interp_exec(b->left, &rvtype);
2870 right = interp_exec(b->right, &rtype);
2871 mpq_mul(rv.num, rv.num, right.num);
2874 rv = interp_exec(b->left, &rvtype);
2875 right = interp_exec(b->right, &rtype);
2876 mpq_div(rv.num, rv.num, right.num);
2881 left = interp_exec(b->left, <ype);
2882 right = interp_exec(b->right, &rtype);
2883 mpz_init(l); mpz_init(r); mpz_init(rem);
2884 mpz_tdiv_q(l, mpq_numref(left.num), mpq_denref(left.num));
2885 mpz_tdiv_q(r, mpq_numref(right.num), mpq_denref(right.num));
2886 mpz_tdiv_r(rem, l, r);
2887 val_init(Tnum, &rv);
2888 mpq_set_z(rv.num, rem);
2889 mpz_clear(r); mpz_clear(l); mpz_clear(rem);
2894 rv = interp_exec(b->right, &rvtype);
2895 mpq_neg(rv.num, rv.num);
2898 rv = interp_exec(b->right, &rvtype);
2899 mpq_abs(rv.num, rv.num);
2902 rv = interp_exec(b->right, &rvtype);
2905 left = interp_exec(b->left, <ype);
2906 right = interp_exec(b->right, &rtype);
2908 rv.str = text_join(left.str, right.str);
2911 right = interp_exec(b->right, &rvtype);
2915 struct text tx = right.str;
2918 if (tx.txt[0] == '-') {
2923 if (number_parse(rv.num, tail, tx) == 0)
2926 mpq_neg(rv.num, rv.num);
2928 printf("Unsupported suffix: %.*s\n", tx.len, tx.txt);
2932 ###### value functions
2934 static struct text text_join(struct text a, struct text b)
2937 rv.len = a.len + b.len;
2938 rv.txt = malloc(rv.len);
2939 memcpy(rv.txt, a.txt, a.len);
2940 memcpy(rv.txt+a.len, b.txt, b.len);
2944 ### Blocks, Statements, and Statement lists.
2946 Now that we have expressions out of the way we need to turn to
2947 statements. There are simple statements and more complex statements.
2948 Simple statements do not contain (syntactic) newlines, complex statements do.
2950 Statements often come in sequences and we have corresponding simple
2951 statement lists and complex statement lists.
2952 The former comprise only simple statements separated by semicolons.
2953 The later comprise complex statements and simple statement lists. They are
2954 separated by newlines. Thus the semicolon is only used to separate
2955 simple statements on the one line. This may be overly restrictive,
2956 but I'm not sure I ever want a complex statement to share a line with
2959 Note that a simple statement list can still use multiple lines if
2960 subsequent lines are indented, so
2962 ###### Example: wrapped simple statement list
2967 is a single simple statement list. This might allow room for
2968 confusion, so I'm not set on it yet.
2970 A simple statement list needs no extra syntax. A complex statement
2971 list has two syntactic forms. It can be enclosed in braces (much like
2972 C blocks), or it can be introduced by an indent and continue until an
2973 unindented newline (much like Python blocks). With this extra syntax
2974 it is referred to as a block.
2976 Note that a block does not have to include any newlines if it only
2977 contains simple statements. So both of:
2979 if condition: a=b; d=f
2981 if condition { a=b; print f }
2985 In either case the list is constructed from a `binode` list with
2986 `Block` as the operator. When parsing the list it is most convenient
2987 to append to the end, so a list is a list and a statement. When using
2988 the list it is more convenient to consider a list to be a statement
2989 and a list. So we need a function to re-order a list.
2990 `reorder_bilist` serves this purpose.
2992 The only stand-alone statement we introduce at this stage is `pass`
2993 which does nothing and is represented as a `NULL` pointer in a `Block`
2994 list. Other stand-alone statements will follow once the infrastructure
3005 Block -> { IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
3006 | { SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
3007 | SimpleStatements ; ${ $0 = reorder_bilist($<SS); }$
3008 | SimpleStatements EOL ${ $0 = reorder_bilist($<SS); }$
3009 | IN OptNL Statementlist OUT ${ $0 = $<Sl; }$
3011 OpenBlock -> OpenScope { IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
3012 | OpenScope { SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
3013 | OpenScope SimpleStatements ; ${ $0 = reorder_bilist($<SS); }$
3014 | OpenScope SimpleStatements EOL ${ $0 = reorder_bilist($<SS); }$
3015 | IN OpenScope OptNL Statementlist OUT ${ $0 = $<Sl; }$
3017 UseBlock -> { OpenScope IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
3018 | { OpenScope SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
3019 | IN OpenScope OptNL Statementlist OUT ${ $0 = $<Sl; }$
3021 ColonBlock -> { IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
3022 | { SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
3023 | : SimpleStatements ; ${ $0 = reorder_bilist($<SS); }$
3024 | : SimpleStatements EOL ${ $0 = reorder_bilist($<SS); }$
3025 | : IN OptNL Statementlist OUT ${ $0 = $<Sl; }$
3027 Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<CS); }$
3029 ComplexStatements -> ComplexStatements ComplexStatement ${
3039 | ComplexStatement ${
3051 ComplexStatement -> SimpleStatements Newlines ${
3052 $0 = reorder_bilist($<SS);
3054 | SimpleStatements ; Newlines ${
3055 $0 = reorder_bilist($<SS);
3057 ## ComplexStatement Grammar
3060 SimpleStatements -> SimpleStatements ; SimpleStatement ${
3066 | SimpleStatement ${
3074 SimpleStatement -> pass ${ $0 = NULL; }$
3075 | ERROR ${ tok_err(c, "Syntax error in statement", &$1); }$
3076 ## SimpleStatement Grammar
3078 ###### print binode cases
3082 if (b->left == NULL)
3085 print_exec(b->left, indent, bracket);
3088 print_exec(b->right, indent, bracket);
3091 // block, one per line
3092 if (b->left == NULL)
3093 do_indent(indent, "pass\n");
3095 print_exec(b->left, indent, bracket);
3097 print_exec(b->right, indent, bracket);
3101 ###### propagate binode cases
3104 /* If any statement returns something other than Tnone
3105 * or Tbool then all such must return same type.
3106 * As each statement may be Tnone or something else,
3107 * we must always pass NULL (unknown) down, otherwise an incorrect
3108 * error might occur. We never return Tnone unless it is
3113 for (e = b; e; e = cast(binode, e->right)) {
3114 t = propagate_types(e->left, c, ok, NULL, rules);
3115 if ((rules & Rboolok) && t == Tbool)
3117 if (t && t != Tnone && t != Tbool) {
3121 type_err(c, "error: expected %1%r, found %2",
3122 e->left, type, rules, t);
3128 ###### interp binode cases
3130 while (rvtype == Tnone &&
3133 rv = interp_exec(b->left, &rvtype);
3134 b = cast(binode, b->right);
3138 ### The Print statement
3140 `print` is a simple statement that takes a comma-separated list of
3141 expressions and prints the values separated by spaces and terminated
3142 by a newline. No control of formatting is possible.
3144 `print` faces the same list-ordering issue as blocks, and uses the
3150 ##### expr precedence
3153 ###### SimpleStatement Grammar
3155 | print ExpressionList ${
3156 $0 = reorder_bilist($<2);
3158 | print ExpressionList , ${
3163 $0 = reorder_bilist($0);
3174 ExpressionList -> ExpressionList , Expression ${
3187 ###### print binode cases
3190 do_indent(indent, "print");
3194 print_exec(b->left, -1, bracket);
3198 b = cast(binode, b->right);
3204 ###### propagate binode cases
3207 /* don't care but all must be consistent */
3208 propagate_types(b->left, c, ok, NULL, Rnolabel);
3209 propagate_types(b->right, c, ok, NULL, Rnolabel);
3212 ###### interp binode cases
3218 for ( ; b; b = cast(binode, b->right))
3222 left = interp_exec(b->left, <ype);
3223 print_value(ltype, &left);
3224 free_value(ltype, &left);
3235 ###### Assignment statement
3237 An assignment will assign a value to a variable, providing it hasn't
3238 been declared as a constant. The analysis phase ensures that the type
3239 will be correct so the interpreter just needs to perform the
3240 calculation. There is a form of assignment which declares a new
3241 variable as well as assigning a value. If a name is assigned before
3242 it is declared, and error will be raised as the name is created as
3243 `Tlabel` and it is illegal to assign to such names.
3249 ###### declare terminals
3252 ###### SimpleStatement Grammar
3253 | Variable = Expression ${
3259 | VariableDecl = Expression ${
3267 if ($1->var->where_set == NULL) {
3269 "Variable declared with no type or value: %v",
3279 ###### print binode cases
3282 do_indent(indent, "");
3283 print_exec(b->left, indent, bracket);
3285 print_exec(b->right, indent, bracket);
3292 struct variable *v = cast(var, b->left)->var;
3293 do_indent(indent, "");
3294 print_exec(b->left, indent, bracket);
3295 if (cast(var, b->left)->var->constant) {
3296 if (v->where_decl == v->where_set) {
3298 type_print(v->type, stdout);
3303 if (v->where_decl == v->where_set) {
3305 type_print(v->type, stdout);
3312 print_exec(b->right, indent, bracket);
3319 ###### propagate binode cases
3323 /* Both must match and not be labels,
3324 * Type must support 'dup',
3325 * For Assign, left must not be constant.
3328 t = propagate_types(b->left, c, ok, NULL,
3329 Rnolabel | (b->op == Assign ? Rnoconstant : 0));
3334 if (propagate_types(b->right, c, ok, t, 0) != t)
3335 if (b->left->type == Xvar)
3336 type_err(c, "info: variable '%v' was set as %1 here.",
3337 cast(var, b->left)->var->where_set, t, rules, NULL);
3339 t = propagate_types(b->right, c, ok, NULL, Rnolabel);
3341 propagate_types(b->left, c, ok, t,
3342 (b->op == Assign ? Rnoconstant : 0));
3344 if (t && t->dup == NULL)
3345 type_err(c, "error: cannot assign value of type %1", b, t, 0, NULL);
3350 ###### interp binode cases
3353 lleft = linterp_exec(b->left, <ype);
3354 right = interp_exec(b->right, &rtype);
3356 free_value(ltype, lleft);
3357 dup_value(ltype, &right, lleft);
3364 struct variable *v = cast(var, b->left)->var;
3367 free_value(v->type, v->val);
3369 if (v->type->prepare_type)
3370 // FIXME is this the first usage of the type?
3371 v->type->prepare_type(v->type);
3373 right = interp_exec(b->right, &rtype);
3374 v->val = val_alloc(v->type, &right);
3377 v->val = val_alloc(v->type, NULL);
3382 ### The `use` statement
3384 The `use` statement is the last "simple" statement. It is needed when
3385 the condition in a conditional statement is a block. `use` works much
3386 like `return` in C, but only completes the `condition`, not the whole
3392 ###### expr precedence
3395 ###### SimpleStatement Grammar
3397 $0 = new_pos(binode, $1);
3400 if ($0->right->type == Xvar) {
3401 struct var *v = cast(var, $0->right);
3402 if (v->var->type == Tnone) {
3403 /* Convert this to a label */
3404 v->var->type = Tlabel;
3405 v->var->val = val_alloc(Tlabel, NULL);
3406 v->var->val->label = v->var->val;
3411 ###### print binode cases
3414 do_indent(indent, "use ");
3415 print_exec(b->right, -1, bracket);
3420 ###### propagate binode cases
3423 /* result matches value */
3424 return propagate_types(b->right, c, ok, type, 0);
3426 ###### interp binode cases
3429 rv = interp_exec(b->right, &rvtype);
3432 ### The Conditional Statement
3434 This is the biggy and currently the only complex statement. This
3435 subsumes `if`, `while`, `do/while`, `switch`, and some parts of `for`.
3436 It is comprised of a number of parts, all of which are optional though
3437 set combinations apply. Each part is (usually) a key word (`then` is
3438 sometimes optional) followed by either an expression or a code block,
3439 except the `casepart` which is a "key word and an expression" followed
3440 by a code block. The code-block option is valid for all parts and,
3441 where an expression is also allowed, the code block can use the `use`
3442 statement to report a value. If the code block does not report a value
3443 the effect is similar to reporting `True`.
3445 The `else` and `case` parts, as well as `then` when combined with
3446 `if`, can contain a `use` statement which will apply to some
3447 containing conditional statement. `for` parts, `do` parts and `then`
3448 parts used with `for` can never contain a `use`, except in some
3449 subordinate conditional statement.
3451 If there is a `forpart`, it is executed first, only once.
3452 If there is a `dopart`, then it is executed repeatedly providing
3453 always that the `condpart` or `cond`, if present, does not return a non-True
3454 value. `condpart` can fail to return any value if it simply executes
3455 to completion. This is treated the same as returning `True`.
3457 If there is a `thenpart` it will be executed whenever the `condpart`
3458 or `cond` returns True (or does not return any value), but this will happen
3459 *after* `dopart` (when present).
3461 If `elsepart` is present it will be executed at most once when the
3462 condition returns `False` or some value that isn't `True` and isn't
3463 matched by any `casepart`. If there are any `casepart`s, they will be
3464 executed when the condition returns a matching value.
3466 The particular sorts of values allowed in case parts has not yet been
3467 determined in the language design, so nothing is prohibited.
3469 The various blocks in this complex statement potentially provide scope
3470 for variables as described earlier. Each such block must include the
3471 "OpenScope" nonterminal before parsing the block, and must call
3472 `var_block_close()` when closing the block.
3474 The code following "`if`", "`switch`" and "`for`" does not get its own
3475 scope, but is in a scope covering the whole statement, so names
3476 declared there cannot be redeclared elsewhere. Similarly the
3477 condition following "`while`" is in a scope the covers the body
3478 ("`do`" part) of the loop, and which does not allow conditional scope
3479 extension. Code following "`then`" (both looping and non-looping),
3480 "`else`" and "`case`" each get their own local scope.
3482 The type requirements on the code block in a `whilepart` are quite
3483 unusal. It is allowed to return a value of some identifiable type, in
3484 which case the loop aborts and an appropriate `casepart` is run, or it
3485 can return a Boolean, in which case the loop either continues to the
3486 `dopart` (on `True`) or aborts and runs the `elsepart` (on `False`).
3487 This is different both from the `ifpart` code block which is expected to
3488 return a Boolean, or the `switchpart` code block which is expected to
3489 return the same type as the casepart values. The correct analysis of
3490 the type of the `whilepart` code block is the reason for the
3491 `Rboolok` flag which is passed to `propagate_types()`.
3493 The `cond_statement` cannot fit into a `binode` so a new `exec` is
3502 struct exec *action;
3503 struct casepart *next;
3505 struct cond_statement {
3507 struct exec *forpart, *condpart, *dopart, *thenpart, *elsepart;
3508 struct casepart *casepart;
3511 ###### ast functions
3513 static void free_casepart(struct casepart *cp)
3517 free_exec(cp->value);
3518 free_exec(cp->action);
3525 static void free_cond_statement(struct cond_statement *s)
3529 free_exec(s->forpart);
3530 free_exec(s->condpart);
3531 free_exec(s->dopart);
3532 free_exec(s->thenpart);
3533 free_exec(s->elsepart);
3534 free_casepart(s->casepart);
3538 ###### free exec cases
3539 case Xcond_statement: free_cond_statement(cast(cond_statement, e)); break;
3541 ###### ComplexStatement Grammar
3542 | CondStatement ${ $0 = $<1; }$
3544 ###### expr precedence
3545 $TERM for then while do
3552 // A CondStatement must end with EOL, as does CondSuffix and
3554 // ForPart, ThenPart, SwitchPart, CasePart are non-empty and
3555 // may or may not end with EOL
3556 // WhilePart and IfPart include an appropriate Suffix
3559 // Both ForPart and Whilepart open scopes, and CondSuffix only
3560 // closes one - so in the first branch here we have another to close.
3561 CondStatement -> ForPart OptNL ThenPart OptNL WhilePart CondSuffix ${
3564 $0->thenpart = $<TP;
3565 $0->condpart = $WP.condpart; $WP.condpart = NULL;
3566 $0->dopart = $WP.dopart; $WP.dopart = NULL;
3567 var_block_close(c, CloseSequential);
3569 | ForPart OptNL WhilePart CondSuffix ${
3572 $0->condpart = $WP.condpart; $WP.condpart = NULL;
3573 $0->dopart = $WP.dopart; $WP.dopart = NULL;
3574 var_block_close(c, CloseSequential);
3576 | WhilePart CondSuffix ${
3578 $0->condpart = $WP.condpart; $WP.condpart = NULL;
3579 $0->dopart = $WP.dopart; $WP.dopart = NULL;
3581 | SwitchPart OptNL CasePart CondSuffix ${
3583 $0->condpart = $<SP;
3584 $CP->next = $0->casepart;
3585 $0->casepart = $<CP;
3587 | SwitchPart : IN OptNL CasePart CondSuffix OUT Newlines ${
3589 $0->condpart = $<SP;
3590 $CP->next = $0->casepart;
3591 $0->casepart = $<CP;
3593 | IfPart IfSuffix ${
3595 $0->condpart = $IP.condpart; $IP.condpart = NULL;
3596 $0->thenpart = $IP.thenpart; $IP.thenpart = NULL;
3597 // This is where we close an "if" statement
3598 var_block_close(c, CloseSequential);
3601 CondSuffix -> IfSuffix ${
3603 // This is where we close scope of the whole
3604 // "for" or "while" statement
3605 var_block_close(c, CloseSequential);
3607 | Newlines CasePart CondSuffix ${
3609 $CP->next = $0->casepart;
3610 $0->casepart = $<CP;
3612 | CasePart CondSuffix ${
3614 $CP->next = $0->casepart;
3615 $0->casepart = $<CP;
3618 IfSuffix -> Newlines ${ $0 = new(cond_statement); }$
3619 | Newlines ElsePart ${ $0 = $<EP; }$
3620 | ElsePart ${$0 = $<EP; }$
3622 ElsePart -> else OpenBlock Newlines ${
3623 $0 = new(cond_statement);
3624 $0->elsepart = $<OB;
3625 var_block_close(c, CloseElse);
3627 | else OpenScope CondStatement ${
3628 $0 = new(cond_statement);
3629 $0->elsepart = $<CS;
3630 var_block_close(c, CloseElse);
3634 CasePart -> case Expression OpenScope ColonBlock ${
3635 $0 = calloc(1,sizeof(struct casepart));
3638 var_block_close(c, CloseParallel);
3642 // These scopes are closed in CondSuffix
3643 ForPart -> for OpenBlock ${
3647 ThenPart -> then OpenBlock ${
3649 var_block_close(c, CloseSequential);
3653 // This scope is closed in CondSuffix
3654 WhilePart -> while UseBlock OptNL do Block ${
3658 | while OpenScope Expression ColonBlock ${
3659 $0.condpart = $<Exp;
3663 IfPart -> if UseBlock OptNL then OpenBlock ClosePara ${
3667 | if OpenScope Expression OpenScope ColonBlock ClosePara ${
3671 | if OpenScope Expression OpenScope OptNL then Block ClosePara ${
3677 // This scope is closed in CondSuffix
3678 SwitchPart -> switch OpenScope Expression ${
3681 | switch UseBlock ${
3685 ###### print exec cases
3687 case Xcond_statement:
3689 struct cond_statement *cs = cast(cond_statement, e);
3690 struct casepart *cp;
3692 do_indent(indent, "for");
3693 if (bracket) printf(" {\n"); else printf("\n");
3694 print_exec(cs->forpart, indent+1, bracket);
3697 do_indent(indent, "} then {\n");
3699 do_indent(indent, "then\n");
3700 print_exec(cs->thenpart, indent+1, bracket);
3702 if (bracket) do_indent(indent, "}\n");
3706 if (cs->condpart && cs->condpart->type == Xbinode &&
3707 cast(binode, cs->condpart)->op == Block) {
3709 do_indent(indent, "while {\n");
3711 do_indent(indent, "while\n");
3712 print_exec(cs->condpart, indent+1, bracket);
3714 do_indent(indent, "} do {\n");
3716 do_indent(indent, "do\n");
3717 print_exec(cs->dopart, indent+1, bracket);
3719 do_indent(indent, "}\n");
3721 do_indent(indent, "while ");
3722 print_exec(cs->condpart, 0, bracket);
3727 print_exec(cs->dopart, indent+1, bracket);
3729 do_indent(indent, "}\n");
3734 do_indent(indent, "switch");
3736 do_indent(indent, "if");
3737 if (cs->condpart && cs->condpart->type == Xbinode &&
3738 cast(binode, cs->condpart)->op == Block) {
3743 print_exec(cs->condpart, indent+1, bracket);
3745 do_indent(indent, "}\n");
3747 do_indent(indent, "then:\n");
3748 print_exec(cs->thenpart, indent+1, bracket);
3752 print_exec(cs->condpart, 0, bracket);
3758 print_exec(cs->thenpart, indent+1, bracket);
3760 do_indent(indent, "}\n");
3765 for (cp = cs->casepart; cp; cp = cp->next) {
3766 do_indent(indent, "case ");
3767 print_exec(cp->value, -1, 0);
3772 print_exec(cp->action, indent+1, bracket);
3774 do_indent(indent, "}\n");
3777 do_indent(indent, "else");
3782 print_exec(cs->elsepart, indent+1, bracket);
3784 do_indent(indent, "}\n");
3789 ###### propagate exec cases
3790 case Xcond_statement:
3792 // forpart and dopart must return Tnone
3793 // thenpart must return Tnone if there is a dopart,
3794 // otherwise it is like elsepart.
3796 // be bool if there is no casepart
3797 // match casepart->values if there is a switchpart
3798 // either be bool or match casepart->value if there
3800 // elsepart and casepart->action must match the return type
3801 // expected of this statement.
3802 struct cond_statement *cs = cast(cond_statement, prog);
3803 struct casepart *cp;
3805 t = propagate_types(cs->forpart, c, ok, Tnone, 0);
3806 if (!type_compat(Tnone, t, 0))
3808 t = propagate_types(cs->dopart, c, ok, Tnone, 0);
3809 if (!type_compat(Tnone, t, 0))
3812 t = propagate_types(cs->thenpart, c, ok, Tnone, 0);
3813 if (!type_compat(Tnone, t, 0))
3816 if (cs->casepart == NULL)
3817 propagate_types(cs->condpart, c, ok, Tbool, 0);
3819 /* Condpart must match case values, with bool permitted */
3821 for (cp = cs->casepart;
3822 cp && !t; cp = cp->next)
3823 t = propagate_types(cp->value, c, ok, NULL, 0);
3824 if (!t && cs->condpart)
3825 t = propagate_types(cs->condpart, c, ok, NULL, Rboolok);
3826 // Now we have a type (I hope) push it down
3828 for (cp = cs->casepart; cp; cp = cp->next)
3829 propagate_types(cp->value, c, ok, t, 0);
3830 propagate_types(cs->condpart, c, ok, t, Rboolok);
3833 // (if)then, else, and case parts must return expected type.
3834 if (!cs->dopart && !type)
3835 type = propagate_types(cs->thenpart, c, ok, NULL, rules);
3837 type = propagate_types(cs->elsepart, c, ok, NULL, rules);
3838 for (cp = cs->casepart;
3841 type = propagate_types(cp->action, c, ok, NULL, rules);
3844 propagate_types(cs->thenpart, c, ok, type, rules);
3845 propagate_types(cs->elsepart, c, ok, type, rules);
3846 for (cp = cs->casepart; cp ; cp = cp->next)
3847 propagate_types(cp->action, c, ok, type, rules);
3853 ###### interp exec cases
3854 case Xcond_statement:
3856 struct value v, cnd;
3857 struct type *vtype, *cndtype;
3858 struct casepart *cp;
3859 struct cond_statement *c = cast(cond_statement, e);
3862 interp_exec(c->forpart, NULL);
3865 cnd = interp_exec(c->condpart, &cndtype);
3868 if (!(cndtype == Tnone ||
3869 (cndtype == Tbool && cnd.bool != 0)))
3871 // cnd is Tnone or Tbool, doesn't need to be freed
3873 interp_exec(c->dopart, NULL);
3876 rv = interp_exec(c->thenpart, &rvtype);
3877 if (rvtype != Tnone || !c->dopart)
3879 free_value(rvtype, &rv);
3882 } while (c->dopart);
3884 for (cp = c->casepart; cp; cp = cp->next) {
3885 v = interp_exec(cp->value, &vtype);
3886 if (value_cmp(cndtype, vtype, &v, &cnd) == 0) {
3887 free_value(vtype, &v);
3888 free_value(cndtype, &cnd);
3889 rv = interp_exec(cp->action, &rvtype);
3892 free_value(vtype, &v);
3894 free_value(cndtype, &cnd);
3896 rv = interp_exec(c->elsepart, &rvtype);
3903 ### Top level structure
3905 All the language elements so far can be used in various places. Now
3906 it is time to clarify what those places are.
3908 At the top level of a file there will be a number of declarations.
3909 Many of the things that can be declared haven't been described yet,
3910 such as functions, procedures, imports, and probably more.
3911 For now there are two sorts of things that can appear at the top
3912 level. They are predefined constants, `struct` types, and the main
3913 program. While the syntax will allow the main program to appear
3914 multiple times, that will trigger an error if it is actually attempted.
3916 The various declarations do not return anything. They store the
3917 various declarations in the parse context.
3919 ###### Parser: grammar
3922 Ocean -> OptNL DeclarationList
3924 ## declare terminals
3931 DeclarationList -> Declaration
3932 | DeclarationList Declaration
3934 Declaration -> ERROR Newlines ${
3936 "error: unhandled parse error", &$1);
3942 ## top level grammar
3944 ### The `const` section
3946 As well as being defined in with the code that uses them, constants
3947 can be declared at the top level. These have full-file scope, so they
3948 are always `InScope`. The value of a top level constant can be given
3949 as an expression, and this is evaluated immediately rather than in the
3950 later interpretation stage. Once we add functions to the language, we
3951 will need rules concern which, if any, can be used to define a top
3954 Constants are defined in a section that starts with the reserved word
3955 `const` and then has a block with a list of assignment statements.
3956 For syntactic consistency, these must use the double-colon syntax to
3957 make it clear that they are constants. Type can also be given: if
3958 not, the type will be determined during analysis, as with other
3961 As the types constants are inserted at the head of a list, printing
3962 them in the same order that they were read is not straight forward.
3963 We take a quadratic approach here and count the number of constants
3964 (variables of depth 0), then count down from there, each time
3965 searching through for the Nth constant for decreasing N.
3967 ###### top level grammar
3971 DeclareConstant -> const { IN OptNL ConstList OUT OptNL } Newlines
3972 | const { SimpleConstList } Newlines
3973 | const IN OptNL ConstList OUT Newlines
3974 | const SimpleConstList Newlines
3976 ConstList -> ConstList SimpleConstLine
3978 SimpleConstList -> SimpleConstList ; Const
3981 SimpleConstLine -> SimpleConstList Newlines
3982 | ERROR Newlines ${ tok_err(c, "Syntax error in constant", &$1); }$
3985 CType -> Type ${ $0 = $<1; }$
3988 Const -> IDENTIFIER :: CType = Expression ${ {
3992 v = var_decl(c, $1.txt);
3994 struct var *var = new_pos(var, $1);
3995 v->where_decl = var;
4000 v = var_ref(c, $1.txt);
4001 tok_err(c, "error: name already declared", &$1);
4002 type_err(c, "info: this is where '%v' was first declared",
4003 v->where_decl, NULL, 0, NULL);
4007 propagate_types($5, c, &ok, $3, 0);
4012 struct value res = interp_exec($5, &v->type);
4013 v->val = val_alloc(v->type, &res);
4017 ###### print const decls
4022 while (target != 0) {
4024 for (v = context.in_scope; v; v=v->in_scope)
4025 if (v->depth == 0) {
4036 printf(" %.*s :: ", v->name->name.len, v->name->name.txt);
4037 type_print(v->type, stdout);
4039 if (v->type == Tstr)
4041 print_value(v->type, v->val);
4042 if (v->type == Tstr)
4050 ### Finally the whole program.
4052 Somewhat reminiscent of Pascal a (current) Ocean program starts with
4053 the keyword "program" and a list of variable names which are assigned
4054 values from command line arguments. Following this is a `block` which
4055 is the code to execute. Unlike Pascal, constants and other
4056 declarations come *before* the program.
4058 As this is the top level, several things are handled a bit
4060 The whole program is not interpreted by `interp_exec` as that isn't
4061 passed the argument list which the program requires. Similarly type
4062 analysis is a bit more interesting at this level.
4067 ###### top level grammar
4069 DeclareProgram -> Program ${ {
4071 type_err(c, "Program defined a second time",
4080 Program -> program OpenScope Varlist ColonBlock Newlines ${
4083 $0->left = reorder_bilist($<Vl);
4085 var_block_close(c, CloseSequential);
4086 if (c->scope_stack && !c->parse_error) abort();
4089 Varlist -> Varlist ArgDecl ${
4098 ArgDecl -> IDENTIFIER ${ {
4099 struct variable *v = var_decl(c, $1.txt);
4106 ###### print binode cases
4108 do_indent(indent, "program");
4109 for (b2 = cast(binode, b->left); b2; b2 = cast(binode, b2->right)) {
4111 print_exec(b2->left, 0, 0);
4117 print_exec(b->right, indent+1, bracket);
4119 do_indent(indent, "}\n");
4122 ###### propagate binode cases
4123 case Program: abort(); // NOTEST
4125 ###### core functions
4127 static int analyse_prog(struct exec *prog, struct parse_context *c)
4129 struct binode *b = cast(binode, prog);
4136 propagate_types(b->right, c, &ok, Tnone, 0);
4141 for (b = cast(binode, b->left); b; b = cast(binode, b->right)) {
4142 struct var *v = cast(var, b->left);
4143 if (!v->var->type) {
4144 v->var->where_set = b;
4145 v->var->type = Tstr;
4149 b = cast(binode, prog);
4152 propagate_types(b->right, c, &ok, Tnone, 0);
4157 /* Make sure everything is still consistent */
4158 propagate_types(b->right, c, &ok, Tnone, 0);
4162 static void interp_prog(struct exec *prog, char **argv)
4164 struct binode *p = cast(binode, prog);
4171 al = cast(binode, p->left);
4173 struct var *v = cast(var, al->left);
4174 struct value *vl = v->var->val;
4176 if (argv[0] == NULL) {
4177 printf("Not enough args\n");
4180 al = cast(binode, al->right);
4182 free_value(v->var->type, vl);
4184 vl = val_alloc(v->var->type, NULL);
4187 free_value(v->var->type, vl);
4188 vl->str.len = strlen(argv[0]);
4189 vl->str.txt = malloc(vl->str.len);
4190 memcpy(vl->str.txt, argv[0], vl->str.len);
4193 v = interp_exec(p->right, &vtype);
4194 free_value(vtype, &v);
4197 ###### interp binode cases
4198 case Program: abort(); // NOTEST
4200 ## And now to test it out.
4202 Having a language requires having a "hello world" program. I'll
4203 provide a little more than that: a program that prints "Hello world"
4204 finds the GCD of two numbers, prints the first few elements of
4205 Fibonacci, performs a binary search for a number, and a few other
4206 things which will likely grow as the languages grows.
4208 ###### File: oceani.mk
4211 @echo "===== DEMO ====="
4212 ./oceani --section "demo: hello" oceani.mdc 55 33
4218 four ::= 2 + 2 ; five ::= 10/2
4219 const pie ::= "I like Pie";
4220 cake ::= "The cake is"
4229 print "Hello World, what lovely oceans you have!"
4230 print "Are there", five, "?"
4231 print pi, pie, "but", cake
4233 A := $Astr; B := $Bstr
4235 /* When a variable is defined in both branches of an 'if',
4236 * and used afterwards, the variables are merged.
4242 print "Is", A, "bigger than", B,"? ", bigger
4243 /* If a variable is not used after the 'if', no
4244 * merge happens, so types can be different
4247 double:string = "yes"
4248 print A, "is more than twice", B, "?", double
4251 print "double", B, "is", double
4256 if a > 0 and then b > 0:
4262 print "GCD of", A, "and", B,"is", a
4264 print a, "is not positive, cannot calculate GCD"
4266 print b, "is not positive, cannot calculate GCD"
4271 print "Fibonacci:", f1,f2,
4272 then togo = togo - 1
4280 /* Binary search... */
4285 mid := (lo + hi) / 2
4297 print "Yay, I found", target
4299 print "Closest I found was", mid
4304 // "middle square" PRNG. Not particularly good, but one my
4305 // Dad taught me - the first one I ever heard of.
4306 for i:=1; then i = i + 1; while i < size:
4307 n := list[i-1] * list[i-1]
4308 list[i] = (n / 100) % 10 000
4310 print "Before sort:",
4311 for i:=0; then i = i + 1; while i < size:
4315 for i := 1; then i=i+1; while i < size:
4316 for j:=i-1; then j=j-1; while j >= 0:
4317 if list[j] > list[j+1]:
4321 print " After sort:",
4322 for i:=0; then i = i + 1; while i < size:
4326 if 1 == 2 then print "yes"; else print "no"
4330 bob.alive = (bob.name == "Hello")
4331 print "bob", "is" if bob.alive else "isn't", "alive"