ocean-lang.org Git - ocean/blob - csrc/oceani.mdc

   1 # Ocean Interpreter - Jamison Creek version
   2
   3 Ocean is intended to be a compiled language, so this interpreter is
   4 not targeted at being the final product.  It is, rather, an intermediate
   5 stage and fills that role in two distinct ways.
   6
   7 Firstly, it exists as a platform to experiment with the early language
   8 design.  An interpreter is easy to write and easy to get working, so
   9 the barrier for entry is lower if I aim to start with an interpreter.
  10
  11 Secondly, the plan for the Ocean compiler is to write it in the
  12 [Ocean language](http://ocean-lang.org).  To achieve this we naturally
  13 need some sort of boot-strap process and this interpreter - written in
  14 portable C - will fill that role.  It will be used to bootstrap the
  15 Ocean compiler.
  16
  17 Two features that are not needed to fill either of these roles are
  18 performance and completeness.  The interpreter only needs to be fast
  19 enough to run small test programs and occasionally to run the compiler
  20 on itself.  It only needs to be complete enough to test aspects of the
  21 design which are developed before the compiler is working, and to run
  22 the compiler on itself.  Any features not used by the compiler when
  23 compiling itself are superfluous.  They may be included anyway, but
  24 they may not.
  25
  26 Nonetheless, the interpreter should end up being reasonably complete,
  27 and any performance bottlenecks which appear and are easily fixed, will
  28 be.
  29
  30 ## Current version
  31
  32 This third version of the interpreter exists to test out some initial
  33 ideas relating to types.  Particularly it adds arrays (indexed from
  34 zero) and simple structures.  Basic control flow and variable scoping
  35 are already fairly well established, as are basic numerical and
  36 boolean operators.
  37
  38 Some operators that have only recently been added, and so have not
  39 generated all that much experience yet are "and then" and "or else" as
  40 short-circuit Boolean operators, and the "if ... else" trinary
  41 operator which can select between two expressions based on a third
  42 (which appears syntactically in the middle).
  43
  44 Elements that are present purely to make a usable language, and
  45 without any expectation that they will remain, are the "program"
  46 clause, which provides a list of variables to receive command-line
  47 arguments, and the "print" statement which performs simple output.
  48
  49 The current scalar types are "number", "Boolean", and "string".
  50 Boolean will likely stay in its current form, the other two might, but
  51 could just as easily be changed.
  52
  53 ## Naming
  54
  55 Versions of the interpreter which obviously do not support a complete
  56 language will be named after creeks and streams.  This one is Jamison
  57 Creek.
  58
  59 Once we have something reasonably resembling a complete language, the
  60 names of rivers will be used.
  61 Early versions of the compiler will be named after seas.  Major
  62 releases of the compiler will be named after oceans.  Hopefully I will
  63 be finished once I get to the Pacific Ocean release.
  64
  65 ## Outline
  66
  67 As well as parsing and executing a program, the interpreter can print
  68 out the program from the parsed internal structure.  This is useful
  69 for validating the parsing.
  70 So the main requirements of the interpreter are:
  71
  72 - Parse the program, possibly with tracing,
  73 - Analyse the parsed program to ensure consistency,
  74 - Print the program,
  75 - Execute the program, if no parsing or consistency errors were found.
  76
  77 This is all performed by a single C program extracted with
  78 `parsergen`.
  79
  80 There will be two formats for printing the program: a default and one
  81 that uses bracketing.  So a `--bracket` command line option is needed
  82 for that.  Normally the first code section found is used, however an
  83 alternate section can be requested so that a file (such as this one)
  84 can contain multiple programs.  This is effected with the `--section`
  85 option.
  86
  87 This code must be compiled with `-fplan9-extensions` so that anonymous
  88 structures can be used.
  89
  90 ###### File: oceani.mk
  91
  92         myCFLAGS := -Wall -g -fplan9-extensions
  93         CFLAGS := $(filter-out $(myCFLAGS),$(CFLAGS)) $(myCFLAGS)
  94         myLDLIBS:= libparser.o libscanner.o libmdcode.o -licuuc
  95         LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
  96         ## libs
  97         all :: $(LDLIBS) oceani
  98         oceani.c oceani.h : oceani.mdc parsergen
  99                 ./parsergen -o oceani --LALR --tag Parser oceani.mdc
 100         oceani.mk: oceani.mdc md2c
 101                 ./md2c oceani.mdc
 102
 103         oceani: oceani.o $(LDLIBS)
 104                 $(CC) $(CFLAGS) -o oceani oceani.o $(LDLIBS)
 105
 106 ###### Parser: header
 107         ## macros
 108         ## ast
 109         struct parse_context {  /* state shared by parsing, analysis and error reporting */
 110                 struct token_config config;  /* scanner configuration; config2context() maps a pointer to this member back to the context */
 111                 char *file_name;  /* name of the input file; type_err() and tok_err() prefix messages with it */
 112                 int parse_error;  /* set to 1 by type_err()/tok_err() and by main() when no program is found; main() exits with 1 when set */
 113                 struct exec *prog;  /* the parsed program; main() treats NULL as "no program found" */
 114                 ## parse context
 115         };
 116
 117 ###### macros
 118
 119         #define container_of(ptr, type, member) ({                      \
 120                 const typeof( ((type *)0)->member ) *__mptr = (ptr);    \
 121                 (type *)( (char *)__mptr - offsetof(type,member) );})  /* yields the `type` object that contains `member`, given a pointer to that member; relies on the GNU statement-expression and typeof extensions */
 122
 123         #define config2context(_conf) container_of(_conf, struct parse_context, \
 124                 config)  /* maps a pointer to the `config` member back to its enclosing struct parse_context */
 125
 126 ###### Parser: reduce
 127         struct parse_context *c = config2context(config);  /* recover the parse_context that embeds the token_config pointed to by `config` */
 128
 129 ###### Parser: code
 130
 131         #include <unistd.h>
 132         #include <stdlib.h>
 133         #include <fcntl.h>
 134         #include <errno.h>
 135         #include <sys/mman.h>
 136         #include <string.h>
 137         #include <stdio.h>
 138         #include <locale.h>
 139         #include <malloc.h>
             #include <stdint.h>
 140         #include "mdcode.h"
 141         #include "scanner.h"
 142         #include "parser.h"
 143
 144         ## includes
 145
 146         #include "oceani.h"
 147
 148         ## forward decls
 149         ## value functions
 150         ## ast functions
 151         ## core functions
 152
 153         #include <getopt.h>
 154         static char Usage[] = "Usage: oceani --trace --print --noexec --brackets"
 155                               "--section=SectionName prog.ocn\n";
 156         static const struct option long_options[] = {
 157                 {"trace",     0, NULL, 't'},
 158                 {"print",     0, NULL, 'p'},
 159                 {"noexec",    0, NULL, 'n'},
 160                 {"brackets",  0, NULL, 'b'},
 161                 {"section",   1, NULL, 's'},
 162                 {NULL,        0, NULL, 0},
 163         };
 164         const char *options = "tpnbs";
 165         int main(int argc, char *argv[])
 166         {
 167                 int fd;
 168                 int len;
 169                 char *file;
 170                 struct section *s, *ss;
 171                 char *section = NULL;
 172                 struct parse_context context = {
 173                         .config = {
 174                                 .ignored = (1 << TK_line_comment)
 175                                          | (1 << TK_block_comment)
 176                                          | (1 << TK_mark),
 177                                 .number_chars = ".,_+- ",
 178                                 .word_start = "_",
 179                                 .word_cont = "_",
 180                         },
 181                 };
 182                 int doprint=0, dotrace=0, doexec=1, brackets=0;
 183                 int opt;
 184                 while ((opt = getopt_long(argc, argv, options, long_options, NULL))
 185                        != -1) {
 186                         switch(opt) {
 187                         case 't': dotrace=1; break;
 188                         case 'p': doprint=1; break;
 189                         case 'n': doexec=0; break;
 190                         case 'b': brackets=1; break;
 191                         case 's': section = optarg; break;
 192                         default: fprintf(stderr, Usage);
 193                                 exit(1);
 194                         }
 195                 }
 196                 if (optind >= argc) {
 197                         fprintf(stderr, "oceani: no input file given\n");
 198                         exit(1);
 199                 }
 200                 fd = open(argv[optind], O_RDONLY);
 201                 if (fd < 0) {
 202                         fprintf(stderr, "oceani: cannot open %s\n", argv[optind]);
 203                         exit(1);
 204                 }
 205                 context.file_name = argv[optind];
 206                 len = lseek(fd, 0, 2);
 207                 file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
 208                 s = code_extract(file, file+len, NULL);
 209                 if (!s) {
 210                         fprintf(stderr, "oceani: could not find any code in %s\n",
 211                                 argv[optind]);
 212                         exit(1);
 213                 }
 214
 215                 ## context initialization
 216
 217                 if (section) {
 218                         for (ss = s; ss; ss = ss->next) {
 219                                 struct text sec = ss->section;
 220                                 if (sec.len == strlen(section) &&
 221                                     strncmp(sec.txt, section, sec.len) == 0)
 222                                         break;
 223                         }
 224                         if (!ss) {
 225                                 fprintf(stderr, "oceani: cannot find section %s\n",
 226                                         section);
 227                                 exit(1);
 228                         }
 229                 } else
 230                         ss = s;
 231                 parse_oceani(ss->code, &context.config, dotrace ? stderr : NULL);
 232
 233                 if (!context.prog) {
 234                         fprintf(stderr, "oceani: no program found.\n");
 235                         context.parse_error = 1;
 236                 }
 237                 if (context.prog && doprint) {
 238                         ## print const decls
 239                         ## print type decls
 240                         print_exec(context.prog, 0, brackets);
 241                 }
 242                 if (context.prog && doexec && !context.parse_error) {
 243                         if (!analyse_prog(context.prog, &context)) {
 244                                 fprintf(stderr, "oceani: type error in program - not running.\n");
 245                                 exit(1);
 246                         }
 247                         interp_prog(context.prog, argv+optind+1);
 248                 }
 249                 free_exec(context.prog);
 250
 251                 while (s) {
 252                         struct section *t = s->next;
 253                         code_free(s->code);
 254                         free(s);
 255                         s = t;
 256                 }
 257                 ## free context vars
 258                 ## free context types
 259                 exit(context.parse_error ? 1 : 0);
 260         }
 261
 262 ### Analysis
 263
 264 The four requirements of parse, analyse, print, interpret apply to
 265 each language element individually so that is how most of the code
 266 will be structured.
 267
 268 Three of the four are fairly self explanatory.  The one that requires
 269 a little explanation is the analysis step.
 270
 271 The current language design does not require the types of variables to
 272 be declared, but they must still have a single type.  Different
 273 operations impose different requirements on the variables, for example
 274 addition requires both arguments to be numeric, and assignment
 275 requires the variable on the left to have the same type as the
 276 expression on the right.
 277
 278 Analysis involves propagating these type requirements around and
 279 consequently setting the type of each variable.  If any requirements
 280 are violated (e.g. a string is compared with a number) or if a
 281 variable needs to have two different types, then an error is raised
 282 and the program will not run.
 283
 284 If the same variable is declared in both branches of an 'if/else', or
 285 in all cases of a 'switch' then the multiple instances may be merged
 286 into just one variable if the variable is referenced after the
 287 conditional statement.  When this happens, the types must naturally be
 288 consistent across all the branches.  When the variable is not used
 289 outside the if, the variables in the different branches are distinct
 290 and can be of different types.
 291
 292 Determining the types of all variables early is important for
 293 processing command line arguments.  These can be assigned to any of
 294 several types of variable, but we must first know the correct type so
 295 any required conversion can happen.  If a variable is associated with
 296 a command line argument but no type can be interpreted (e.g. the
 297 variable is only ever used in a `print` statement), then the type is
 298 set to 'string'.
 299
 300 Undeclared names may only appear in "use" statements and "case" expressions.
 301 These names are given a type of "label" and a unique value.
 302 This allows them to fill the role of a name in an enumerated type, which
 303 is useful for testing the `switch` statement.
 304
 305 As we will see, the condition part of a `while` statement can return
 306 either a Boolean or some other type.  This requires that the expected
 307 type that gets passed around comprises a type and a flag to indicate
 308 that `Tbool` is also permitted.
 309
 310 As there are, as yet, no distinct types that are compatible, there
 311 isn't much subtlety in the analysis.  When we have distinct number
 312 types, this will become more interesting.
 313
 314 #### Error reporting
 315
 316 When analysis discovers an inconsistency it needs to report an error;
 317 just refusing to run the code ensures that the error doesn't cascade,
 318 but by itself it isn't very useful.  A clear understanding of the sort
 319 of error messages that are useful will help guide the process of
 320 analysis.
 321
 322 At a simplistic level, the only sort of error that type analysis can
 323 report is that the type of some construct doesn't match a contextual
 324 requirement.  For example, in `4 + "hello"` the addition provides a
 325 contextual requirement for numbers, but `"hello"` is not a number.  In
 326 this particular example no further information is needed as the types
 327 are obvious from local information.  When a variable is involved that
 328 isn't the case.  It may be helpful to explain why the variable has a
 329 particular type, by indicating the location where the type was set,
 330 whether by declaration or usage.
 331
 332 Using a recursive-descent analysis we can easily detect a problem at
 333 multiple locations. In "`hello:= "there"; 4 + hello`" the addition
 334 will detect that one argument is not a number and the usage of `hello`
 335 will detect that a number was wanted, but not provided.  In this
 336 (early) version of the language, we will generate error reports at
 337 multiple locations, so the use of `hello` will report an error and
 338 explain where the value was set, and the addition will report an error
 339 and say why numbers are needed.  To be able to report locations for
 340 errors, each language element will need to record a file location
 341 (line and column) and each variable will need to record the language
 342 element where its type was set.  For now we will assume that each line
 343 of an error message indicates one location in the file, and up to 2
 344 types.  So we provide a `printf`-like function which takes a format, a
 345 language element (a `struct exec` which has not yet been introduced), and 2
 346 types. "`%1`" reports the first type, "`%2`" reports the second.  We
 347 will need a function to print the location, once we know how that is
 348 stored.  As will be explained later, there are sometimes extra rules for
 349 type matching and they might affect error messages, so we need to pass those
 350 in too.
 351
 352 As well as type errors, we sometimes need to report problems with
 353 tokens, which might be unexpected or might name a type that has not
 354 been defined.  For these we have `tok_err()` which reports an error
 355 with a given token.  Each of the error functions sets the flag in the
 356 context to indicate that parsing failed.
 357
 358 ###### forward decls
 359
 360         static void fput_loc(struct exec *loc, FILE *f);  /* prototype only; type_err() calls it to write the location of `loc` to `f` right after the file name */
 361
 362 ###### core functions
 363
 364         static void type_err(struct parse_context *c,
 365                              char *fmt, struct exec *loc,
 366                              struct type *t1, int rules, struct type *t2)
 367         {
 368                 fprintf(stderr, "%s:", c->file_name);
 369                 fput_loc(loc, stderr);
 370                 for (; *fmt ; fmt++) {
 371                         if (*fmt != '%') {
 372                                 fputc(*fmt, stderr);
 373                                 continue;
 374                         }
 375                         fmt++;
 376                         switch (*fmt) {
 377                         case '%': fputc(*fmt, stderr); break;   // NOTEST
 378                         default: fputc('?', stderr); break;     // NOTEST
 379                         case '1':
 380                                 type_print(t1, stderr);
 381                                 break;
 382                         case '2':
 383                                 type_print(t2, stderr);
 384                                 break;
 385                         ## format cases
 386                         }
 387                 }
 388                 fputs("\n", stderr);
 389                 c->parse_error = 1;
 390         }
 391
 392         static void tok_err(struct parse_context *c, char *fmt, struct token *t)
 393         {
 394                 fprintf(stderr, "%s:%d:%d: %s: %.*s\n", c->file_name, t->line, t->col, fmt,
 395                         t->txt.len, t->txt.txt);
 396                 c->parse_error = 1;
 397         }
 398
 399 ## Entities: declared and predeclared.
 400
 401 There are various "things" that the language and/or the interpreter
 402 needs to know about to parse and execute a program.  These include
 403 types, variables, values, and executable code.  These are all lumped
 404 together under the term "entities" (calling them "objects" would be
 405 confusing) and introduced here.  These will be introduced and described
 406 here.  The following section will present the different specific code
 407 elements which comprise or manipulate these various entities.
 408
 409 ### Types
 410
 411 Values come in a wide range of types, with more likely to be added.
 412 Each type needs to be able to parse and print its own values (for
 413 convenience at least) as well as to compare two values, at least for
 414 equality and possibly for order.  For now, values might need to be
 415 duplicated and freed, though eventually such manipulations will be
 416 better integrated into the language.
 417
 418 Rather than requiring every numeric type to support all numeric
 419 operations (add, multiply, etc), we allow types to be able to present
 420 as one of a few standard types: integer, float, and fraction.  The
 421 existence of these conversion functions will eventually enable types to
 422 determine if they are compatible with other types, though such types
 423 have not yet been implemented.
 424
 425 Named types are stored in a simple linked list.  Objects of each type are "values"
 426 which are often passed around by value.
 427
 428 ###### ast
 429
 430         struct value {  /* a value: its type plus type-specific data held in the anonymous union */
 431                 struct type *type;  /* type of the value; the generic helpers (free_value, dup_value, ...) treat NULL as "no type" and do nothing */
 432                 union {
 433                         ## value union fields
 434                 };
 435         };
 436
 437         struct type {  /* a type: its name, list linkage, and the operations on its values */
 438                 struct text name;  /* type name; matched by find_type() and printed by type_print() when non-empty */
 439                 struct type *next;  /* next entry in the context's typelist */
 440                 struct value (*init)(struct type *type);  /* called by val_init() */
 441                 struct value (*prepare)(struct type *type);  /* called by val_prepare() */
 442                 struct value (*parse)(struct type *type, char *str);  /* optional; called by parse_value() to build a value from a string */
 443                 void (*print)(struct value val);  /* optional; called by print_value() */
 444                 void (*print_type)(struct type *type, FILE *f);  /* optional; type_print() uses it when the name is empty */
 445                 int (*cmp_order)(struct value v1, struct value v2);  /* optional; value_cmp() prefers this comparison */
 446                 int (*cmp_eq)(struct value v1, struct value v2);  /* optional; value_cmp() falls back to it when cmp_order is NULL */
 447                 struct value (*dup)(struct value val);  /* called by dup_value() */
 448                 void (*free)(struct value val);  /* called by free_value() */
 449                 void (*free_type)(struct type *t);  /* optional; called just before the type itself is freed when the typelist is torn down */
 450                 int (*compat)(struct type *this, struct type *other);  /* optional; type_compat() defers to it instead of comparing pointers */
 451                 long long (*to_int)(struct value *v);  /* conversion hook; no caller appears in this part of the file */
 452                 double (*to_float)(struct value *v);  /* conversion hook; no caller appears in this part of the file */
 453                 int (*to_mpq)(mpq_t *q, struct value *v);  /* conversion hook; no caller appears in this part of the file */
 454                 ## type functions
 455                 union {
 456                         ## type union fields
 457                 };
 458         };
 459
 460 ###### parse context
 461
 462         struct type *typelist;  /* head of the linked list of named types; add_type() pushes onto it and find_type() searches it */
 463
 464 ###### ast functions
 465
 466         static struct type *find_type(struct parse_context *c, struct text s)
 467         {
 468                 struct type *l = c->typelist;
 469
 470                 while (l &&
 471                        text_cmp(l->name, s) != 0)
 472                                 l = l->next;
 473                 return l;
 474         }
 475
 476         static struct type *add_type(struct parse_context *c, struct text s,
 477                                      struct type *proto)
 478         {
 479                 struct type *n;
 480
 481                 n = calloc(1, sizeof(*n));
 482                 *n = *proto;
 483                 n->name = s;
 484                 n->next = c->typelist;
 485                 c->typelist = n;
 486                 return n;
 487         }
 488
 489         static void free_type(struct type *t)
 490         {
 491                 /* The type is always a reference to something in the
 492                  * context, so we don't need to free anything.
 493                  */
 494         }
 495
 496         static void free_value(struct value v)
 497         {
 498                 if (v.type)
 499                         v.type->free(v);
 500         }
 501
 502         static int type_compat(struct type *require, struct type *have, int rules)
 503         {
 504                 if ((rules & Rboolok) && have == Tbool)
 505                         return 1;
 506                 if ((rules & Rnolabel) && have == Tlabel)
 507                         return 0;
 508                 if (!require || !have)
 509                         return 1;
 510
 511                 if (require->compat)
 512                         return require->compat(require, have);
 513
 514                 return require == have;
 515         }
 516
 517         static void type_print(struct type *type, FILE *f)
 518         {
 519                 if (!type)
 520                         fputs("*unknown*type*", f);
 521                 else if (type->name.len)
 522                         fprintf(f, "%.*s", type->name.len, type->name.txt);
 523                 else if (type->print_type)
 524                         type->print_type(type, f);
 525                 else
 526                         fputs("*invalid*type*", f);     // NOTEST
 527         }
 528
 529         static struct value val_prepare(struct type *type)
 530         {
 531                 struct value rv;
 532
 533                 if (type)
 534                         return type->prepare(type);
 535                 rv.type = type;
 536                 return rv;
 537         }
 538
 539         static struct value val_init(struct type *type)
 540         {
 541                 struct value rv;
 542
 543                 if (type)
 544                         return type->init(type);
 545                 rv.type = type;
 546                 return rv;
 547         }
 548
 549         static struct value dup_value(struct value v)
 550         {
 551                 if (v.type)
 552                         return v.type->dup(v);
 553                 return v;
 554         }
 555
 556         static int value_cmp(struct value left, struct value right)
 557         {
 558                 if (left.type && left.type->cmp_order)
 559                         return left.type->cmp_order(left, right);
 560                 if (left.type && left.type->cmp_eq)
 561                         return left.type->cmp_eq(left, right);
 562                 return -1;
 563         }
 564
 565         static void print_value(struct value v)
 566         {
 567                 if (v.type && v.type->print)
 568                         v.type->print(v);
 569                 else
 570                         printf("*Unknown*");            // NOTEST
 571         }
 572
 573         static struct value parse_value(struct type *type, char *arg)
 574         {
 575                 struct value rv;
 576
 577                 if (type && type->parse)
 578                         return type->parse(type, arg);
 579                 rv.type = NULL;                         // NOTEST
 580                 return rv;                              // NOTEST
 581         }
 582
 583 ###### forward decls
 584
 585         static void free_value(struct value v);  /* this and the following lines are prototypes for the value/type helpers defined in the "ast functions" section */
 586         static int type_compat(struct type *require, struct type *have, int rules);
 587         static void type_print(struct type *type, FILE *f);
 588         static struct value val_init(struct type *type);
 589         static struct value dup_value(struct value v);
 590         static int value_cmp(struct value left, struct value right);
 591         static void print_value(struct value v);
 592         static struct value parse_value(struct type *type, char *arg);
 593
 594 ###### free context types
 595
 596         while (context.typelist) {
 597                 struct type *t = context.typelist;
 598
 599                 context.typelist = t->next;
 600                 if (t->free_type)
 601                         t->free_type(t);
 602                 free(t);
 603         }
 604
 605 #### Base Types
 606
 607 Values of the base types can be numbers, which we represent as
 608 multi-precision fractions, strings, Booleans and labels.  When
 609 analysing the program we also need to allow for places where no value
 610 is meaningful (type `Tnone`) and where we don't know what type to
 611 expect yet (type is `NULL`).
 612
 613 Values are never shared, they are always copied when used, and freed
 614 when no longer needed.
 615
 616 When propagating type information around the program, we need to
 617 determine if two types are compatible, where type `NULL` is compatible
 618 with anything.  There are two special cases with type compatibility,
 619 both related to the Conditional Statement which will be described
 620 later.  In some cases a Boolean can be accepted as well as some other
 621 primary type, and in others any type is acceptable except a label (`Vlabel`).
 622 A separate function encoding these cases will simplify some code later.
 623
 624 When assigning command line arguments to variables, we need to be able
 625 to parse each type from a string.
 626
 627 The distinction between "prepare" and "init" needs to be explained.
 628 "init" sets up an initial value, such as "zero" or the empty string.
 629 "prepare" simply prepares the data structure so that if "free" gets
 630 called on it, it won't do something silly.  Normally a value will be
 631 stored after "prepare" but before "free", but this might not happen if
 632 there are errors.
 633
 634 ###### includes
 635         #include <gmp.h>
 636         #include "string.h"
 637         #include "number.h"
 638
 639 ###### libs
 640         myLDLIBS := libnumber.o libstring.o -lgmp
 641         LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
 642
 643 ###### type union fields
 644         enum vtype {Vnone, Vstr, Vnum, Vbool, Vlabel} vtype;  /* tag of a base type; the base-type value helpers switch on it to pick the matching member of the value union */
 645
 646 ###### value union fields
 647         struct text str;  /* Vstr data; str.txt is heap memory that _free_value() releases with free() */
 648         mpq_t num;  /* Vnum data; a GMP rational set up with mpq_init() and released with mpq_clear() */
 649         int bool;  /* Vbool data; printed as "True" when non-zero, "False" otherwise */
 650         void *label;  /* Vlabel data; two labels compare equal only when these pointers are equal */
 651
 652 ###### ast functions
 653         static void _free_value(struct value v)
 654         {
 655                 switch (v.type->vtype) {
 656                 case Vnone: break;
 657                 case Vstr: free(v.str.txt); break;
 658                 case Vnum: mpq_clear(v.num); break;
 659                 case Vlabel:
 660                 case Vbool: break;
 661                 }
 662         }
 663
 664 ###### value functions
 665
 666         static struct value _val_prepare(struct type *type)
 667         {
 668                 struct value rv;
 669
 670                 rv.type = type;
 671                 switch(type->vtype) {
 672                 case Vnone:
 673                         break;
 674                 case Vnum:
 675                         memset(&rv.num, 0, sizeof(rv.num));
 676                         break;
 677                 case Vstr:
 678                         rv.str.txt = NULL;
 679                         rv.str.len = 0;
 680                         break;
 681                 case Vbool:
 682                         rv.bool = 0;
 683                         break;
 684                 case Vlabel:
 685                         rv.label = NULL;
 686                         break;
 687                 }
 688                 return rv;
 689         }
 690
 691         static struct value _val_init(struct type *type)
 692         {
 693                 struct value rv;
 694
 695                 rv.type = type;
 696                 switch(type->vtype) {
 697                 case Vnone:             // NOTEST
 698                         break;          // NOTEST
 699                 case Vnum:
 700                         mpq_init(rv.num); break;
 701                 case Vstr:
 702                         rv.str.txt = malloc(1);
 703                         rv.str.len = 0;
 704                         break;
 705                 case Vbool:
 706                         rv.bool = 0;
 707                         break;
 708                 case Vlabel:                    // NOTEST
 709                         rv.label = NULL;        // NOTEST
 710                         break;                  // NOTEST
 711                 }
 712                 return rv;
 713         }
 714
 715         static struct value _dup_value(struct value v)
 716         {
 717                 struct value rv;
 718                 rv.type = v.type;
 719                 switch (rv.type->vtype) {
 720                 case Vnone:             // NOTEST
 721                         break;          // NOTEST
 722                 case Vlabel:
 723                         rv.label = v.label;
 724                         break;
 725                 case Vbool:
 726                         rv.bool = v.bool;
 727                         break;
 728                 case Vnum:
 729                         mpq_init(rv.num);
 730                         mpq_set(rv.num, v.num);
 731                         break;
 732                 case Vstr:
 733                         rv.str.len = v.str.len;
 734                         rv.str.txt = malloc(rv.str.len);
 735                         memcpy(rv.str.txt, v.str.txt, v.str.len);
 736                         break;
 737                 }
 738                 return rv;
 739         }
 740
 741         static int _value_cmp(struct value left, struct value right)
 742         {
 743                 int cmp;
 744                 if (left.type != right.type)
 745                         return left.type - right.type;  // NOTEST
 746                 switch (left.type->vtype) {
 747                 case Vlabel: cmp = left.label == right.label ? 0 : 1; break;
 748                 case Vnum: cmp = mpq_cmp(left.num, right.num); break;
 749                 case Vstr: cmp = text_cmp(left.str, right.str); break;
 750                 case Vbool: cmp = left.bool - right.bool; break;
 751                 case Vnone: cmp = 0;                    // NOTEST
 752                 }
 753                 return cmp;
 754         }
 755
 756         static void _print_value(struct value v)
 757         {
 758                 switch (v.type->vtype) {
 759                 case Vnone:                             // NOTEST
 760                         printf("*no-value*"); break;    // NOTEST
 761                 case Vlabel:                            // NOTEST
 762                         printf("*label-%p*", v.label); break; // NOTEST
 763                 case Vstr:
 764                         printf("%.*s", v.str.len, v.str.txt); break;
 765                 case Vbool:
 766                         printf("%s", v.bool ? "True":"False"); break;
 767                 case Vnum:
 768                         {
 769                         mpf_t fl;
 770                         mpf_init2(fl, 20);
 771                         mpf_set_q(fl, v.num);
 772                         gmp_printf("%Fg", fl);
 773                         mpf_clear(fl);
 774                         break;
 775                         }
 776                 }
 777         }
 778
 779         static struct value _parse_value(struct type *type, char *arg)
 780         {
 781                 struct value val;
 782                 struct text tx;
 783                 int neg = 0;
 784                 char tail[3] = "";
 785
 786                 val.type = type;
 787                 switch(type->vtype) {
 788                 case Vlabel:                            // NOTEST
 789                 case Vnone:                             // NOTEST
 790                         val.type = NULL;                // NOTEST
 791                         break;                          // NOTEST
 792                 case Vstr:
 793                         val.str.len = strlen(arg);
 794                         val.str.txt = malloc(val.str.len);
 795                         memcpy(val.str.txt, arg, val.str.len);
 796                         break;
 797                 case Vnum:
 798                         if (*arg == '-') {
 799                                 neg = 1;
 800                                 arg++;
 801                         }
 802                         tx.txt = arg; tx.len = strlen(tx.txt);
 803                         if (number_parse(val.num, tail, tx) == 0)
 804                                 mpq_init(val.num);
 805                         else if (neg)
 806                                 mpq_neg(val.num, val.num);
 807                         if (tail[0]) {
 808                                 printf("Unsupported suffix: %s\n", arg);
 809                                 val.type = NULL;
 810                         }
 811                         break;
 812                 case Vbool:
 813                         if (strcasecmp(arg, "true") == 0 ||
 814                             strcmp(arg, "1") == 0)
 815                                 val.bool = 1;
 816                         else if (strcasecmp(arg, "false") == 0 ||
 817                                  strcmp(arg, "0") == 0)
 818                                 val.bool = 0;
 819                         else {
 820                                 printf("Bad bool: %s\n", arg);
 821                                 val.type = NULL;
 822                         }
 823                         break;
 824                 }
 825                 return val;
 826         }
 827
 828         static void _free_value(struct value v);
 829
 830         static struct type base_prototype = {
 831                 .init = _val_init,
 832                 .prepare = _val_prepare,
 833                 .parse = _parse_value,
 834                 .print = _print_value,
 835                 .cmp_order = _value_cmp,
 836                 .cmp_eq = _value_cmp,
 837                 .dup = _dup_value,
 838                 .free = _free_value,
 839         };
 840
 841         static struct type *Tbool, *Tstr, *Tnum, *Tnone, *Tlabel;
 842
 843 ###### ast functions
 844         static struct type *add_base_type(struct parse_context *c, char *n, enum vtype vt)
 845         {
 846                 struct text txt = { n, strlen(n) };
 847                 struct type *t;
 848
 849                 t = add_type(c, txt, &base_prototype);
 850                 t->vtype = vt;
 851                 return t;
 852         }
 853
 854 ###### context initialization
 855
        /* Register the five base types in `context` and keep a pointer to
         * each so later code can refer to them directly.
         */
        Tbool  = add_base_type(&context, "Boolean", Vbool);
        Tstr   = add_base_type(&context, "string", Vstr);
        Tnum   = add_base_type(&context, "number", Vnum);
        Tnone  = add_base_type(&context, "none", Vnone);
        Tlabel = add_base_type(&context, "label", Vlabel);
 861
 862 ### Variables
 863
 864 Variables are scoped named values.  We store the names in a linked
 865 list of "bindings" sorted lexically, and use sequential search and
 866 insertion sort.
 867
 868 ###### ast
 869
        /* One entry per distinct name, kept on a singly linked list. */
        struct binding {
                struct text name;       // the name itself
                struct binding *next;   // in lexical order
                ## binding fields
        };
 875
 876 This linked list is stored in the parse context so that "reduce"
 877 functions can find or add variables, and so the analysis phase can
 878 ensure that every variable gets a type.
 879
 880 ###### parse context
 881
        struct binding *varlist;  // Head of the list of all bindings, in lexical order
 883
 884 ###### ast functions
 885
 886         static struct binding *find_binding(struct parse_context *c, struct text s)
 887         {
 888                 struct binding **l = &c->varlist;
 889                 struct binding *n;
 890                 int cmp = 1;
 891
 892                 while (*l &&
 893                         (cmp = text_cmp((*l)->name, s)) < 0)
 894                                 l = & (*l)->next;
 895                 if (cmp == 0)
 896                         return *l;
 897                 n = calloc(1, sizeof(*n));
 898                 n->name = s;
 899                 n->next = *l;
 900                 *l = n;
 901                 return n;
 902         }
 903
 904 Each name can be linked to multiple variables defined in different
 905 scopes.  Each scope starts where the name is declared and continues
 906 until the end of the containing code block.  Scopes of a given name
 907 cannot nest, so a declaration while a name is in-scope is an error.
 908
 909 ###### binding fields
        struct variable *var;   // most recently declared variable of this name; older ones follow ->previous
 911
 912 ###### ast
        /* One declared instance of a name. */
        struct variable {
                struct variable *previous;      // variable of the same name declared before this one
                struct value val;               // set up with val_prepare(NULL) by var_decl()
                struct binding *name;           // the binding this variable hangs off
                struct exec *where_decl;// where name was declared
                struct exec *where_set; // where type was set
                ## variable fields
        };
 921
 922 While the naming seems strange, we include local constants in the
 923 definition of variables.  A name declared `var := value` can
 924 subsequently be changed, but a name declared `var ::= value` cannot -
it is constant.
 926
 927 ###### variable fields
        int constant;   // presumably non-zero for names declared with `::=` -- confirm where it is set
 929
 930 Scopes in parallel branches can be partially merged.  More
 931 specifically, if a given name is declared in both branches of an
 932 if/else then its scope is a candidate for merging.  Similarly if
 933 every branch of an exhaustive switch (e.g. has an "else" clause)
 934 declares a given name, then the scopes from the branches are
 935 candidates for merging.
 936
 937 Note that names declared inside a loop (which is only parallel to
 938 itself) are never visible after the loop.  Similarly names defined in
 939 scopes which are not parallel, such as those started by `for` and
 940 `switch`, are never visible after the scope.  Only variables defined in
 941 both `then` and `else` (including the implicit then after an `if`, and
 942 excluding `then` used with `for`) and in all `case`s and `else` of a
 943 `switch` or `while` can be visible beyond the `if`/`switch`/`while`.
 944
 945 Labels, which are a bit like variables, follow different rules.
 946 Labels are not explicitly declared, but if an undeclared name appears
 947 in a context where a label is legal, that effectively declares the
 948 name as a label.  The declaration remains in force (or in scope) at
 949 least to the end of the immediately containing block and conditionally
 950 in any larger containing block which does not declare the name in some
 951 other way.  Importantly, the conditional scope extension happens even
 952 if the label is only used in one parallel branch of a conditional --
 953 when used in one branch it is treated as having been declared in all
 954 branches.
 955
 956 Merge candidates are tentatively visible beyond the end of the
 957 branching statement which creates them.  If the name is used, the
 958 merge is affirmed and they become a single variable visible at the
 959 outer layer.  If not - if it is redeclared first - the merge lapses.
 960
 961 To track scopes we have an extra stack, implemented as a linked list,
 962 which roughly parallels the parse stack and which is used exclusively
 963 for scoping.  When a new scope is opened, a new frame is pushed and
 964 the child-count of the parent frame is incremented.  This child-count
 965 is used to distinguish between the first of a set of parallel scopes,
 966 in which declared variables must not be in scope, and subsequent
branches, where they must already be conditionally scoped.
 968
 969 To push a new frame *before* any code in the frame is parsed, we need a
 970 grammar reduction.  This is most easily achieved with a grammar
 971 element which derives the empty string, and creates the new scope when
 972 it is recognized.  This can be placed, for example, between a keyword
 973 like "if" and the code following it.
 974
 975 ###### ast
        /* One frame of the scope stack. */
        struct scope {
                struct scope *parent;   // enclosing frame, NULL for the outermost
                int child_count;        // bumped by scope_push() each time a scope opens directly inside this one
        };
 980
 981 ###### parse context
        int scope_depth;                // +1 in scope_push(), -1 in scope_pop()
        struct scope *scope_stack;      // innermost open frame; outer frames via ->parent
 984
 985 ###### ast functions
 986         static void scope_pop(struct parse_context *c)
 987         {
 988                 struct scope *s = c->scope_stack;
 989
 990                 c->scope_stack = s->parent;
 991                 free(s);
 992                 c->scope_depth -= 1;
 993         }
 994
 995         static void scope_push(struct parse_context *c)
 996         {
 997                 struct scope *s = calloc(1, sizeof(*s));
 998                 if (c->scope_stack)
 999                         c->scope_stack->child_count += 1;
1000                 s->parent = c->scope_stack;
1001                 c->scope_stack = s;
1002                 c->scope_depth += 1;
1003         }
1004
1005 ###### Grammar
1006
1007         $void
1008         OpenScope -> ${ scope_push(c); }$
1009
1010 Each variable records a scope depth and is in one of four states:
1011
1012 - "in scope".  This is the case between the declaration of the
1013   variable and the end of the containing block, and also between
  the usage which affirms a merge and the end of that block.
1015
1016   The scope depth is not greater than the current parse context scope
1017   nest depth.  When the block of that depth closes, the state will
1018   change.  To achieve this, all "in scope" variables are linked
1019   together as a stack in nesting order.
1020
1021 - "pending".  The "in scope" block has closed, but other parallel
1022   scopes are still being processed.  So far, every parallel block at
1023   the same level that has closed has declared the name.
1024
1025   The scope depth is the depth of the last parallel block that
1026   enclosed the declaration, and that has closed.
1027
1028 - "conditionally in scope".  The "in scope" block and all parallel
1029   scopes have closed, and no further mention of the name has been
1030   seen.  This state includes a secondary nest depth which records the
1031   outermost scope seen since the variable became conditionally in
1032   scope.  If a use of the name is found, the variable becomes "in
1033   scope" and that secondary depth becomes the recorded scope depth.
1034   If the name is declared as a new variable, the old variable becomes
1035   "out of scope" and the recorded scope depth stays unchanged.
1036
1037 - "out of scope".  The variable is neither in scope nor conditionally
1038   in scope.  It is permanently out of scope now and can be removed from
1039   the "in scope" stack.
1040
1041 ###### variable fields
        int depth, min_depth;   // both start at the declaring scope depth; var_ref() copies min_depth into depth when a merge is affirmed
        enum { OutScope, PendingScope, CondScope, InScope } scope;     // current scoping state
        struct variable *in_scope;      // next entry on the context's in_scope stack
1045
1046 ###### parse context
1047
        struct variable *in_scope;      // top of the stack of in-scope variables; var_decl() pushes here
1049
1050 All variables with the same name are linked together using the
'previous' link.  Those variables that have
1052 been affirmatively merged all have a 'merged' pointer that points to
1053 one primary variable - the most recently declared instance. When
1054 merging variables, we need to also adjust the 'merged' pointer on any
1055 other variables that had previously been merged with the one that will
1056 no longer be primary.
1057
1058 A variable that is no longer the most recent instance of a name may
still have "pending" scope, if it might still be merged with the most
recent instance.  These variables don't really belong in the
1061 "in_scope" list, but are not immediately removed when a new instance
1062 is found.  Instead, they are detected and ignored when considering the
1063 list of in_scope names.
1064
1065 ###### variable fields
        struct variable *merged;        // primary variable this one was merged into by variable_merge(), else NULL
1067
1068 ###### ast functions
1069
1070         static void variable_merge(struct variable *primary, struct variable *secondary)
1071         {
1072                 struct variable *v;
1073
1074                 if (primary->merged)
1075                         // shouldn't happen
1076                         primary = primary->merged;
1077
1078                 for (v = primary->previous; v; v=v->previous)
1079                         if (v == secondary || v == secondary->merged ||
1080                             v->merged == secondary ||
1081                             (v->merged && v->merged == secondary->merged)) {
1082                                 v->scope = OutScope;
1083                                 v->merged = primary;
1084                         }
1085         }
1086
1087 ###### free context vars
1088
1089         while (context.varlist) {
1090                 struct binding *b = context.varlist;
1091                 struct variable *v = b->var;
1092                 context.varlist = b->next;
1093                 free(b);
1094                 while (v) {
1095                         struct variable *t = v;
1096
1097                         v = t->previous;
1098                         free_value(t->val);
1099                         if (t->depth == 0)
1100                                 // This is a global constant
1101                                 free_exec(t->where_decl);
1102                         free(t);
1103                 }
1104         }
1105
1106 #### Manipulating Bindings
1107
1108 When a name is conditionally visible, a new declaration discards the
1109 old binding - the condition lapses.  Conversely a usage of the name
1110 affirms the visibility and extends it to the end of the containing
1111 block - i.e. the block that contains both the original declaration and
1112 the latest usage.  This is determined from `min_depth`.  When a
1113 conditionally visible variable gets affirmed like this, it is also
1114 merged with other conditionally visible variables with the same name.
1115
1116 When we parse a variable declaration we either report an error if the
1117 name is currently bound, or create a new variable at the current nest
1118 depth if the name is unbound or bound to a conditionally scoped or
1119 pending-scope variable.  If the previous variable was conditionally
scoped, it and its homonyms become out-of-scope.
1121
1122 When we parse a variable reference (including non-declarative
1123 assignment) we report an error if the name is not bound or is bound to
1124 a pending-scope variable; update the scope if the name is bound to a
1125 conditionally scoped variable; or just proceed normally if the named
1126 variable is in scope.
1127
1128 When we exit a scope, any variables bound at this level are either
1129 marked out of scope or pending-scoped, depending on whether the scope
1130 was sequential or parallel.  Here a "parallel" scope means the "then"
1131 or "else" part of a conditional, or any "case" or "else" branch of a
1132 switch.  Other scopes are "sequential".
1133
1134 When exiting a parallel scope we check if there are any variables that
1135 were previously pending and are still visible. If there are, then
they weren't redeclared in the most recent scope, so they cannot be
1137 merged and must become out-of-scope.  If it is not the first of
1138 parallel scopes (based on `child_count`), we check that there was a
1139 previous binding that is still pending-scope.  If there isn't, the new
1140 variable must now be out-of-scope.
1141
1142 When exiting a sequential scope that immediately enclosed parallel
1143 scopes, we need to resolve any pending-scope variables.  If there was
1144 no `else` clause, and we cannot determine that the `switch` was exhaustive,
we need to mark all pending-scope variables as out-of-scope.  Otherwise
1146 all pending-scope variables become conditionally scoped.
1147
1148 ###### ast
        /* How a scope is being closed, as passed to var_block_close():
         * CloseSequential - an ordinary block;
         * CloseParallel   - one branch of a set of parallel branches;
         * CloseElse       - handled like CloseParallel, after which variables
         *                   left pending become conditionally scoped.
         */
        enum closetype { CloseSequential, CloseParallel, CloseElse };
1150
1151 ###### ast functions
1152
1153         static struct variable *var_decl(struct parse_context *c, struct text s)
1154         {
1155                 struct binding *b = find_binding(c, s);
1156                 struct variable *v = b->var;
1157
1158                 switch (v ? v->scope : OutScope) {
1159                 case InScope:
1160                         /* Caller will report the error */
1161                         return NULL;
1162                 case CondScope:
1163                         for (;
1164                              v && v->scope == CondScope;
1165                              v = v->previous)
1166                                 v->scope = OutScope;
1167                         break;
1168                 default: break;
1169                 }
1170                 v = calloc(1, sizeof(*v));
1171                 v->previous = b->var;
1172                 b->var = v;
1173                 v->name = b;
1174                 v->min_depth = v->depth = c->scope_depth;
1175                 v->scope = InScope;
1176                 v->in_scope = c->in_scope;
1177                 c->in_scope = v;
1178                 v->val = val_prepare(NULL);
1179                 return v;
1180         }
1181
1182         static struct variable *var_ref(struct parse_context *c, struct text s)
1183         {
1184                 struct binding *b = find_binding(c, s);
1185                 struct variable *v = b->var;
1186                 struct variable *v2;
1187
1188                 switch (v ? v->scope : OutScope) {
1189                 case OutScope:
1190                 case PendingScope:
1191                         /* Caller will report the error */
1192                         return NULL;
1193                 case CondScope:
1194                         /* All CondScope variables of this name need to be merged
1195                          * and become InScope
1196                          */
1197                         v->depth = v->min_depth;
1198                         v->scope = InScope;
1199                         for (v2 = v->previous;
1200                              v2 && v2->scope == CondScope;
1201                              v2 = v2->previous)
1202                                 variable_merge(v, v2);
1203                         break;
1204                 case InScope:
1205                         break;
1206                 }
1207                 return v;
1208         }
1209
        /*
         * Close the innermost scope and update the scoping state of the
         * variables that belong to it.
         *
         * `ct` tells how the scope ends:
         *  - CloseParallel:   variables may become PendingScope, waiting for
         *                     the other parallel branches;
         *  - CloseElse:       as CloseParallel, then everything pending for
         *                     the name becomes CondScope;
         *  - CloseSequential: in-scope variables go OutScope; of the pending
         *                     ones only labels survive, as CondScope.
         *
         * Variables that end up OutScope, or that are no longer the current
         * variable of their name, are unlinked from the in_scope stack.
         */
        static void var_block_close(struct parse_context *c, enum closetype ct)
        {
                /* Close off all variables that are in_scope */
                struct variable *v, **vp, *v2;

                /* Pop first: from here on c->scope_depth and c->scope_stack
                 * describe the enclosing scope.
                 */
                scope_pop(c);
                /* Walk the in_scope stack from the top and stop at the first
                 * variable whose depth or min_depth is not deeper than the
                 * enclosing scope.
                 */
                for (vp = &c->in_scope;
                     v = *vp, v && v->depth > c->scope_depth && v->min_depth > c->scope_depth;
                     ) {
                        /* Only the current variable of a name changes state;
                         * superseded ones are just unlinked further down.
                         */
                        if (v->name->var == v) switch (ct) {
                        case CloseElse:
                        case CloseParallel: /* handle PendingScope */
                                switch(v->scope) {
                                case InScope:
                                case CondScope:
                                        /* Stay pending if this was the first
                                         * child scope of the enclosing frame,
                                         * if the previous variable of the name
                                         * is pending too, or if this is a
                                         * label; otherwise go out of scope.
                                         */
                                        if (c->scope_stack->child_count == 1)
                                                v->scope = PendingScope;
                                        else if (v->previous &&
                                                 v->previous->scope == PendingScope)
                                                v->scope = PendingScope;
                                        else if (v->val.type == Tlabel)
                                                v->scope = PendingScope;
                                        else if (v->name->var == v)
                                                v->scope = OutScope;
                                        if (ct == CloseElse) {
                                                /* All Pending variables with this name
                                                 * are now Conditional */
                                                for (v2 = v;
                                                     v2 && v2->scope == PendingScope;
                                                     v2 = v2->previous)
                                                        v2->scope = CondScope;
                                        }
                                        break;
                                case PendingScope:
                                        /* Still pending from an earlier branch:
                                         * the leading run of pending variables
                                         * goes out of scope, except labels.
                                         */
                                        for (v2 = v;
                                             v2 && v2->scope == PendingScope;
                                             v2 = v2->previous)
                                                if (v2->val.type != Tlabel)
                                                        v2->scope = OutScope;
                                        break;
                                case OutScope: break;
                                }
                                break;
                        case CloseSequential:
                                /* A label is forced to pending first, so the
                                 * PendingScope case below turns it conditional.
                                 */
                                if (v->val.type == Tlabel)
                                        v->scope = PendingScope;
                                switch (v->scope) {
                                case InScope:
                                        v->scope = OutScope;
                                        break;
                                case PendingScope:
                                        /* There was no 'else', so we can only become
                                         * conditional if we know the cases were exhaustive,
                                         * and that doesn't mean anything yet.
                                         * So only labels become conditional..
                                         */
                                        for (v2 = v;
                                             v2 && v2->scope == PendingScope;
                                             v2 = v2->previous)
                                                if (v2->val.type == Tlabel) {
                                                        v2->scope = CondScope;
                                                        v2->min_depth = c->scope_depth;
                                                } else
                                                        v2->scope = OutScope;
                                        break;
                                case CondScope:
                                case OutScope: break;
                                }
                                break;
                        }
                        /* Unlink dead or superseded variables; otherwise move
                         * on to the next entry.
                         */
                        if (v->scope == OutScope || v->name->var != v)
                                *vp = v->in_scope;
                        else
                                vp = &v->in_scope;
                }
        }
1286
1287 ### Executables
1288
1289 Executables can be lots of different things.  In many cases an
1290 executable is just an operation combined with one or two other
1291 executables.  This allows for expressions and lists etc.  Other times
1292 an executable is something quite specific like a constant or variable
1293 name.  So we define a `struct exec` to be a general executable with a
1294 type, and a `struct binode` which is a subclass of `exec`, forms a
1295 node in a binary tree, and holds an operation. There will be other
1296 subclasses, and to access these we need to be able to `cast` the
1297 `exec` into the various other types.
1298
1299 ###### macros
        /*
         * cast(structname, pointer): checked conversion of `pointer` to
         * `struct structname *`.
         *
         * The address of pointer->type is taken through a pointer declared
         * with the type of structname's own `type` member, and abort() is
         * called when the stored tag is not X<structname>.  The result is
         * the address of that `type` member reinterpreted as the struct.
         * NOTE(review): this relies on `type` being the first member of
         * every such struct -- confirm.
         */
        #define cast(structname, pointer) ({            \
                const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \
                if (__mptr && *__mptr != X##structname) abort();                \
                (struct structname *)( (char *)__mptr);})
1304
1305         #define new(structname) ({                                              \
1306                 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1307                 __ptr->type = X##structname;                                            \
1308                 __ptr->line = -1; __ptr->column = -1;                                   \
1309                 __ptr;})
1310
1311         #define new_pos(structname, token) ({                                           \
1312                 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1313                 __ptr->type = X##structname;                                            \
1314                 __ptr->line = token.line; __ptr->column = token.col;                    \
1315                 __ptr;})
1316
1317 ###### ast
        /* Tag stored in every exec to identify its concrete struct. */
        enum exec_types {
                Xbinode,        // the exec is a struct binode
                ## exec type
        };
        /* Header shared by every kind of executable node. */
        struct exec {
                enum exec_types type;   // which concrete struct this really is
                int line, column;       // source position; new() stores -1 for "not recorded"
        };
        /* Binary-tree node: an operation with up to two operand execs. */
        struct binode {
                struct exec;    // embeds type/line/column as direct members; NOTE(review): a tagged anonymous member needs a compiler extension -- confirm build flags
                enum Btype {
                        ## Binode types
                } op;
                struct exec *left, *right;      // operands; both are released by free_binode()
        };
1333
1334 ###### ast functions
1335
1336         static int __fput_loc(struct exec *loc, FILE *f)
1337         {
1338                 if (!loc)
1339                         return 0;               // NOTEST
1340                 if (loc->line >= 0) {
1341                         fprintf(f, "%d:%d: ", loc->line, loc->column);
1342                         return 1;
1343                 }
1344                 if (loc->type == Xbinode)
1345                         return __fput_loc(cast(binode,loc)->left, f) ||
1346                                __fput_loc(cast(binode,loc)->right, f);
1347                 return 0;
1348         }
1349         static void fput_loc(struct exec *loc, FILE *f)
1350         {
1351                 if (!__fput_loc(loc, f))
1352                         fprintf(f, "??:??: ");  // NOTEST
1353         }
1354
1355 Each different type of `exec` node needs a number of functions
defined, a bit like methods.  We must be able to free it,
1357 print it, analyse it and execute it.  Once we have specific `exec`
1358 types we will need to parse them too.  Let's take this a bit more
1359 slowly.
1360
1361 #### Freeing
1362
1363 The parser generator requires a `free_foo` function for each struct
1364 that stores attributes and they will often be `exec`s and subtypes
1365 there-of.  So we need `free_exec` which can handle all the subtypes,
1366 and we need `free_binode`.
1367
1368 ###### ast functions
1369
1370         static void free_binode(struct binode *b)
1371         {
1372                 if (!b)
1373                         return;
1374                 free_exec(b->left);
1375                 free_exec(b->right);
1376                 free(b);
1377         }
1378
1379 ###### core functions
        /*
         * Release an exec of any subtype; a NULL pointer is ignored.
         * Dispatches on e->type, with one case per subtype supplied by the
         * "free exec cases" sections.
         */
        static void free_exec(struct exec *e)
        {
                if (!e)
                        return;
                switch(e->type) {
                        ## free exec cases
                }
        }
1388
1389 ###### forward decls
1390
1391         static void free_exec(struct exec *e);
1392
1393 ###### free exec cases
        /* A binode is handed to free_binode(), which also frees its operands. */
        case Xbinode: free_binode(cast(binode, e)); break;
1395
1396 #### Printing
1397
1398 Printing an `exec` requires that we know the current indent level for
1399 printing line-oriented components.  As will become clear later, we
1400 also want to know what sort of bracketing to use.
1401
1402 ###### ast functions
1403
1404         static void do_indent(int i, char *str)
1405         {
1406                 while (i--)
1407                         printf("    ");
1408                 printf("%s", str);
1409         }
1410
1411 ###### core functions
        /*
         * Print a binode.  Dispatches on b->op, with one case per operation
         * supplied by the "print binode cases" sections.
         * `indent` is the current indent level and `bracket` selects the
         * bracketing style, as described in the text above.
         */
        static void print_binode(struct binode *b, int indent, int bracket)
        {
                struct binode *b2;      // presumably scratch space for the included cases
                switch(b->op) {
                ## print binode cases
                }
        }
1419
        /*
         * Print an exec of any subtype; a NULL pointer prints nothing.
         * Binodes go to print_binode(); the remaining subtypes are handled
         * by the "print exec cases" sections.
         */
        static void print_exec(struct exec *e, int indent, int bracket)
        {
                if (!e)
                        return;         // NOTEST
                switch (e->type) {
                case Xbinode:
                        print_binode(cast(binode, e), indent, bracket); break;
                ## print exec cases
                }
        }
1430
1431 ###### forward decls
1432
1433         static void print_exec(struct exec *e, int indent, int bracket);
1434
1435 #### Analysing
1436
1437 As discussed, analysis involves propagating type requirements around
1438 the program and looking for errors.
1439
1440 So `propagate_types` is passed an expected type (being a `struct type`
1441 pointer together with some `val_rules` flags) that the `exec` is
1442 expected to return, and returns the type that it does return, either
1443 of which can be `NULL` signifying "unknown".  An `ok` flag is passed
1444 by reference. It is set to `0` when an error is found, and `2` when
1445 any change is made.  If it remains unchanged at `1`, then no more
1446 propagation is needed.
1447
1448 ###### ast
1449
1450         enum val_rules {Rnolabel = 1<<0, Rboolok = 1<<1, Rnoconstant = 1<<2};  // bit flags, tested with `&`
1451
1452 ###### format cases
1453         case 'r':
1454                 if (rules & Rnolabel)
1455                         fputs(" (labels not permitted)", stderr);
1456                 break;
1457
1458 ###### core functions
1459
1460         static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1461                                             struct type *type, int rules);
1462         static struct type *__propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1463                                               struct type *type, int rules)
1464         {
1465                 struct type *t;
1466
1467                 if (!prog)
1468                         return Tnone;
1469
1470                 switch (prog->type) {
1471                 case Xbinode:
1472                 {
1473                         struct binode *b = cast(binode, prog);
1474                         switch (b->op) {
1475                         ## propagate binode cases
1476                         }
1477                         break;
1478                 }
1479                 ## propagate exec cases
1480                 }
1481                 return Tnone;
1482         }
1483
1484         static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1485                                             struct type *type, int rules)
1486         {
1487                 struct type *ret = __propagate_types(prog, c, ok, type, rules);
1488
1489                 if (c->parse_error)
1490                         *ok = 0;
1491                 return ret;
1492         }
1493
1494 #### Interpreting
1495
1496 Interpreting an `exec` doesn't require anything but the `exec`.  State
1497 is stored in variables and each variable will be directly linked from
1498 within the `exec` tree.  The exception to this is the whole `program`
1499 which needs to look at command line arguments.  The `program` will be
1500 interpreted separately.
1501
1502 Each `exec` can return a value, which may be `Tnone` but must be
1503 non-NULL.  Some `exec`s will return the location of a value, which can
1504 be updated.  To support this, each exec case must store either a value
1505 in `val` or the pointer to a value in `lval`.  If `lval` is set, but a
1506 simple value is required, `interp_exec()` will dereference `lval` to
1507 get the value.
1508
1509 ###### core functions
1510
1511         struct lrval {
1512                 struct value val, *lval;
1513         };
1514
1515         static struct lrval _interp_exec(struct exec *e);
1516
1517         static struct value interp_exec(struct exec *e)
1518         {
1519                 struct lrval ret = _interp_exec(e);
1520
1521                 if (ret.lval)
1522                         return dup_value(*ret.lval);
1523                 else
1524                         return ret.val;
1525         }
1526
1527         static struct value *linterp_exec(struct exec *e)
1528         {
1529                 struct lrval ret = _interp_exec(e);
1530
1531                 return ret.lval;
1532         }
1533
1534         static struct lrval _interp_exec(struct exec *e)
1535         {
1536                 struct lrval ret;
1537                 struct value rv, *lrv = NULL;
1538                 rv.type = Tnone;
1539                 if (!e) {
1540                         ret.lval = lrv;
1541                         ret.val = rv;
1542                         return ret;
1543                 }
1544
1545                 switch(e->type) {
1546                 case Xbinode:
1547                 {
1548                         struct binode *b = cast(binode, e);
1549                         struct value left, right, *lleft;
1550                         left.type = right.type = Tnone;
1551                         switch (b->op) {
1552                         ## interp binode cases
1553                         }
1554                         free_value(left); free_value(right);
1555                         break;
1556                 }
1557                 ## interp exec cases
1558                 }
1559                 ret.lval = lrv;
1560                 ret.val = rv;
1561                 return ret;
1562         }
1563
1564 ### Complex types
1565
1566 Now that we have the shape of the interpreter in place we can add some
1567 complex types and connect them in to the data structures and the
1568 different phases of parse, analyse, print, interpret.
1569
1570 Thus far we have arrays and structs.
1571
1572 #### Arrays
1573
1574 Arrays can be declared by giving a size and a type, as `[size]type` so
1575 `freq:[26]number` declares `freq` to be an array of 26 numbers.  The
1576 size can be an arbitrary expression which is evaluated when the name
1577 comes into scope.
1578
1579 Arrays cannot be assigned.  When pointers are introduced we will also
1580 introduce array slices which can refer to part or all of an array -
1581 the assignment syntax will create a slice.  For now, an array can only
1582 ever be referenced by the name it is declared with.  It is likely that
1583 a "`copy`" primitive will eventually be defined which can be used to
1584 make a copy of an array with controllable depth.
1585
1586 ###### type union fields
1587
1588         struct {
1589                 int size;
1590                 struct variable *vsize;
1591                 struct type *member;
1592         } array;
1593
1594 ###### value union fields
1595         struct {
1596                 struct value *elmnts;
1597         } array;
1598
1599 ###### value functions
1600
1601         static struct value array_prepare(struct type *type)
1602         {
1603                 struct value ret;
1604
1605                 ret.type = type;
1606                 ret.array.elmnts = NULL;
1607                 return ret;
1608         }
1609
1610         static struct value array_init(struct type *type)
1611         {
1612                 struct value ret;
1613                 int i;
1614
1615                 ret.type = type;
1616                 if (type->array.vsize) {
1617                         mpz_t q;
1618                         mpz_init(q);
1619                         mpz_tdiv_q(q, mpq_numref(type->array.vsize->val.num),
1620                                    mpq_denref(type->array.vsize->val.num));
1621                         type->array.size = mpz_get_si(q);
1622                         mpz_clear(q);
1623                 }
1624                 ret.array.elmnts = calloc(type->array.size,
1625                                           sizeof(ret.array.elmnts[0]));
1626                 for (i = 0; ret.array.elmnts && i < type->array.size; i++)
1627                         ret.array.elmnts[i] = val_init(type->array.member);
1628                 return ret;
1629         }
1630
1631         static void array_free(struct value val)
1632         {
1633                 int i;
1634
1635                 if (val.array.elmnts)
1636                         for (i = 0; i < val.type->array.size; i++)
1637                                 free_value(val.array.elmnts[i]);
1638                 free(val.array.elmnts);
1639         }
1640
1641         static int array_compat(struct type *require, struct type *have)
1642         {
1643                 if (have->compat != require->compat)
1644                         return 0;
1645                 /* Both are arrays, so we can look at details */
1646                 if (!type_compat(require->array.member, have->array.member, 0))
1647                         return 0;
1648                 if (require->array.vsize == NULL && have->array.vsize == NULL)
1649                         return require->array.size == have->array.size;
1650
1651                 return require->array.vsize == have->array.vsize;
1652         }
1653
1654         static void array_print_type(struct type *type, FILE *f)
1655         {
1656                 fputs("[", f);
1657                 if (type->array.vsize) {
1658                         struct binding *b = type->array.vsize->name;
1659                         fprintf(f, "%.*s]", b->name.len, b->name.txt);
1660                 } else
1661                         fprintf(f, "%d]", type->array.size);
1662                 type_print(type->array.member, f);
1663         }
1664
1665         static struct type array_prototype = {
1666                 .prepare = array_prepare,
1667                 .init = array_init,
1668                 .print_type = array_print_type,
1669                 .compat = array_compat,
1670                 .free = array_free,
1671         };
1672
1673 ###### type grammar
1674
1675         | [ NUMBER ] Type ${
1676                 $0 = calloc(1, sizeof(struct type));
1677                 *($0) = array_prototype;
1678                 $0->array.member = $<4;
1679                 $0->array.vsize = NULL;
1680                 {
1681                 char tail[3];
1682                 mpq_t num;
1683                 if (number_parse(num, tail, $2.txt) == 0)
1684                         tok_err(c, "error: unrecognised number", &$2);
1685                 else if (tail[0])
1686                         tok_err(c, "error: unsupported number suffix", &$2);
1687                 else {
1688                         $0->array.size = mpz_get_ui(mpq_numref(num));
1689                         if (mpz_cmp_ui(mpq_denref(num), 1) != 0) {
1690                                 tok_err(c, "error: array size must be an integer",
1691                                         &$2);
1692                         } else if (mpz_cmp_ui(mpq_numref(num), 1UL << 30) >= 0)
1693                                 tok_err(c, "error: array size is too large",
1694                                         &$2);
1695                         mpq_clear(num);
1696                 }
1697                 $0->next= c->anon_typelist;
1698                 c->anon_typelist = $0;
1699                 }
1700         }$
1701
1702         | [ IDENTIFIER ] Type ${ {
1703                 struct variable *v = var_ref(c, $2.txt);
1704
1705                 if (!v)
1706                         tok_err(c, "error: name undeclared", &$2);
1707                 else if (!v->constant)
1708                         tok_err(c, "error: array size must be a constant", &$2);
1709
1710                 $0 = calloc(1, sizeof(struct type));
1711                 *($0) = array_prototype;
1712                 $0->array.member = $<4;
1713                 $0->array.size = 0;
1714                 $0->array.vsize = v;
1715                 $0->next= c->anon_typelist;
1716                 c->anon_typelist = $0;
1717         } }$
1718
1719 ###### parse context
1720
1721         struct type *anon_typelist;
1722
1723 ###### free context types
1724
1725         while (context.anon_typelist) {
1726                 struct type *t = context.anon_typelist;
1727
1728                 context.anon_typelist = t->next;
1729                 free(t);
1730         }
1731
1732 ###### Binode types
1733         Index,
1734
1735 ###### variable grammar
1736
1737         | Variable [ Expression ] ${ {
1738                 struct binode *b = new(binode);
1739                 b->op = Index;
1740                 b->left = $<1;
1741                 b->right = $<3;
1742                 $0 = b;
1743         } }$
1744
1745 ###### print binode cases
1746         case Index:
1747                 print_exec(b->left, -1, bracket);
1748                 printf("[");
1749                 print_exec(b->right, -1, bracket);
1750                 printf("]");
1751                 break;
1752
1753 ###### propagate binode cases
1754         case Index:
1755                 /* left must be an array, right must be a number,
1756                  * result is the member type of the array
1757                  */
1758                 propagate_types(b->right, c, ok, Tnum, 0);
1759                 t = propagate_types(b->left, c, ok, NULL, rules & Rnoconstant);
1760                 if (!t || t->compat != array_compat) {
1761                         type_err(c, "error: %1 cannot be indexed", prog, t, 0, NULL);
1762                         return NULL;
1763                 } else {
1764                         if (!type_compat(type, t->array.member, rules)) {
1765                                 type_err(c, "error: have %1 but need %2", prog,
1766                                          t->array.member, rules, type);
1767                         }
1768                         return t->array.member;
1769                 }
1770                 break;
1771
1772 ###### interp binode cases
1773         case Index: {
1774                 mpz_t q;
1775                 long i;
1776                 // Evaluate array and subscript; the subscript is truncated toward zero.
1777                 lleft = linterp_exec(b->left);
1778                 right = interp_exec(b->right);
1779                 mpz_init(q);
1780                 mpz_tdiv_q(q, mpq_numref(right.num), mpq_denref(right.num));
1781                 i = mpz_get_si(q);
1782                 mpz_clear(q);
1783                 // Out of range, or storage never allocated: yield a fresh member value.
1784                 if (lleft->array.elmnts && i >= 0 && i < lleft->type->array.size)
1785                         lrv = &lleft->array.elmnts[i];
1786                 else
1787                         rv = val_init(lleft->type->array.member);
1788                 break;
1789         }
1790
1791 #### Structs
1792
1793 A `struct` is a data-type that contains one or more other data-types.
1794 It differs from an array in that each member can be of a different
1795 type, and they are accessed by name rather than by number.  Thus you
1796 cannot choose an element by calculation, you need to know what you
1797 want up-front.
1798
1799 The language makes no promises about how a given structure will be
1800 stored in memory - it is free to rearrange fields to suit whatever
1801 criteria seems important.
1802
1803 Structs are declared separately from program code - they cannot be
1804 declared in-line in a variable declaration like arrays can.  A struct
1805 is given a name and this name is used to identify the type - the name
1806 is not prefixed by the word `struct` as it would be in C.
1807
1808 Structs are only treated as the same if they have the same name.
1809 Simply having the same fields in the same order is not enough.  This
1810 might change once we can create structure initializers from a list of
1811 values.
1812
1813 Each component datum is identified much like a variable is declared,
1814 with a name, one or two colons, and a type.  The type cannot be omitted
1815 as there is no opportunity to deduce the type from usage.  An initial
1816 value can be given following an equals sign, so
1817
1818 ##### Example: a struct type
1819
1820         struct complex:
1821                 x:number = 0
1822                 y:number = 0
1823
1824 would declare a type called "complex" which has two number fields,
1825 each initialised to zero.
1826
1827 Structs will need to be declared separately from the code that uses
1828 them, so we will need to be able to print out the declaration of a
1829 struct when reprinting the whole program.  So a `print_type_decl` type
1830 function will be needed.
1831
1832 ###### type union fields
1833
1834         struct {
1835                 int nfields;
1836                 struct field {
1837                         struct text name;
1838                         struct type *type;
1839                         struct value init;
1840                 } *fields;
1841         } structure;
1842
1843 ###### value union fields
1844         struct {
1845                 struct value *fields;
1846         } structure;
1847
1848 ###### type functions
1849         void (*print_type_decl)(struct type *type, FILE *f);
1850
1851 ###### value functions
1852
1853         static struct value structure_prepare(struct type *type)
1854         {
1855                 struct value ret;
1856
1857                 ret.type = type;
1858                 ret.structure.fields = NULL;
1859                 return ret;
1860         }
1861
1862         static struct value structure_init(struct type *type)
1863         {
1864                 struct value ret;
1865                 int i;
1866
1867                 ret.type = type;
1868                 ret.structure.fields = calloc(type->structure.nfields,
1869                                               sizeof(ret.structure.fields[0]));
1870                 for (i = 0; ret.structure.fields && i < type->structure.nfields; i++)
1871                         ret.structure.fields[i] = val_init(type->structure.fields[i].type);
1872                 return ret;
1873         }
1874
1875         static void structure_free(struct value val)
1876         {
1877                 int i;
1878
1879                 if (val.structure.fields)
1880                         for (i = 0; i < val.type->structure.nfields; i++)
1881                                 free_value(val.structure.fields[i]);
1882                 free(val.structure.fields);
1883         }
1884
1885         static void structure_free_type(struct type *t)
1886         {
1887                 int i;
1888                 for (i = 0; i < t->structure.nfields; i++)
1889                         free_value(t->structure.fields[i].init);
1890                 free(t->structure.fields);
1891         }
1892
1893         static struct type structure_prototype = {
1894                 .prepare = structure_prepare,
1895                 .init = structure_init,
1896                 .free = structure_free,
1897                 .free_type = structure_free_type,
1898                 .print_type_decl = structure_print_type,
1899         };
1900
1901 ###### exec type
1902         Xfieldref,
1903
1904 ###### ast
1905         struct fieldref {
1906                 struct exec;
1907                 struct exec *left;
1908                 int index;
1909                 struct text name;
1910         };
1911
1912 ###### free exec cases
1913         case Xfieldref:
1914                 free_exec(cast(fieldref, e)->left);
1915                 free(e);
1916                 break;
1917
1918 ###### variable grammar
1919
1920         | Variable . IDENTIFIER ${ {
1921                 struct fieldref *fr = new_pos(fieldref, $2);
1922                 fr->left = $<1;
1923                 fr->name = $3.txt;
1924                 fr->index = -2;
1925                 $0 = fr;
1926         } }$
1927
1928 ###### print exec cases
1929
1930         case Xfieldref:
1931         {
1932                 struct fieldref *f = cast(fieldref, e);
1933                 print_exec(f->left, -1, bracket);
1934                 printf(".%.*s", f->name.len, f->name.txt);
1935                 break;
1936         }
1937
1938 ###### ast functions
1939         static int find_struct_index(struct type *type, struct text field)
1940         {
1941                 int i;
1942                 for (i = 0; i < type->structure.nfields; i++)
1943                         if (text_cmp(type->structure.fields[i].name, field) == 0)
1944                                 return i;
1945                 return -1;
1946         }
1947
1948 ###### propagate exec cases
1949
1950         case Xfieldref:
1951         {
1952                 struct fieldref *f = cast(fieldref, prog);
1953                 struct type *st = propagate_types(f->left, c, ok, NULL, 0);
1954
1955                 if (!st)
1956                         type_err(c, "error: unknown type for field access", f->left,
1957                                  NULL, 0, NULL);
1958                 else if (st->prepare != structure_prepare)
1959                         type_err(c, "error: field reference attempted on %1, not a struct",
1960                                  f->left, st, 0, NULL);
1961                 else if (f->index == -2) {
1962                         f->index = find_struct_index(st, f->name);
1963                         if (f->index < 0)
1964                                 type_err(c, "error: cannot find requested field in %1",
1965                                          f->left, st, 0, NULL);
1966                 }
1967                 if (f->index >= 0) {
1968                         struct type *ft = st->structure.fields[f->index].type;
1969                         if (!type_compat(type, ft, rules))
1970                                 type_err(c, "error: have %1 but need %2", prog,
1971                                          ft, rules, type);
1972                         return ft;
1973                 }
1974                 break;
1975         }
1976
1977 ###### interp exec cases
1978         case Xfieldref:
1979         {
1980                 struct fieldref *f = cast(fieldref, e);
1981                 struct value *lleft = linterp_exec(f->left);
1982                 lrv = &lleft->structure.fields[f->index];
1983                 break;
1984         }
1985
1986 ###### ast
1987         struct fieldlist {
1988                 struct fieldlist *prev;
1989                 struct field f;
1990         };
1991
1992 ###### ast functions
1993         static void free_fieldlist(struct fieldlist *f)
1994         {
1995                 if (!f)
1996                         return;
1997                 free_fieldlist(f->prev);
1998                 free_value(f->f.init);
1999                 free(f);
2000         }
2001
2002 ###### top level grammar
2003         DeclareStruct -> struct IDENTIFIER FieldBlock ${ {
2004                 struct type *t =
2005                         add_type(c, $2.txt, &structure_prototype);
2006                 int cnt = 0;
2007                 struct fieldlist *f;
2008
2009                 for (f = $3; f; f=f->prev)
2010                         cnt += 1;
2011
2012                 t->structure.nfields = cnt;
2013                 t->structure.fields = calloc(cnt, sizeof(struct field));
2014                 f = $3;
2015                 while (cnt > 0) {
2016                         cnt -= 1;
2017                         t->structure.fields[cnt] = f->f;
2018                         f->f.init = val_prepare(Tnone);
2019                         f = f->prev;
2020                 }
2021         } }$
2022
2023         $void
2024         Open -> {
2025                 | NEWLINE Open
2026         Close -> }
2027                 | NEWLINE Close
2028         $*fieldlist
2029         FieldBlock -> Open FieldList Close ${ $0 = $<2; }$
2030                 | Open SimpleFieldList } ${ $0 = $<2; }$
2031                 | : FieldList  ${ $0 = $<2; }$
2032
2033         FieldList -> SimpleFieldList NEWLINE ${ $0 = $<1; }$
2034                 | FieldList SimpleFieldList NEWLINE ${
2035                         $2->prev = $<1;
2036                         $0 = $<2;
2037                 }$
2038
2039         SimpleFieldList -> Field ${ $0 = $<1; }$
2040                 | SimpleFieldList ; Field ${
2041                         $3->prev = $<1;
2042                         $0 = $<3;
2043                 }$
2044                 | SimpleFieldList ; ${
2045                         $0 = $<1;
2046                 }$
2047
2048         Field -> IDENTIFIER : Type = Expression ${ {
2049                         int ok;
2050
2051                         $0 = calloc(1, sizeof(struct fieldlist));
2052                         $0->f.name = $1.txt;
2053                         $0->f.type = $<3;
2054                         $0->f.init = val_prepare($0->f.type);
2055                         do {
2056                                 ok = 1;
2057                                 propagate_types($<5, c, &ok, $3, 0);
2058                         } while (ok == 2);
2059                         if (!ok)
2060                                 c->parse_error = 1;
2061                         else
2062                                 $0->f.init = interp_exec($5);
2063                 } }$
2064                 | IDENTIFIER : Type ${
2065                         $0 = calloc(1, sizeof(struct fieldlist));
2066                         $0->f.name = $1.txt;
2067                         $0->f.type = $<3;
2068                         $0->f.init = val_init($3);
2069                 }$
2070                 | ERROR ${ tok_err(c, "Syntax error in struct field", &$1); }$
2071
2072 ###### forward decls
2073         static void structure_print_type(struct type *t, FILE *f);
2074
2075 ###### value functions
2076         static void structure_print_type(struct type *t, FILE *f)
2077         {
2078                 int i;
2079                 // Write the declaration of struct type `t` to `f`, one field per line.
2080                 fprintf(f, "struct %.*s:\n", t->name.len, t->name.txt);
2081
2082                 for (i = 0; i < t->structure.nfields; i++) {
2083                         struct field *fl = t->structure.fields + i;
2084                         fprintf(f, "    %.*s : ", fl->name.len, fl->name.txt);
2085                         type_print(fl->type, f);
2086                         if (fl->init.type->print) {
2087                                 fprintf(f, " = ");
2088                                 if (fl->init.type == Tstr)
2089                                         fprintf(f, "\"");
2090                                 print_value(fl->init);  // NOTE(review): takes no stream; presumably writes to stdout - confirm
2091                                 if (fl->init.type == Tstr)
2092                                         fprintf(f, "\"");
2093                         }
2094                         fprintf(f, "\n");       // was printf(): keep the newline on the same stream as the field text
2095                 }
2096         }
2097
2098 ###### print type decls
2099         {
2100                 struct type *t;
2101                 int target = -1;
2102
2103                 while (target != 0) {
2104                         int i = 0;
2105                         for (t = context.typelist; t ; t=t->next)
2106                                 if (t->print_type_decl) {
2107                                         i += 1;
2108                                         if (i == target)
2109                                                 break;
2110                                 }
2111
2112                         if (target == -1) {
2113                                 target = i;
2114                         } else {
2115                                 t->print_type_decl(t, stdout);
2116                                 target -= 1;
2117                         }
2118                 }
2119         }
2120
2121 ## Executables: the elements of code
2122
2123 Each code element needs to be parsed, printed, analysed,
2124 interpreted, and freed.  There are several, so let's just start with
2125 the easy ones and work our way up.
2126
2127 ### Values
2128
2129 We have already met values as separate objects.  When manifest
2130 constants appear in the program text, that must result in an executable
2131 which has a constant value.  So the `val` structure embeds a value in
2132 an executable.
2133
2134 ###### exec type
2135         Xval,
2136
2137 ###### ast
2138         struct val {
2139                 struct exec;
2140                 struct value val;
2141         };
2142
2143 ###### Grammar
2144
2145         $*val
2146         Value ->  True ${
2147                         $0 = new_pos(val, $1);
2148                         $0->val.type = Tbool;
2149                         $0->val.bool = 1;
2150                         }$
2151                 | False ${
2152                         $0 = new_pos(val, $1);
2153                         $0->val.type = Tbool;
2154                         $0->val.bool = 0;
2155                         }$
2156                 | NUMBER ${
2157                         $0 = new_pos(val, $1);
2158                         $0->val.type = Tnum;
2159                         {
2160                         char tail[3];
2161                         if (number_parse($0->val.num, tail, $1.txt) == 0)
2162                                 mpq_init($0->val.num);
2163                                 if (tail[0])
2164                                         tok_err(c, "error: unsupported number suffix",
2165                                                 &$1);
2166                         }
2167                         }$
2168                 | STRING ${
2169                         $0 = new_pos(val, $1);
2170                         $0->val.type = Tstr;
2171                         {
2172                         char tail[3];
2173                         string_parse(&$1, '\\', &$0->val.str, tail);
2174                         if (tail[0])
2175                                 tok_err(c, "error: unsupported string suffix",
2176                                         &$1);
2177                         }
2178                         }$
2179                 | MULTI_STRING ${
2180                         $0 = new_pos(val, $1);
2181                         $0->val.type = Tstr;
2182                         {
2183                         char tail[3];
2184                         string_parse(&$1, '\\', &$0->val.str, tail);
2185                         if (tail[0])
2186                                 tok_err(c, "error: unsupported string suffix",
2187                                         &$1);
2188                         }
2189                         }$
2190
2191 ###### print exec cases
2192         case Xval:
2193         {
2194                 struct val *v = cast(val, e);
2195                 if (v->val.type == Tstr)
2196                         printf("\"");
2197                 print_value(v->val);
2198                 if (v->val.type == Tstr)
2199                         printf("\"");
2200                 break;
2201         }
2202
2203 ###### propagate exec cases
2204         case Xval:
2205         {
2206                 struct val *val = cast(val, prog);
2207                 if (!type_compat(type, val->val.type, rules))
2208                         type_err(c, "error: expected %1%r found %2",
2209                                    prog, type, rules, val->val.type);
2210                 return val->val.type;
2211         }
2212
2213 ###### interp exec cases
2214         case Xval:
2215                 rv = dup_value(cast(val, e)->val);
2216                 break;
2217
2218 ###### ast functions
2219         static void free_val(struct val *v)
2220         {
2221                 if (!v)
2222                         return;
2223                 free_value(v->val);
2224                 free(v);
2225         }
2226
2227 ###### free exec cases
2228         case Xval: free_val(cast(val, e)); break;
2229
2230 ###### ast functions
2231         // Move all nodes from 'b' to 'rv', reversing the order.
2232         // In 'b', 'left' is a list, and 'right' is the last node.
2233         // In 'rv', 'left' is the first node and 'right' is a list.
2234         static struct binode *reorder_bilist(struct binode *b)
2235         {
2236                 struct binode *rv = NULL;
2237
2238                 while (b) {
2239                         struct exec *t = b->right;      // payload carried by this node
2240                         b->right = rv;                  // chain onto the part already reversed
2241                         rv = b;
2242                         if (b->left)
2243                                 b = cast(binode, b->left);      // step to the rest of the old list
2244                         else
2245                                 b = NULL;
2246                         rv->left = t;                   // payload now sits on the left
2247                 }
2248                 return rv;
2249         }
2250
2251 ### Variables
2252
2253 Just as we used a `val` to wrap a value into an `exec`, we similarly
2254 need a `var` to wrap a `variable` into an exec.  While each `val`
2255 contained a copy of the value, each `var` holds a link to the variable
2256 because it really is the same variable no matter where it appears.
2257 When a variable is used, we need to remember to follow the `->merged`
2258 link to find the primary instance.
2259
2260 ###### exec type
2261         Xvar,
2262
2263 ###### ast
2264         struct var {
2265                 struct exec;
2266                 struct variable *var;
2267         };
2268
2269 ###### Grammar
2270
2271         $*var
2272         VariableDecl -> IDENTIFIER : ${ {
2273                 struct variable *v = var_decl(c, $1.txt);
2274                 $0 = new_pos(var, $1);
2275                 $0->var = v;
2276                 if (v)
2277                         v->where_decl = $0;
2278                 else {
2279                         v = var_ref(c, $1.txt);
2280                         $0->var = v;
2281                         type_err(c, "error: variable '%v' redeclared",
2282                                  $0, NULL, 0, NULL);
2283                         type_err(c, "info: this is where '%v' was first declared",
2284                                  v->where_decl, NULL, 0, NULL);
2285                 }
2286         } }$
2287             | IDENTIFIER :: ${ {
2288                 struct variable *v = var_decl(c, $1.txt);
2289                 $0 = new_pos(var, $1);
2290                 $0->var = v;
2291                 if (v) {
2292                         v->where_decl = $0;
2293                         v->constant = 1;
2294                 } else {
2295                         v = var_ref(c, $1.txt);
2296                         $0->var = v;
2297                         type_err(c, "error: variable '%v' redeclared",
2298                                  $0, NULL, 0, NULL);
2299                         type_err(c, "info: this is where '%v' was first declared",
2300                                  v->where_decl, NULL, 0, NULL);
2301                 }
2302         } }$
2303             | IDENTIFIER : Type ${ {
2304                 struct variable *v = var_decl(c, $1.txt);
2305                 $0 = new_pos(var, $1);
2306                 $0->var = v;
2307                 if (v) {
2308                         v->where_decl = $0;
2309                         v->where_set = $0;
2310                         v->val = val_prepare($<3);
2311                 } else {
2312                         v = var_ref(c, $1.txt);
2313                         $0->var = v;
2314                         type_err(c, "error: variable '%v' redeclared",
2315                                  $0, NULL, 0, NULL);
2316                         type_err(c, "info: this is where '%v' was first declared",
2317                                  v->where_decl, NULL, 0, NULL);
2318                 }
2319         } }$
2320             | IDENTIFIER :: Type ${ {
2321                 struct variable *v = var_decl(c, $1.txt);
2322                 $0 = new_pos(var, $1);
2323                 $0->var = v;
2324                 if (v) {
2325                         v->where_decl = $0;
2326                         v->where_set = $0;
2327                         v->val = val_prepare($<3);
2328                         v->constant = 1;
2329                 } else {
2330                         v = var_ref(c, $1.txt);
2331                         $0->var = v;
2332                         type_err(c, "error: variable '%v' redeclared",
2333                                  $0, NULL, 0, NULL);
2334                         type_err(c, "info: this is where '%v' was first declared",
2335                                  v->where_decl, NULL, 0, NULL);
2336                 }
2337         } }$
2338
2339         $*exec
2340         Variable -> IDENTIFIER ${ {
2341                 struct variable *v = var_ref(c, $1.txt);
2342                 $0 = new_pos(var, $1);
2343                 if (v == NULL) {
2344                         /* This might be a label - allocate a var just in case */
2345                         v = var_decl(c, $1.txt);
2346                         if (v) {
2347                                 v->val = val_prepare(Tnone);
2348                                 v->where_decl = $0;
2349                                 v->where_set = $0;
2350                         }
2351                 }
2352                 cast(var, $0)->var = v;
2353         } }$
2354         ## variable grammar
2355
2356         $*type
2357         Type -> IDENTIFIER ${
2358                 $0 = find_type(c, $1.txt);
2359                 if (!$0) {
2360                         tok_err(c,
2361                                 "error: undefined type", &$1);
2362
2363                         $0 = Tnone;
2364                 }
2365         }$
2366         ## type grammar
2367
2368 ###### print exec cases
2369         case Xvar:
2370         {
2371                 struct var *v = cast(var, e);
2372                 if (v->var) {
2373                         struct binding *b = v->var->name;
2374                         printf("%.*s", b->name.len, b->name.txt);
2375                 }
2376                 break;
2377         }
2378
2379 ###### format cases
2380         case 'v':
2381                 if (loc->type == Xvar) {
2382                         struct var *v = cast(var, loc);
2383                         if (v->var) {
2384                                 struct binding *b = v->var->name;
2385                                 fprintf(stderr, "%.*s", b->name.len, b->name.txt);
2386                         } else
2387                                 fputs("???", stderr);   // NOTEST
2388                 } else
2389                         fputs("NOTVAR", stderr);        // NOTEST
2390                 break;
2391
2392 ###### propagate exec cases
2393
2394         case Xvar:
2395         {
2396                 struct var *var = cast(var, prog);
2397                 struct variable *v = var->var;
2398                 if (!v) {
2399                         type_err(c, "%d:BUG: no variable!!", prog, NULL, 0, NULL); // NOTEST
2400                         return Tnone;                                   // NOTEST
2401                 }
2402                 if (v->merged)
2403                         v = v->merged;
2404                 if (v->constant && (rules & Rnoconstant)) {
2405                         type_err(c, "error: Cannot assign to a constant: %v",
2406                                  prog, NULL, 0, NULL);
2407                         type_err(c, "info: name was defined as a constant here",
2408                                  v->where_decl, NULL, 0, NULL);
2409                         return v->val.type;
2410                 }
2411                 if (v->val.type == Tnone && v->where_decl == prog)
2412                         type_err(c, "error: variable used but not declared: %v",
2413                                  prog, NULL, 0, NULL);
2414                 if (v->val.type == NULL) {
2415                         if (type && *ok != 0) {
2416                                 v->val = val_prepare(type);
2417                                 v->where_set = prog;
2418                                 *ok = 2;
2419                         }
2420                         return type;
2421                 }
2422                 if (!type_compat(type, v->val.type, rules)) {
2423                         type_err(c, "error: expected %1%r but variable '%v' is %2", prog,
2424                                  type, rules, v->val.type);
2425                         type_err(c, "info: this is where '%v' was set to %1", v->where_set,
2426                                  v->val.type, rules, NULL);
2427                 }
2428                 if (!type)
2429                         return v->val.type;
2430                 return type;
2431         }
2432
2433 ###### interp exec cases
2434         case Xvar:
2435         {
2436                 struct var *var = cast(var, e);
2437                 struct variable *v = var->var;
2438
2439                 if (v->merged)
2440                         v = v->merged;
2441                 lrv = &v->val;
2442                 break;
2443         }
2444
2445 ###### ast functions
2446
2447         static void free_var(struct var *v)
2448         {
2449                 free(v);        // frees only the node; the struct variable it links to is not touched here
2450         }
2451
2452 ###### free exec cases
2453         case Xvar: free_var(cast(var, e)); break;
2454
2455 ### Expressions: Conditional
2456
2457 Our first user of the `binode` will be conditional expressions, which
2458 is a bit odd as they actually have three components.  That will be
2459 handled by having 2 binodes for each expression.  The conditional
2460 expression is the lowest precedence operator, so it gets to define
2461 what an "Expression" is.  The next level up is "BoolExpr", which
2462 comes next.
2463
2464 Conditional expressions are of the form "value `if` condition `else`
2465 other_value".  They associate to the right, so everything to the right
2466 of `else` is part of an else value, while only the BoolExpr to the
2467 left of `if` is the if value.  Between `if` and `else` there is no
2468 room for ambiguity, so a full conditional expression is allowed in there.
2469
2470 ###### Binode types
2471         CondExpr,
2472
2473 ###### Grammar
2474
2475         $*exec
2476         Expression -> BoolExpr if Expression else Expression ${ {
2477                         struct binode *b1 = new(binode);
2478                         struct binode *b2 = new(binode);
2479                         b1->op = CondExpr;
2480                         b1->left = $<3;
2481                         b1->right = b2;
2482                         b2->op = CondExpr;
2483                         b2->left = $<1;
2484                         b2->right = $<5;
2485                         $0 = b1;
2486                 } }$
2487                 | BoolExpr ${ $0 = $<1; }$
2488
2489 ###### print binode cases
2490
2491         case CondExpr:
2492                 b2 = cast(binode, b->right);
2493                 if (bracket) printf("(");
2494                 print_exec(b2->left, -1, bracket);
2495                 printf(" if ");
2496                 print_exec(b->left, -1, bracket);
2497                 printf(" else ");
2498                 print_exec(b2->right, -1, bracket);
2499                 if (bracket) printf(")");
2500                 break;
2501
2502 ###### propagate binode cases
2503
2504         case CondExpr: {
2505                 /* cond must be Tbool, others must match */
2506                 struct binode *b2 = cast(binode, b->right);
2507                 struct type *t2;
2508
2509                 propagate_types(b->left, c, ok, Tbool, 0);
2510                 t = propagate_types(b2->left, c, ok, type, Rnolabel);
2511                 t2 = propagate_types(b2->right, c, ok, type ?: t, Rnolabel);
2512                 return t ?: t2;
2513         }
2514
2515 ###### interp binode cases
2516
2517         case CondExpr: {
2518                 struct binode *b2 = cast(binode, b->right);
2519                 left = interp_exec(b->left);
2520                 if (left.bool)
2521                         rv = interp_exec(b2->left);
2522                 else
2523                         rv = interp_exec(b2->right);
2524                 }
2525                 break;
2526
2527 ### Expressions: Boolean
2528
2529 The next class of expressions to use the `binode` will be Boolean
2530 expressions.  As I haven't implemented precedence in the parser
2531 generator yet, we need different names for each precedence level used
2532 by expressions.  The outer most or lowest level precedence after
2533 conditional expressions are Boolean operators which form a `BoolExpr`
2534 out of `BTerm`s and `BFact`s.  As well as `or`, `and`, and `not` we
2535 have `and then` and `or else` which only evaluate the second operand
2536 if the result would make a difference.
2537
2538 ###### Binode types
2539         And,
2540         AndThen,
2541         Or,
2542         OrElse,
2543         Not,
2544
2545 ###### Grammar
2546
2547         $*exec
2548         BoolExpr -> BoolExpr or BTerm ${ {
2549                         struct binode *b = new(binode);
2550                         b->op = Or;
2551                         b->left = $<1;
2552                         b->right = $<3;
2553                         $0 = b;
2554                 } }$
2555                 | BoolExpr or else BTerm ${ {
2556                         struct binode *b = new(binode);
2557                         b->op = OrElse;
2558                         b->left = $<1;
2559                         b->right = $<4;
2560                         $0 = b;
2561                 } }$
2562                 | BTerm ${ $0 = $<1; }$
2563
2564         BTerm -> BTerm and BFact ${ {
2565                         struct binode *b = new(binode);
2566                         b->op = And;
2567                         b->left = $<1;
2568                         b->right = $<3;
2569                         $0 = b;
2570                 } }$
2571                 | BTerm and then BFact ${ {
2572                         struct binode *b = new(binode);
2573                         b->op = AndThen;
2574                         b->left = $<1;
2575                         b->right = $<4;
2576                         $0 = b;
2577                 } }$
2578                 | BFact ${ $0 = $<1; }$
2579
2580         BFact -> not BFact ${ {
2581                         struct binode *b = new(binode);
2582                         b->op = Not;
2583                         b->right = $<2;
2584                         $0 = b;
2585                 } }$
2586                 ## other BFact
2587
2588 ###### print binode cases
2589         case And:
2590                 if (bracket) printf("(");
2591                 print_exec(b->left, -1, bracket);
2592                 printf(" and ");
2593                 print_exec(b->right, -1, bracket);
2594                 if (bracket) printf(")");
2595                 break;
2596         case AndThen:
2597                 if (bracket) printf("(");
2598                 print_exec(b->left, -1, bracket);
2599                 printf(" and then ");
2600                 print_exec(b->right, -1, bracket);
2601                 if (bracket) printf(")");
2602                 break;
2603         case Or:
2604                 if (bracket) printf("(");
2605                 print_exec(b->left, -1, bracket);
2606                 printf(" or ");
2607                 print_exec(b->right, -1, bracket);
2608                 if (bracket) printf(")");
2609                 break;
2610         case OrElse:
2611                 if (bracket) printf("(");
2612                 print_exec(b->left, -1, bracket);
2613                 printf(" or else ");
2614                 print_exec(b->right, -1, bracket);
2615                 if (bracket) printf(")");
2616                 break;
2617         case Not:
2618                 if (bracket) printf("(");
2619                 printf("not ");
2620                 print_exec(b->right, -1, bracket);
2621                 if (bracket) printf(")");
2622                 break;
2623
2624 ###### propagate binode cases
2625         case And:
2626         case AndThen:
2627         case Or:
2628         case OrElse:
2629         case Not:
2630                 /* both must be Tbool, result is Tbool */
2631                 propagate_types(b->left, c, ok, Tbool, 0);
2632                 propagate_types(b->right, c, ok, Tbool, 0);
2633                 if (type && type != Tbool)
2634                         type_err(c, "error: %1 operation found where %2 expected", prog,
2635                                    Tbool, 0, type);
2636                 return Tbool;
2637
2638 ###### interp binode cases
2639         case And:
2640                 rv = interp_exec(b->left);
2641                 right = interp_exec(b->right);
2642                 rv.bool = rv.bool && right.bool;
2643                 break;
2644         case AndThen:
2645                 rv = interp_exec(b->left);
2646                 if (rv.bool)
2647                         rv = interp_exec(b->right);
2648                 break;
2649         case Or:
2650                 rv = interp_exec(b->left);
2651                 right = interp_exec(b->right);
2652                 rv.bool = rv.bool || right.bool;
2653                 break;
2654         case OrElse:
2655                 rv = interp_exec(b->left);
2656                 if (!rv.bool)
2657                         rv = interp_exec(b->right);
2658                 break;
2659         case Not:
2660                 rv = interp_exec(b->right);
2661                 rv.bool = !rv.bool;
2662                 break;
2663
2664 ### Expressions: Comparison
2665
2666 Of slightly higher precedence than Boolean expressions are
2667 Comparisons.
2668 A comparison takes arguments of any comparable type, but the two types must be
2669 the same.
2670
2671 To simplify the parsing we introduce an `eop` which can record an
2672 expression operator.
2673
2674 ###### ast
2675         struct eop {
2676                 enum Btype op;
2677         };
2678
2679 ###### ast functions
2680         static void free_eop(struct eop *e)
2681         {
2682                 // Release an eop record; free(NULL) is a no-op, so no guard is needed.
2683                 free(e);
2684         }
2685
2686 ###### Binode types
2687         Less,
2688         Gtr,
2689         LessEq,
2690         GtrEq,
2691         Eql,
2692         NEql,
2693
2694 ###### other BFact
2695         | Expr CMPop Expr ${ {
2696                 struct binode *b = new(binode);
2697                 b->op = $2.op;
2698                 b->left = $<1;
2699                 b->right = $<3;
2700                 $0 = b;
2701         } }$
2702         | Expr ${ $0 = $<1; }$
2703
2704 ###### Grammar
2705
2706         $eop
2707         CMPop ->   < ${ $0.op = Less; }$
2708                 |  > ${ $0.op = Gtr; }$
2709                 |  <= ${ $0.op = LessEq; }$
2710                 |  >= ${ $0.op = GtrEq; }$
2711                 |  == ${ $0.op = Eql; }$
2712                 |  != ${ $0.op = NEql; }$
2713
2714 ###### print binode cases
2715
2716         case Less:
2717         case LessEq:
2718         case Gtr:
2719         case GtrEq:
2720         case Eql:
2721         case NEql:
2722                 if (bracket) printf("(");
2723                 print_exec(b->left, -1, bracket);
2724                 switch(b->op) {
2725                 case Less:   printf(" < "); break;
2726                 case LessEq: printf(" <= "); break;
2727                 case Gtr:    printf(" > "); break;
2728                 case GtrEq:  printf(" >= "); break;
2729                 case Eql:    printf(" == "); break;
2730                 case NEql:   printf(" != "); break;
2731                 default: abort();               // NOTEST
2732                 }
2733                 print_exec(b->right, -1, bracket);
2734                 if (bracket) printf(")");
2735                 break;
2736
2737 ###### propagate binode cases
2738         case Less:
2739         case LessEq:
2740         case Gtr:
2741         case GtrEq:
2742         case Eql:
2743         case NEql:
2744                 /* Both must match but not be labels, result is Tbool */
2745                 t = propagate_types(b->left, c, ok, NULL, Rnolabel);
2746                 if (t)
2747                         propagate_types(b->right, c, ok, t, 0);
2748                 else {
2749                         t = propagate_types(b->right, c, ok, NULL, Rnolabel);
2750                         if (t)
2751                                 t = propagate_types(b->left, c, ok, t, 0);
2752                 }
2753                 if (!type_compat(type, Tbool, 0))
2754                         type_err(c, "error: Comparison returns %1 but %2 expected", prog,
2755                                     Tbool, rules, type);
2756                 return Tbool;
2757
2758 ###### interp binode cases
2759         case Less:
2760         case LessEq:
2761         case Gtr:
2762         case GtrEq:
2763         case Eql:
2764         case NEql:
2765         {
2766                 int cmp;
2767                 left = interp_exec(b->left);
2768                 right = interp_exec(b->right);
2769                 cmp = value_cmp(left, right);
2770                 rv.type = Tbool;
2771                 switch (b->op) {
2772                 case Less:      rv.bool = cmp <  0; break;
2773                 case LessEq:    rv.bool = cmp <= 0; break;
2774                 case Gtr:       rv.bool = cmp >  0; break;
2775                 case GtrEq:     rv.bool = cmp >= 0; break;
2776                 case Eql:       rv.bool = cmp == 0; break;
2777                 case NEql:      rv.bool = cmp != 0; break;
2778                 default: rv.bool = 0; break;    // NOTEST
2779                 }
2780                 break;
2781         }
2782
2783 ### Expressions: The rest
2784
2785 The remaining expressions with the highest precedence are arithmetic
2786 and string concatenation.  They are `Expr`, `Term`, and `Factor`.
2787 The `Factor` is where the `Value` and `Variable` that we already have
2788 are included.
2789
2790 `+` and `-` are both infix and prefix operations (where they are
2791 absolute value and negation).  These have different operator names.
2792
2793 We also have a 'Bracket' operator which records where parentheses were
2794 found.  This makes it easy to reproduce these when printing.  Once
2795 precedence is handled better I might be able to discard this.
2796
2797 ###### Binode types
2798         Plus, Minus,
2799         Times, Divide, Rem,
2800         Concat,
2801         Absolute, Negate,
2802         Bracket,
2803
2804 ###### Grammar
2805
2806         $*exec
2807         Expr -> Expr Eop Term ${ {
2808                         struct binode *b = new(binode);
2809                         b->op = $2.op;
2810                         b->left = $<1;
2811                         b->right = $<3;
2812                         $0 = b;
2813                 } }$
2814                 | Term ${ $0 = $<1; }$
2815
2816         Term -> Term Top Factor ${ {
2817                         struct binode *b = new(binode);
2818                         b->op = $2.op;
2819                         b->left = $<1;
2820                         b->right = $<3;
2821                         $0 = b;
2822                 } }$
2823                 | Factor ${ $0 = $<1; }$
2824
2825         Factor -> ( Expression ) ${ {
2826                         struct binode *b = new_pos(binode, $1);
2827                         b->op = Bracket;
2828                         b->right = $<2;
2829                         $0 = b;
2830                 } }$
2831                 | Uop Factor ${ {
2832                         struct binode *b = new(binode);
2833                         b->op = $1.op;
2834                         b->right = $<2;
2835                         $0 = b;
2836                 } }$
2837                 | Value ${ $0 = $<1; }$
2838                 | Variable ${ $0 = $<1; }$
2839
2840         $eop
2841         Eop ->    + ${ $0.op = Plus; }$
2842                 | - ${ $0.op = Minus; }$
2843
2844         Uop ->    + ${ $0.op = Absolute; }$
2845                 | - ${ $0.op = Negate; }$
2846
2847         Top ->    * ${ $0.op = Times; }$
2848                 | / ${ $0.op = Divide; }$
2849                 | % ${ $0.op = Rem; }$
2850                 | ++ ${ $0.op = Concat; }$
2851
2852 ###### print binode cases
2853         case Plus:
2854         case Minus:
2855         case Times:
2856         case Divide:
2857         case Concat:
2858         case Rem:
2859                 if (bracket) printf("(");
2860                 print_exec(b->left, indent, bracket);
2861                 switch(b->op) {
2862                 case Plus:   fputs(" + ", stdout); break;
2863                 case Minus:  fputs(" - ", stdout); break;
2864                 case Times:  fputs(" * ", stdout); break;
2865                 case Divide: fputs(" / ", stdout); break;
2866                 case Rem:    fputs(" % ", stdout); break;
2867                 case Concat: fputs(" ++ ", stdout); break;
2868                 default: abort();       // NOTEST
2869                 }                       // NOTEST
2870                 print_exec(b->right, indent, bracket);
2871                 if (bracket) printf(")");
2872                 break;
2873         case Absolute:
2874                 if (bracket) printf("(");
2875                 printf("+");
2876                 print_exec(b->right, indent, bracket);
2877                 if (bracket) printf(")");
2878                 break;
2879         case Negate:
2880                 if (bracket) printf("(");
2881                 printf("-");
2882                 print_exec(b->right, indent, bracket);
2883                 if (bracket) printf(")");
2884                 break;
2885         case Bracket:
2886                 printf("(");
2887                 print_exec(b->right, indent, bracket);
2888                 printf(")");
2889                 break;
2890
2891 ###### propagate binode cases
2892         case Plus:
2893         case Minus:
2894         case Times:
2895         case Rem:
2896         case Divide:
2897                 /* both must be numbers, result is Tnum */
2898         case Absolute:
2899         case Negate:
2900                 /* as propagate_types ignores a NULL,
2901                  * unary ops fit here too */
2902                 propagate_types(b->left, c, ok, Tnum, 0);
2903                 propagate_types(b->right, c, ok, Tnum, 0);
2904                 if (!type_compat(type, Tnum, 0))
2905                         type_err(c, "error: Arithmetic returns %1 but %2 expected", prog,
2906                                    Tnum, rules, type);
2907                 return Tnum;
2908
2909         case Concat:
2910                 /* both must be Tstr, result is Tstr */
2911                 propagate_types(b->left, c, ok, Tstr, 0);
2912                 propagate_types(b->right, c, ok, Tstr, 0);
2913                 if (!type_compat(type, Tstr, 0))
2914                         type_err(c, "error: Concat returns %1 but %2 expected", prog,
2915                                    Tstr, rules, type);
2916                 return Tstr;
2917
2918         case Bracket:
2919                 return propagate_types(b->right, c, ok, type, 0);
2920
2921 ###### interp binode cases
2922
2923         case Plus:
2924                 rv = interp_exec(b->left);
2925                 right = interp_exec(b->right);
2926                 mpq_add(rv.num, rv.num, right.num);
2927                 break;
2928         case Minus:
2929                 rv = interp_exec(b->left);
2930                 right = interp_exec(b->right);
2931                 mpq_sub(rv.num, rv.num, right.num);
2932                 break;
2933         case Times:
2934                 rv = interp_exec(b->left);
2935                 right = interp_exec(b->right);
2936                 mpq_mul(rv.num, rv.num, right.num);
2937                 break;
2938         case Divide:
2939                 rv = interp_exec(b->left);
2940                 right = interp_exec(b->right);
2941                 mpq_div(rv.num, rv.num, right.num);
2942                 break;
2943         case Rem: {
2944                 mpz_t l, r, rem;
2945
2946                 left = interp_exec(b->left);
2947                 right = interp_exec(b->right);
2948                 mpz_init(l); mpz_init(r); mpz_init(rem);
2949                 mpz_tdiv_q(l, mpq_numref(left.num), mpq_denref(left.num));
2950                 mpz_tdiv_q(r, mpq_numref(right.num), mpq_denref(right.num));
2951                 mpz_tdiv_r(rem, l, r);
2952                 rv = val_init(Tnum);
2953                 mpq_set_z(rv.num, rem);
2954                 mpz_clear(r); mpz_clear(l); mpz_clear(rem);
2955                 break;
2956         }
2957         case Negate:
2958                 rv = interp_exec(b->right);
2959                 mpq_neg(rv.num, rv.num);
2960                 break;
2961         case Absolute:
2962                 rv = interp_exec(b->right);
2963                 mpq_abs(rv.num, rv.num);
2964                 break;
2965         case Bracket:
2966                 rv = interp_exec(b->right);
2967                 break;
2968         case Concat:
2969                 left = interp_exec(b->left);
2970                 right = interp_exec(b->right);
2971                 rv.type = Tstr;
2972                 rv.str = text_join(left.str, right.str);
2973                 break;
2974
2975 ###### value functions
2976
2977         static struct text text_join(struct text a, struct text b)     // returns a newly allocated text: bytes of 'a' followed by bytes of 'b'
2978         {
2979                 struct text rv;
2980                 rv.len = a.len + b.len;
2981                 rv.txt = malloc(rv.len);        // exactly 'len' bytes: the result is not NUL-terminated
2982                 memcpy(rv.txt, a.txt, a.len);   // NOTE(review): malloc result is used without a NULL check
2983                 memcpy(rv.txt+a.len, b.txt, b.len);
2984                 return rv;                      // 'a' and 'b' are not modified or freed here; presumably the caller frees rv.txt - confirm
2985         }
2986
2987 ### Blocks, Statements, and Statement lists.
2988
2989 Now that we have expressions out of the way we need to turn to
2990 statements.  There are simple statements and more complex statements.
2991 Simple statements do not contain (syntactic) newlines, complex statements do.
2992
2993 Statements often come in sequences and we have corresponding simple
2994 statement lists and complex statement lists.
2995 The former comprise only simple statements separated by semicolons.
2996 The latter comprise complex statements and simple statement lists.  They are
2997 separated by newlines.  Thus the semicolon is only used to separate
2998 simple statements on the one line.  This may be overly restrictive,
2999 but I'm not sure I ever want a complex statement to share a line with
3000 anything else.
3001
3002 Note that a simple statement list can still use multiple lines if
3003 subsequent lines are indented, so
3004
3005 ###### Example: wrapped simple statement list
3006
3007         a = b; c = d;
3008            e = f; print g
3009
3010 is a single simple statement list.  This might allow room for
3011 confusion, so I'm not set on it yet.
3012
3013 A simple statement list needs no extra syntax.  A complex statement
3014 list has two syntactic forms.  It can be enclosed in braces (much like
3015 C blocks), or it can be introduced by a colon and continue until an
3016 unindented newline (much like Python blocks).  With this extra syntax
3017 it is referred to as a block.
3018
3019 Note that a block does not have to include any newlines if it only
3020 contains simple statements.  So both of:
3021
3022         if condition: a=b; d=f
3023
3024         if condition { a=b; print f }
3025
3026 are valid.
3027
3028 In either case the list is constructed from a `binode` list with
3029 `Block` as the operator.  When parsing the list it is most convenient
3030 to append to the end, so a list is a list and a statement.  When using
3031 the list it is more convenient to consider a list to be a statement
3032 and a list.  So we need a function to re-order a list.
3033 `reorder_bilist` serves this purpose.
3034
3035 The only stand-alone statement we introduce at this stage is `pass`
3036 which does nothing and is represented as a `NULL` pointer in a `Block`
3037 list.  Other stand-alone statements will follow once the infrastructure
3038 is in-place.
3039
3040 ###### Binode types
3041         Block,
3042
3043 ###### Grammar
3044
3045         $void
3046         Newlines -> NEWLINE
3047                 | Newlines NEWLINE
3048
3049         $*binode
3050         Block -> Open Statementlist Close ${ $0 = $<2; }$
3051                 | Open SimpleStatements } ${ $0 = reorder_bilist($<2); }$
3052                 | : SimpleStatements ${ $0 = reorder_bilist($<2); }$
3053                 | : Statementlist  ${ $0 = $<2; }$
3054
3055         Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<1); }$
3056
3057         ComplexStatements -> ComplexStatements ComplexStatement ${
3058                         if ($2 == NULL) {
3059                                 $0 = $<1;
3060                         } else {
3061                                 $0 = new(binode);
3062                                 $0->op = Block;
3063                                 $0->left = $<1;
3064                                 $0->right = $<2;
3065                         }
3066                 }$
3067                 | ComplexStatement ${
3068                         if ($1 == NULL) {
3069                                 $0 = NULL;
3070                         } else {
3071                                 $0 = new(binode);
3072                                 $0->op = Block;
3073                                 $0->left = NULL;
3074                                 $0->right = $<1;
3075                         }
3076                 }$
3077
3078         $*exec
3079         ComplexStatement -> SimpleStatements NEWLINE ${
3080                         $0 = reorder_bilist($<1);
3081                         }$
3082                 | Newlines ${ $0 = NULL; }$
3083                 ## ComplexStatement Grammar
3084
3085         $*binode
3086         SimpleStatements -> SimpleStatements ; SimpleStatement ${
3087                         $0 = new(binode);
3088                         $0->op = Block;
3089                         $0->left = $<1;
3090                         $0->right = $<3;
3091                         }$
3092                 | SimpleStatement ${
3093                         $0 = new(binode);
3094                         $0->op = Block;
3095                         $0->left = NULL;
3096                         $0->right = $<1;
3097                         }$
3098                 | SimpleStatements ; ${ $0 = $<1; }$
3099
3100         SimpleStatement -> pass ${ $0 = NULL; }$
3101                 | ERROR ${ tok_err(c, "Syntax error in statement", &$1); }$
3102                 ## SimpleStatement Grammar
3103
3104 ###### print binode cases
3105         case Block:
                     /* Print one link of a statement list: `left` is the
                      * statement (NULL is printed as `pass`) and `right`, when
                      * set, is printed after it.  A negative indent selects the
                      * single-line form, otherwise each statement gets its own
                      * line.
                      */
3106                 if (indent < 0) {
3107                         // simple statement
                             // (single-line form: statements joined by "; ")
3108                         if (b->left == NULL)
3109                                 printf("pass");
3110                         else
3111                                 print_exec(b->left, indent, bracket);
3112                         if (b->right) {
3113                                 printf("; ");
3114                                 print_exec(b->right, indent, bracket);
3115                         }
3116                 } else {
3117                         // block, one per line
3118                         if (b->left == NULL)
3119                                 do_indent(indent, "pass\n");
3120                         else
3121                                 print_exec(b->left, indent, bracket);
3122                         if (b->right)
3123                                 print_exec(b->right, indent, bracket);
3124                 }
3125                 break;
3126
3127 ###### propagate binode cases
3128         case Block:
3129         {
3130                 /* If any statement returns something other than Tnone
3131                  * or Tbool then all such must return same type.
3132                  * As each statement may be Tnone or something else,
3133                  * we must always pass NULL (unknown) down, otherwise an incorrect
3134                  * error might occur.  We never return Tnone unless it is
3135                  * passed in.
3136                  */
3137                 struct binode *e;
3138
                     /* Walk the list through `right`; each statement is in `left`. */
3139                 for (e = b; e; e = cast(binode, e->right)) {
3140                         t = propagate_types(e->left, c, ok, NULL, rules);
                             /* With Rboolok a Boolean result is treated as no result. */
3141                         if ((rules & Rboolok) && t == Tbool)
3142                                 t = NULL;
                             /* The first typed result fixes the block's type (unless the
                              * caller already supplied one); later ones must agree.
                              */
3143                         if (t && t != Tnone && t != Tbool) {
3144                                 if (!type)
3145                                         type = t;
3146                                 else if (t != type)
3147                                         type_err(c, "error: expected %1%r, found %2",
3148                                                  e->left, type, rules, t);
3149                         }
3150                 }
3151                 return type;
3152         }
3153
3154 ###### interp binode cases
3155         case Block:
                     /* Run the statements in list order.  The loop stops as soon
                      * as a statement leaves a non-Tnone result in `rv`, so the
                      * remaining statements are not executed.  A NULL `left`
                      * (a `pass`) is simply stepped over.
                      */
3156                 while (rv.type == Tnone &&
3157                        b) {
3158                         if (b->left)
3159                                 rv = interp_exec(b->left);
3160                         b = cast(binode, b->right);
3161                 }
3162                 break;
3163
3164 ### The Print statement
3165
3166 `print` is a simple statement that takes a comma-separated list of
3167 expressions and prints the values separated by spaces and terminated
3168 by a newline.  No control of formatting is possible.
3169
3170 `print` faces the same list-ordering issue as blocks, and uses the
3171 same solution.
3172
3173 ###### Binode types
3174         Print,
3175
3176 ###### SimpleStatement Grammar
3177
3178         | print ExpressionList ${
3179                 $0 = reorder_bilist($<2);
3180         }$
3181         | print ExpressionList , ${
3182                 $0 = new(binode);
3183                 $0->op = Print;
3184                 $0->right = NULL;
3185                 $0->left = $<2;
3186                 $0 = reorder_bilist($0);
3187         }$
3188         | print ${
3189                 $0 = new(binode);
3190                 $0->op = Print;
3191                 $0->right = NULL;
3192         }$
3193
3194 ###### Grammar
3195
3196         $*binode
3197         ExpressionList -> ExpressionList , Expression ${
3198                 $0 = new(binode);
3199                 $0->op = Print;
3200                 $0->left = $<1;
3201                 $0->right = $<3;
3202                 }$
3203                 | Expression ${
3204                         $0 = new(binode);
3205                         $0->op = Print;
3206                         $0->left = NULL;
3207                         $0->right = $<1;
3208                 }$
3209
3210 ###### print binode cases
3211
3212         case Print:
                     /* Emit "print" followed by each expression found in a `left`
                      * slot.  A "," is written after an expression whenever another
                      * list node follows it.  The newline is only written when
                      * printing with a non-negative indent.
                      */
3213                 do_indent(indent, "print");
3214                 while (b) {
3215                         if (b->left) {
3216                                 printf(" ");
3217                                 print_exec(b->left, -1, bracket);
3218                                 if (b->right)
3219                                         printf(",");
3220                         }
3221                         b = cast(binode, b->right);
3222                 }
3223                 if (indent >= 0)
3224                         printf("\n");
3225                 break;
3226
3227 ###### propagate binode cases
3228
3229         case Print:
3230                 /* don't care but all must be consistent */
                     /* NULL is passed as the expected type, so no particular type
                      * is demanded of either side; Rnolabel is applied to both.
                      */
3231                 propagate_types(b->left, c, ok, NULL, Rnolabel);
3232                 propagate_types(b->right, c, ok, NULL, Rnolabel);
3233                 break;
3234
3235 ###### interp binode cases
3236
3237         case Print:
3238         {
                     /* `sep` is the separator written before the next value; it
                      * stays 0 until a value that has a following list node has
                      * been printed.  `eol` says whether to finish with a newline.
                      */
3239                 char sep = 0;
3240                 int eol = 1;
3241                 for ( ; b; b = cast(binode, b->right))
3242                         if (b->left) {
3243                                 if (sep)
3244                                         putchar(sep);
3245                                 left = interp_exec(b->left);
3246                                 print_value(left);
3247                                 free_value(left);
3248                                 if (b->right)
3249                                         sep = ' ';
3250                         } else if (sep)
                                     /* An empty node reached after something was
                                      * printed suppresses the final newline.
                                      */
3251                                 eol = 0;
                     /* `left` was already freed above; presumably resetting its type
                      * stops later cleanup from freeing it again - TODO confirm.
                      */
3252                 left.type = Tnone;
3253                 if (eol)
3254                         printf("\n");
3255                 break;
3256         }
3257
3258 ### The Assignment statement
3259
3260 An assignment will assign a value to a variable, providing it hasn't
3261 been declared as a constant.  The analysis phase ensures that the type
3262 will be correct so the interpreter just needs to perform the
3263 calculation.  There is a form of assignment which declares a new
3264 variable as well as assigning a value.  If a name is assigned before
3265 it is declared, an error will be raised as the name is created as
3266 `Tlabel` and it is illegal to assign to such names.
3267
3268 ###### Binode types
3269         Assign,
3270         Declare,
3271
3272 ###### SimpleStatement Grammar
3273         | Variable = Expression ${
3274                         $0 = new(binode);
3275                         $0->op = Assign;
3276                         $0->left = $<1;
3277                         $0->right = $<3;
3278                 }$
3279         | VariableDecl = Expression ${
3280                         $0 = new(binode);
3281                         $0->op = Declare;
3282                         $0->left = $<1;
3283                         $0->right =$<3;
3284                 }$
3285
3286         | VariableDecl ${
3287                         if ($1->var->where_set == NULL) {
3288                                 type_err(c,
3289                                          "Variable declared with no type or value: %v",
3290                                          $1, NULL, 0, NULL);
3291                         } else {
3292                                 $0 = new(binode);
3293                                 $0->op = Declare;
3294                                 $0->left = $<1;
3295                                 $0->right = NULL;
3296                         }
3297                 }$
3298
3299 ###### print binode cases
3300
3301         case Assign:
                     /* Printed as "<left> = <right>", with a newline only when
                      * printing at a non-negative indent.
                      */
3302                 do_indent(indent, "");
3303                 print_exec(b->left, indent, bracket);
3304                 printf(" = ");
3305                 print_exec(b->right, indent, bracket);
3306                 if (indent >= 0)
3307                         printf("\n");
3308                 break;
3309
3310         case Declare:
3311                 {
                     /* Print the name, then "::" for a constant or ":" for an
                      * ordinary variable.  When the variable was set at the point
                      * of declaration (where_decl == where_set) the type is printed
                      * after the marker; otherwise only the marker is printed.
                      * The "= value" part follows if there is an initialiser.
                      */
3312                 struct variable *v = cast(var, b->left)->var;
3313                 do_indent(indent, "");
3314                 print_exec(b->left, indent, bracket);
3315                 if (cast(var, b->left)->var->constant) {
3316                         if (v->where_decl == v->where_set) {
3317                                 printf("::");
3318                                 type_print(v->val.type, stdout);
3319                                 printf(" ");
3320                         } else
3321                                 printf(" ::");
3322                 } else {
3323                         if (v->where_decl == v->where_set) {
3324                                 printf(":");
3325                                 type_print(v->val.type, stdout);
3326                                 printf(" ");
3327                         } else
3328                                 printf(" :");
3329                 }
3330                 if (b->right) {
3331                         printf("= ");
3332                         print_exec(b->right, indent, bracket);
3333                 }
3334                 if (indent >= 0)
3335                         printf("\n");
3336                 }
3337                 break;
3338
3339 ###### propagate binode cases
3340
3341         case Assign:
3342         case Declare:
3343                 /* Both must match and not be labels,
3344                  * Type must support 'dup',
3345                  * For Assign, left must not be constant.
3346                  * result is Tnone
3347                  */
                     /* First ask the left side for its type without imposing one. */
3348                 t = propagate_types(b->left, c, ok, NULL,
3349                                     Rnolabel | (b->op == Assign ? Rnoconstant : 0));
                     /* No right-hand side: nothing further to check. */
3350                 if (!b->right)
3351                         return Tnone;
3352
3353                 if (t) {
                             /* Left type is known: the right side is checked against
                              * it, and on a mismatch with a variable on the left an
                              * extra "info" message points at where it was set.
                              */
3354                         if (propagate_types(b->right, c, ok, t, 0) != t)
3355                                 if (b->left->type == Xvar)
3356                                         type_err(c, "info: variable '%v' was set as %1 here.",
3357                                                  cast(var, b->left)->var->where_set, t, rules, NULL);
3358                 } else {
                             /* Left type unknown: take the type from the right side
                              * and, if one is found, push it back to the left.
                              */
3359                         t = propagate_types(b->right, c, ok, NULL, Rnolabel);
3360                         if (t)
3361                                 propagate_types(b->left, c, ok, t,
3362                                                 (b->op == Assign ? Rnoconstant : 0));
3363                 }
                     /* A type without a `dup` operation cannot be assigned. */
3364                 if (t && t->dup == NULL)
3365                         type_err(c, "error: cannot assign value of type %1", b, t, 0, NULL);
3366                 return Tnone;
3367
                     /* Not reached: every path above returns. */
3368                 break;
3369
3370 ###### interp binode cases
3371
3372         case Assign:
                     /* `lleft` points at the storage for the left side.  The old
                      * value there is freed and replaced by the newly computed one;
                      * if no storage was returned the new value is discarded.
                      */
3373                 lleft = linterp_exec(b->left);
3374                 right = interp_exec(b->right);
3375                 if (lleft) {
3376                         free_value(*lleft);
3377                         *lleft = right;
3378                 } else
3379                         free_value(right);      // NOTEST
                     /* The value has been stored or freed; presumably clearing the
                      * type keeps later cleanup from freeing it again - TODO confirm.
                      */
3380                 right.type = NULL;
3381                 break;
3382
3383         case Declare:
3384         {
                     /* Store the initial value in the declared variable, following
                      * the `merged` link when one is set.  Without an initialiser
                      * the value comes from val_init() for the variable's type.
                      */
3385                 struct variable *v = cast(var, b->left)->var;
3386                 if (v->merged)
3387                         v = v->merged;
3388                 if (b->right)
3389                         right = interp_exec(b->right);
3390                 else
3391                         right = val_init(v->val.type);
3392                 free_value(v->val);
3393                 v->val = right;
                     /* The value now lives in `v->val`; presumably clearing the type
                      * keeps later cleanup from freeing it - TODO confirm.
                      */
3394                 right.type = NULL;
3395                 break;
3396         }
3397
3398 ### The `use` statement
3399
3400 The `use` statement is the last "simple" statement.  It is needed when
3401 the condition in a conditional statement is a block.  `use` works much
3402 like `return` in C, but only completes the `condition`, not the whole
3403 function.
3404
3405 ###### Binode types
3406         Use,
3407
3408 ###### SimpleStatement Grammar
3409         | use Expression ${
3410                 $0 = new_pos(binode, $1);
3411                 $0->op = Use;
3412                 $0->right = $<2;
3413                 if ($0->right->type == Xvar) {
3414                         struct var *v = cast(var, $0->right);
3415                         if (v->var->val.type == Tnone) {
3416                                 /* Convert this to a label */
3417                                 v->var->val = val_prepare(Tlabel);
3418                                 v->var->val.label = &v->var->val;
3419                         }
3420                 }
3421         }$
3422
3423 ###### print binode cases
3424
3425         case Use:
                     /* "use <expr>"; the expression is printed in single-line form
                      * (indent -1) and the newline depends on the caller's indent.
                      */
3426                 do_indent(indent, "use ");
3427                 print_exec(b->right, -1, bracket);
3428                 if (indent >= 0)
3429                         printf("\n");
3430                 break;
3431
3432 ###### propagate binode cases
3433
3434         case Use:
3435                 /* result matches value */
                     /* The expected type is handed straight to the expression and
                      * the expression's type is returned as the statement's type.
                      */
3436                 return propagate_types(b->right, c, ok, type, 0);
3437
3438 ###### interp binode cases
3439
3440         case Use:
                     /* The expression's value becomes this statement's result `rv`. */
3441                 rv = interp_exec(b->right);
3442                 break;
3443
3444 ### The Conditional Statement
3445
3446 This is the biggy and currently the only complex statement.  This
3447 subsumes `if`, `while`, `do/while`, `switch`, and some parts of `for`.
3448 It is comprised of a number of parts, all of which are optional though
3449 set combinations apply.  Each part is (usually) a key word (`then` is
3450 sometimes optional) followed by either an expression or a code block,
3451 except the `casepart` which is a "key word and an expression" followed
3452 by a code block.  The code-block option is valid for all parts and,
3453 where an expression is also allowed, the code block can use the `use`
3454 statement to report a value.  If the code block does not report a value
3455 the effect is similar to reporting `True`.
3456
3457 The `else` and `case` parts, as well as `then` when combined with
3458 `if`, can contain a `use` statement which will apply to some
3459 containing conditional statement. `for` parts, `do` parts and `then`
3460 parts used with `for` can never contain a `use`, except in some
3461 subordinate conditional statement.
3462
3463 If there is a `forpart`, it is executed first, only once.
3464 If there is a `dopart`, then it is executed repeatedly providing
3465 always that the `condpart` or `cond`, if present, does not return a non-True
3466 value.  `condpart` can fail to return any value if it simply executes
3467 to completion.  This is treated the same as returning `True`.
3468
3469 If there is a `thenpart` it will be executed whenever the `condpart`
3470 or `cond` returns True (or does not return any value), but this will happen
3471 *after* `dopart` (when present).
3472
3473 If `elsepart` is present it will be executed at most once when the
3474 condition returns `False` or some value that isn't `True` and isn't
3475 matched by any `casepart`.  If there are any `casepart`s, they will be
3476 executed when the condition returns a matching value.
3477
3478 The particular sorts of values allowed in case parts has not yet been
3479 determined in the language design, so nothing is prohibited.
3480
3481 The various blocks in this complex statement potentially provide scope
3482 for variables as described earlier.  Each such block must include the
3483 "OpenScope" nonterminal before parsing the block, and must call
3484 `var_block_close()` when closing the block.
3485
3486 The code following "`if`", "`switch`" and "`for`" does not get its own
3487 scope, but is in a scope covering the whole statement, so names
3488 declared there cannot be redeclared elsewhere.  Similarly the
3489 condition following "`while`" is in a scope that covers the body
3490 ("`do`" part) of the loop, and which does not allow conditional scope
3491 extension.  Code following "`then`" (both looping and non-looping),
3492 "`else`" and "`case`" each get their own local scope.
3493
3494 The type requirements on the code block in a `whilepart` are quite
3495 unusual.  It is allowed to return a value of some identifiable type, in
3496 which case the loop aborts and an appropriate `casepart` is run, or it
3497 can return a Boolean, in which case the loop either continues to the
3498 `dopart` (on `True`) or aborts and runs the `elsepart` (on `False`).
3499 This is different both from the `ifpart` code block which is expected to
3500 return a Boolean, or the `switchpart` code block which is expected to
3501 return the same type as the casepart values.  The correct analysis of
3502 the type of the `whilepart` code block is the reason for the
3503 `Rboolok` flag which is passed to `propagate_types()`.
3504
3505 The `cond_statement` cannot fit into a `binode` so a new `exec` is
3506 defined.
3507
3508 ###### exec type
3509         Xcond_statement,
3510
3511 ###### ast
3512         struct casepart {
                     /* One `case` arm of a conditional statement; arms form a
                      * singly linked list through `next`.
                      */
3513                 struct exec *value;     /* the expression following `case` */
3514                 struct exec *action;    /* the block attached to this arm */
3515                 struct casepart *next;  /* next arm, or NULL at the end */
3516         };
3517         struct cond_statement {
                     /* The anonymous `struct exec` member embeds the common exec
                      * header, so the node carries a `type` field (set to
                      * Xcond_statement by the grammar).  Each part pointer may be
                      * NULL when that part is absent.
                      */
3518                 struct exec;
3519                 struct exec *forpart, *condpart, *dopart, *thenpart, *elsepart;
3520                 struct casepart *casepart;      /* list of case arms, NULL if none */
3521         };
3522
3523 ###### ast functions
3524
3525         static void free_casepart(struct casepart *cp)
3526         {
3527                 struct casepart *rest;
3528                 /* Release every arm of the list together with its value and action. */
3529                 for (; cp; cp = rest) {
3530                         free_exec(cp->value);
3531                         free_exec(cp->action);
3532                         rest = cp->next;
3533                         free(cp);
3534                 }
3535         }
3536
3537         static void free_cond_statement(struct cond_statement *s)
3538         {
3539                 if (s != NULL) {        /* a NULL statement has nothing to release */
3540                         free_exec(s->forpart);
3541                         free_exec(s->condpart);
3542                         free_exec(s->dopart);
3543                         free_exec(s->thenpart);
3544                         free_exec(s->elsepart);
3545                         free_casepart(s->casepart);
3546                         free(s);
3547                 }
3548         }
3549
3550 ###### free exec cases
3551         case Xcond_statement: free_cond_statement(cast(cond_statement, e)); break;
3552
3553 ###### ComplexStatement Grammar
3554         | CondStatement ${ $0 = $<1; }$
3555
3556 ###### Grammar
3557
3558         $*cond_statement
3559         // both forPart and WhilePart open scopes, and CondSuffix only
3560         // closes one - so in the first two branches here we have another to close.
3561         CondStatement -> forPart ThenPart WhilePart CondSuffix ${
3562                         $0 = $<4;
3563                         $0->forpart = $<1;
3564                         $0->thenpart = $<2;
3565                         $0->condpart = $3.condpart; $3.condpart = NULL;
3566                         $0->dopart = $3.dopart; $3.dopart = NULL;
3567                         var_block_close(c, CloseSequential);
3568                         }$
3569                 |  forPart WhilePart CondSuffix ${
3570                         $0 = $<3;
3571                         $0->forpart = $<1;
3572                         $0->thenpart = NULL;
3573                         $0->condpart = $2.condpart; $2.condpart = NULL;
3574                         $0->dopart = $2.dopart; $2.dopart = NULL;
3575                         var_block_close(c, CloseSequential);
3576                         }$
3577                 | whilePart CondSuffix ${
3578                         $0 = $<2;
3579                         $0->condpart = $1.condpart; $1.condpart = NULL;
3580                         $0->dopart = $1.dopart; $1.dopart = NULL;
3581                         }$
3582                 | switchPart CondSuffix ${
3583                         $0 = $<2;
3584                         $0->condpart = $<1;
3585                         }$
3586                 | ifPart IfSuffix ${
3587                         $0 = $<2;
3588                         $0->condpart = $1.condpart; $1.condpart = NULL;
3589                         $0->thenpart = $1.thenpart; $1.thenpart = NULL;
3590                         // This is where we close an "if" statement
3591                         var_block_close(c, CloseSequential);
3592                         }$
3593
3594         CondSuffix -> IfSuffix ${
3595                         $0 = $<1;
3596                         // This is where we close scope of the whole
3597                         // "for" or "while" statement
3598                         var_block_close(c, CloseSequential);
3599                 }$
3600                 | CasePart CondSuffix ${
3601                         $0 = $<2;
3602                         $1->next = $0->casepart;
3603                         $0->casepart = $<1;
3604                 }$
3605
3606         $void
3607         Case -> case
3608                 | NEWLINE Case
3609         $*casepart
3610         CasePart -> Case Expression OpenScope Block ${
3611                         $0 = calloc(1,sizeof(struct casepart));
3612                         $0->value = $<2;
3613                         $0->action = $<4;
3614                         var_block_close(c, CloseParallel);
3615                 }$
3616
3617         $*cond_statement
3618         IfSuffix ->  ${ $0 = new(cond_statement); }$
3619                 | NEWLINE IfSuffix ${ $0 = $<2; }$
3620                 | else OpenScope Block ${
3621                         $0 = new(cond_statement);
3622                         $0->elsepart = $<3;
3623                         var_block_close(c, CloseElse);
3624                 }$
3625                 | else OpenScope CondStatement ${
3626                         $0 = new(cond_statement);
3627                         $0->elsepart = $<3;
3628                         var_block_close(c, CloseElse);
3629                 }$
3630
3631         $void
3632         Then -> then
3633                 | NEWLINE Then
3634         While -> while
3635                 | NEWLINE While
3636         Do -> do
3637                 | NEWLINE Do
3638         $*exec
3639         // These scopes are closed in CondSuffix
3640         forPart -> for OpenScope SimpleStatements ${
3641                         $0 = reorder_bilist($<3);
3642                 }$
3643                 |  for OpenScope Block ${
3644                         $0 = $<3;
3645                 }$
3646
3647         ThenPart -> Then OpenScope SimpleStatements ${
3648                         $0 = reorder_bilist($<3);
3649                         var_block_close(c, CloseSequential);
3650                 }$
3651                 |  Then OpenScope Block ${
3652                         $0 = $<3;
3653                         var_block_close(c, CloseSequential);
3654                 }$
3655
3656         // This scope is closed in CondSuffix
3657         WhileHead -> While OpenScope Block ${
3658                 $0 = $<3;
3659                 }$
3660         whileHead -> while OpenScope Block ${
3661                 $0 = $<3;
3662                 }$
3663
3664         $cond_statement
3665         // This scope is closed in CondSuffix
3666         whilePart -> while OpenScope Expression Block ${
3667                         $0.type = Xcond_statement;
3668                         $0.condpart = $<3;
3669                         $0.dopart = $<4;
3670                 }$
3671                 | whileHead Do Block ${
3672                         $0.type = Xcond_statement;
3673                         $0.condpart = $<1;
3674                         $0.dopart = $<3;
3675                 }$
3676         WhilePart -> While OpenScope Expression Block ${
3677                         $0.type = Xcond_statement;
3678                         $0.condpart = $<3;
3679                         $0.dopart = $<4;
3680                 }$
3681                 | WhileHead Do Block ${
3682                         $0.type = Xcond_statement;
3683                         $0.condpart = $<1;
3684                         $0.dopart = $<3;
3685                 }$
3686
3687         ifPart -> if OpenScope Expression OpenScope Block ${
3688                         $0.type = Xcond_statement;
3689                         $0.condpart = $<3;
3690                         $0.thenpart = $<5;
3691                         var_block_close(c, CloseParallel);
3692                 }$
3693                 | if OpenScope Block Then OpenScope Block ${
3694                         $0.type = Xcond_statement;
3695                         $0.condpart = $<3;
3696                         $0.thenpart = $<6;
3697                         var_block_close(c, CloseParallel);
3698                 }$
3699
3700         $*exec
3701         // This scope is closed in CondSuffix
3702         switchPart -> switch OpenScope Expression ${
3703                         $0 = $<3;
3704                 }$
3705                 | switch OpenScope Block ${
3706                         $0 = $<3;
3707                 }$
3708
3709 ###### print exec cases
3710
3711         case Xcond_statement:
3712         {
                     /* Print a conditional statement part by part: the optional
                      * `for` header, then either the loop form (when there is a
                      * `dopart`) or the if/switch form, then any case arms and the
                      * else part.  `bracket` selects "{ ... }" blocks instead of
                      * ":"-introduced ones.
                      */
3713                 struct cond_statement *cs = cast(cond_statement, e);
3714                 struct casepart *cp;
3715                 if (cs->forpart) {
3716                         do_indent(indent, "for");
3717                         if (bracket) printf(" {\n"); else printf(":\n");
3718                         print_exec(cs->forpart, indent+1, bracket);
3719                         if (cs->thenpart) {
3720                                 if (bracket)
3721                                         do_indent(indent, "} then {\n");
3722                                 else
3723                                         do_indent(indent, "then:\n");
3724                                 print_exec(cs->thenpart, indent+1, bracket);
3725                         }
3726                         if (bracket) do_indent(indent, "}\n");
3727                 }
3728                 if (cs->dopart) {
3729                         // a loop
3730                         if (cs->condpart && cs->condpart->type == Xbinode &&
3731                             cast(binode, cs->condpart)->op == Block) {
                                     /* The condition is itself a block: print it as
                                      * a `while` block followed by a `do` block.
                                      */
3732                                 if (bracket)
3733                                         do_indent(indent, "while {\n");
3734                                 else
3735                                         do_indent(indent, "while:\n");
3736                                 print_exec(cs->condpart, indent+1, bracket);
3737                                 if (bracket)
3738                                         do_indent(indent, "} do {\n");
3739                                 else
3740                                         do_indent(indent, "do:\n");
3741                                 print_exec(cs->dopart, indent+1, bracket);
3742                                 if (bracket)
3743                                         do_indent(indent, "}\n");
3744                         } else {
                                     /* Otherwise the condition is printed on the
                                      * `while` line and the body follows.
                                      */
3745                                 do_indent(indent, "while ");
3746                                 print_exec(cs->condpart, 0, bracket);
3747                                 if (bracket)
3748                                         printf(" {\n");
3749                                 else
3750                                         printf(":\n");
3751                                 print_exec(cs->dopart, indent+1, bracket);
3752                                 if (bracket)
3753                                         do_indent(indent, "}\n");
3754                         }
3755                 } else {
3756                         // a condition
                             // (printed as `switch` when there are case arms, else `if`)
3757                         if (cs->casepart)
3758                                 do_indent(indent, "switch");
3759                         else
3760                                 do_indent(indent, "if");
3761                         if (cs->condpart && cs->condpart->type == Xbinode &&
3762                             cast(binode, cs->condpart)->op == Block) {
3763                                 if (bracket)
3764                                         printf(" {\n");
3765                                 else
3766                                         printf(":\n");
3767                                 print_exec(cs->condpart, indent+1, bracket);
3768                                 if (bracket)
3769                                         do_indent(indent, "}\n");
                                     /* Note: this then part is introduced with
                                      * "then:" whether or not `bracket` is set.
                                      */
3770                                 if (cs->thenpart) {
3771                                         do_indent(indent, "then:\n");
3772                                         print_exec(cs->thenpart, indent+1, bracket);
3773                                 }
3774                         } else {
3775                                 printf(" ");
3776                                 print_exec(cs->condpart, 0, bracket);
3777                                 if (cs->thenpart) {
3778                                         if (bracket)
3779                                                 printf(" {\n");
3780                                         else
3781                                                 printf(":\n");
3782                                         print_exec(cs->thenpart, indent+1, bracket);
3783                                         if (bracket)
3784                                                 do_indent(indent, "}\n");
3785                                 } else
3786                                         printf("\n");
3787                         }
3788                 }
                     /* Case arms are printed in list order; each value is printed
                      * in single-line form.
                      */
3789                 for (cp = cs->casepart; cp; cp = cp->next) {
3790                         do_indent(indent, "case ");
3791                         print_exec(cp->value, -1, 0);
3792                         if (bracket)
3793                                 printf(" {\n");
3794                         else
3795                                 printf(":\n");
3796                         print_exec(cp->action, indent+1, bracket);
3797                         if (bracket)
3798                                 do_indent(indent, "}\n");
3799                 }
3800                 if (cs->elsepart) {
3801                         do_indent(indent, "else");
3802                         if (bracket)
3803                                 printf(" {\n");
3804                         else
3805                                 printf(":\n");
3806                         print_exec(cs->elsepart, indent+1, bracket);
3807                         if (bracket)
3808                                 do_indent(indent, "}\n");
3809                 }
3810                 break;
3811         }
3812
3813 ###### propagate exec cases
3814         case Xcond_statement:
3815         {
3816                 // forpart and dopart must return Tnone
3817                 // thenpart must return Tnone if there is a dopart,
3818                 // otherwise it is like elsepart.
3819                 // condpart must:
3820                 //    be bool if there is no casepart
3821                 //    match casepart->values if there is a switchpart
3822                 //    either be bool or match casepart->value if there
3823                 //             is a whilepart
3824                 // elsepart and casepart->action must match the return type
3825                 //   expected of this statement.
3826                 struct cond_statement *cs = cast(cond_statement, prog);
3827                 struct casepart *cp;
3828
                     // A part that is not compatible with Tnone clears *ok.
3829                 t = propagate_types(cs->forpart, c, ok, Tnone, 0);
3830                 if (!type_compat(Tnone, t, 0))
3831                         *ok = 0;
3832                 t = propagate_types(cs->dopart, c, ok, Tnone, 0);
3833                 if (!type_compat(Tnone, t, 0))
3834                         *ok = 0;
3835                 if (cs->dopart) {
                             // With a dopart, thenpart must yield Tnone as well.
3836                         t = propagate_types(cs->thenpart, c, ok, Tnone, 0);
3837                         if (!type_compat(Tnone, t, 0))
3838                                 *ok = 0;
3839                 }
3840                 if (cs->casepart == NULL)
3841                         propagate_types(cs->condpart, c, ok, Tbool, 0);
3842                 else {
3843                         /* Condpart must match case values, with bool permitted */
3844                         t = NULL;
                             // Take the type from the first case value that yields one ...
3845                         for (cp = cs->casepart;
3846                              cp && !t; cp = cp->next)
3847                                 t = propagate_types(cp->value, c, ok, NULL, 0);
                             // ... or, failing that, from the condition itself.
3848                         if (!t && cs->condpart)
3849                                 t = propagate_types(cs->condpart, c, ok, NULL, Rboolok);
3850                         // Now we have a type (I hope) push it down
3851                         if (t) {
3852                                 for (cp = cs->casepart; cp; cp = cp->next)
3853                                         propagate_types(cp->value, c, ok, t, 0);
3854                                 propagate_types(cs->condpart, c, ok, t, Rboolok);
3855                         }
3856                 }
3857                 // (if)then, else, and case parts must return expected type.
                     // When no 'type' has been supplied, adopt the first one that
                     // thenpart (only without a dopart), elsepart, or a case action
                     // provides; then require that type of every one of them.
3858                 if (!cs->dopart && !type)
3859                         type = propagate_types(cs->thenpart, c, ok, NULL, rules);
3860                 if (!type)
3861                         type = propagate_types(cs->elsepart, c, ok, NULL, rules);
3862                 for (cp = cs->casepart;
3863                      cp && !type;
3864                      cp = cp->next)
3865                         type = propagate_types(cp->action, c, ok, NULL, rules);
3866                 if (type) {
3867                         if (!cs->dopart)
3868                                 propagate_types(cs->thenpart, c, ok, type, rules);
3869                         propagate_types(cs->elsepart, c, ok, type, rules);
3870                         for (cp = cs->casepart; cp ; cp = cp->next)
3871                                 propagate_types(cp->action, c, ok, type, rules);
3872                         return type;
3873                 } else
3874                         return NULL;
3875         }
3876
3877 ###### interp exec cases
3878         case Xcond_statement:
3879         {
                     // Interpret a conditional statement, leaving its value in rv:
                     // run forpart once, loop over cond/do/then for as long as
                     // there is a dopart and the condition holds, then select a
                     // casepart or fall back to the elsepart.
3880                 struct value v, cnd;
3881                 struct casepart *cp;
3882                 struct cond_statement *c = cast(cond_statement, e);
3883
3884                 if (c->forpart)
3885                         interp_exec(c->forpart);
3886                 do {
                             // A missing condpart counts as Tnone, which lets the
                             // body run.
3887                         if (c->condpart)
3888                                 cnd = interp_exec(c->condpart);
3889                         else
3890                                 cnd.type = Tnone;
                             // Only Tnone or a true Bool keeps going; anything else
                             // leaves the loop with cnd kept for the case matching.
3891                         if (!(cnd.type == Tnone ||
3892                               (cnd.type == Tbool && cnd.bool != 0)))
3893                                 break;
3894                         // cnd is Tnone or Tbool, doesn't need to be freed
3895                         if (c->dopart)
3896                                 interp_exec(c->dopart);
3897
3898                         if (c->thenpart) {
3899                                 rv = interp_exec(c->thenpart);
                                     // Without a dopart, thenpart supplies the result.
                                     // With one, only a non-Tnone value ends the loop.
3900                                 if (rv.type != Tnone || !c->dopart)
3901                                         goto Xcond_done;
3902                                 free_value(rv);
3903                         }
3904                 } while (c->dopart);
3905
                     // The first case whose value compares equal to cnd supplies
                     // the result.
3906                 for (cp = c->casepart; cp; cp = cp->next) {
3907                         v = interp_exec(cp->value);
3908                         if (value_cmp(v, cnd) == 0) {
3909                                 free_value(v);
3910                                 free_value(cnd);
3911                                 rv = interp_exec(cp->action);
3912                                 goto Xcond_done;
3913                         }
3914                         free_value(v);
3915                 }
3916                 free_value(cnd);
                     // No case matched: use the elsepart, or Tnone if there is none.
3917                 if (c->elsepart)
3918                         rv = interp_exec(c->elsepart);
3919                 else
3920                         rv.type = Tnone;
3921         Xcond_done:
3922                 break;
3923         }
3924
3925 ### Top level structure
3926
3927 All the language elements so far can be used in various places.  Now
3928 it is time to clarify what those places are.
3929
3930 At the top level of a file there will be a number of declarations.
3931 Many of the things that can be declared haven't been described yet,
3932 such as functions, procedures, imports, and probably more.
3933 For now there are three sorts of things that can appear at the top
3934 level.  They are predefined constants, `struct` types, and the main
3935 program.  While the syntax will allow the main program to appear
3936 multiple times, that will trigger an error if it is actually attempted.
3937
3938 The various declarations do not return anything.  Instead they store
3939 what they declare in the parse context.
3940
3941 ###### Parser: grammar
3942
3943         $void
3944         Ocean -> DeclarationList
3945
3946         DeclarationList -> Declaration
3947                 | DeclarationList Declaration
3948
3949         Declaration -> DeclareConstant
3950                 | DeclareProgram
3951                 | DeclareStruct
3952                 | NEWLINE
3953                 | ERROR NEWLINE ${
                             /* Report the error against the ERROR token. */
3954                         tok_err(c,
3955                                 "error: unhandled parse error", &$1);
3956                 }$
3957
3958         ## top level grammar
3959
3960 ### The `const` section
3961
3962 As well as being defined inline with the code that uses them, constants
3963 can be declared at the top level.  These have full-file scope, so they
3964 are always `InScope`.  The value of a top level constant can be given
3965 as an expression, and this is evaluated immediately rather than in the
3966 later interpretation stage.  Once we add functions to the language, we
3967 will need rules concerning which, if any, can be used to define a top
3968 level constant.
3969
3970 Constants are defined in a section that starts with the reserved word
3971 `const` and then has a block with a list of assignment statements.
3972 For syntactic consistency, these must use the double-colon syntax to
3973 make it clear that they are constants.  Type can also be given: if
3974 not, the type will be determined during analysis, as with other
3975 constants.
3976
3977 As the constants are inserted at the head of a list, printing
3978 them in the same order that they were read is not straightforward.
3979 We take a quadratic approach here and count the number of constants
3980 (variables of depth 0), then count down from there, each time
3981 searching through for the Nth constant for decreasing N.
3982
3983 ###### top level grammar
3984
3985         DeclareConstant -> const Open ConstList Close
3986                 | const Open SimpleConstList }
3987                 | const : ConstList
3988                 | const SimpleConstList NEWLINE
3989
3990         ConstList -> ComplexConsts
3991                 | NEWLINE ConstList
3992         ComplexConsts -> ComplexConst ComplexConsts
3993                 | ComplexConst
3994         ComplexConst -> SimpleConstList NEWLINE
3995         SimpleConstList -> SimpleConstList ; Const
3996                 | Const
3997                 | SimpleConstList ;
3998
3999         $*type
4000         CType -> Type   ${ $0 = $<1; }$
4001                 |       ${ $0 = NULL; }$
4002         $void
4003         Const -> IDENTIFIER :: CType = Expression ${ {
4004                 int ok;
4005                 struct variable *v;
4006
4007                 v = var_decl(c, $1.txt);
4008                 if (v) {
4009                         struct var *var = new_pos(var, $1);
4010                         v->where_decl = var;
4011                         v->where_set = var;
4012                         var->var = v;
4013                         v->constant = 1;
4014                 } else {
4015                         v = var_ref(c, $1.txt);
4016                         tok_err(c, "error: name already declared", &$1);
4017                         type_err(c, "info: this is where '%v' was first declared",
4018                                  v->where_decl, NULL, 0, NULL);
4019                 }
4020                 do {
4021                         ok = 1;
4022                         propagate_types($5, c, &ok, $3, 0);
4023                 } while (ok == 2);
4024                 if (!ok)
4025                         c->parse_error = 1;
4026                 else if (v) {
4027                         v->val = interp_exec($5);
4028                 }
4029         } }$
4030         | ERROR NEWLINE ${ tok_err(c, "Syntax error in constant", &$1); }$
4031
4032 ###### print const decls
4033         {
4034                 /* in_scope lists the newest constant first; print oldest first. */
4035                 struct variable *cv;
4036                 int remaining = -1;     /* -1: the constants are not yet counted */
4037
4038                 while (remaining != 0) {
4039                         int seen = 0;
4040                         for (cv = context.in_scope; cv; cv = cv->in_scope) {
4041                                 if (cv->depth != 0)
4042                                         continue;
4043                                 if (++seen == remaining)
4044                                         break;
4045                         }
4046                         if (remaining < 0) {
4047                                 if (seen > 0)
4048                                         printf("const:\n");
4049                                 remaining = seen;
4050                                 continue;
4051                         }
4052                         printf("    %.*s :: ", cv->name->name.len, cv->name->name.txt);
4053                         type_print(cv->val.type, stdout);
4054                         printf(" = ");
4055                         if (cv->val.type == Tstr)
4056                                 printf("\"");
4057                         print_value(cv->val);
4058                         if (cv->val.type == Tstr)
4059                                 printf("\"");
4060                         printf("\n");
4061                         remaining -= 1;
4062                 }
4063         }
4064
4065 ### Finally the whole program.
4066
4067 Somewhat reminiscent of Pascal, a (current) Ocean program starts with
4068 the keyword "program" and a list of variable names which are assigned
4069 values from command line arguments.  Following this is a `block` which
4070 is the code to execute.  Unlike Pascal, constants and other
4071 declarations come *before* the program.
4072
4073 As this is the top level, several things are handled a bit
4074 differently.
4075 The whole program is not interpreted by `interp_exec` as that isn't
4076 passed the argument list which the program requires.  Similarly type
4077 analysis is a bit more interesting at this level.
4078
4079 ###### Binode types
4080         Program,        // the whole program (left: argument list, right: body); also links the argument list
4081
4082 ###### top level grammar
4083
4084         DeclareProgram -> Program ${ {
                     /* Keep the first program; a later one is only reported. */
4085                 if (c->prog)
4086                         type_err(c, "Program defined a second time",
4087                                  $1, NULL, 0, NULL);
4088                 else
4089                         c->prog = $<1;
4090         } }$
4091
4092         $*binode
4093         Program -> program OpenScope Varlist Block ${
                     /* left: the argument list as returned by reorder_bilist(),
                      * right: the body block. */
4094                 $0 = new(binode);
4095                 $0->op = Program;
4096                 $0->left = reorder_bilist($<3);
4097                 $0->right = $<4;
4098                 var_block_close(c, CloseSequential);
                     /* Presumably every scope should be closed by now; a non-empty
                      * scope_stack without a parse error is treated as fatal. */
4099                 if (c->scope_stack && !c->parse_error) abort();
4100                 }$
4101                 | ERROR ${
4102                         tok_err(c,
4103                                 "error: unhandled parse error", &$1);
4104                 }$
4105
4106         Varlist -> Varlist ArgDecl ${
                             /* left: the list so far, right: the new argument */
4107                         $0 = new(binode);
4108                         $0->op = Program;
4109                         $0->left = $<1;
4110                         $0->right = $<2;
4111                 }$
4112                 | ${ $0 = NULL; }$
4113
4114         $*var
4115         ArgDecl -> IDENTIFIER ${ {
                     /* NOTE(review): var_decl() can return NULL (the Const rule
                      * checks for that); here the result is stored unchecked -
                      * confirm that later users of this var tolerate a NULL. */
4116                 struct variable *v = var_decl(c, $1.txt);
4117                 $0 = new(var);
4118                 $0->var = v;
4119         } }$
4120
4121         ## Grammar
4122
4123 ###### print binode cases
4124         case Program:
4125                 /* "program", each argument name, then the body block */
4126                 do_indent(indent, "program");
4127                 b2 = cast(binode, b->left);
4128                 while (b2) {
4129                         printf(" ");
4130                         print_exec(b2->left, 0, 0);
4131                         b2 = cast(binode, b2->right);
4132                 }
4133                 printf("%s", bracket ? " {\n" : ":\n");
4134                 print_exec(b->right, indent+1, bracket);
4135                 if (bracket)
4136                         do_indent(indent, "}\n");
4137                 break;
4138
4139 ###### propagate binode cases
             /* Presumably unreachable: analyse_prog() walks the Program node itself. */
4140         case Program: abort();          // NOTEST
4141
4142 ###### core functions
4143
4144         static int analyse_prog(struct exec *prog, struct parse_context *c)
4145         {
4146                 /* Type-check the body; arguments left untyped become Tstr. */
4147                 struct binode *top = cast(binode, prog);
4148                 struct binode *arg;
4149                 int ok;
4150
4151                 if (top == NULL)
4152                         return 0;       // NOTEST
4153                 for (ok = 2; ok == 2; ) {
4154                         ok = 1;
4155                         propagate_types(top->right, c, &ok, Tnone, 0);
4156                 }
4157                 if (ok == 0)
4158                         return 0;
4159                 /* Any argument that still has no type is taken as a string. */
4160                 for (arg = cast(binode, top->left); arg; arg = cast(binode, arg->right)) {
4161                         struct variable *av = cast(var, arg->left)->var;
4162                         if (av->val.type)
4163                                 continue;
4164                         av->where_set = arg;
4165                         av->val = val_prepare(Tstr);
4166                 }
4167                 for (ok = 2; ok == 2; ) {
4168                         ok = 1;
4169                         propagate_types(top->right, c, &ok, Tnone, 0);
4170                 }
4171                 if (ok == 0)
4172                         return 0;
4173                 /* Make sure everything is still consistent */
4174                 propagate_types(top->right, c, &ok, Tnone, 0);
4175                 return ok != 0;
4176         }
4177
4178         static void interp_prog(struct exec *prog, char **argv)
4179         {
4180                 /* Set each program argument from the next string in argv,
4181                  * then interpret the body and discard the value it yields. */
4182                 struct binode *top = cast(binode, prog);
4183                 struct binode *arg;
4184                 struct value result;
4185
4186                 if (prog == NULL)
4187                         return;         // NOTEST
4188                 for (arg = cast(binode, top->left); arg != NULL; argv++) {
4189                         struct value *slot = &cast(var, arg->left)->var->val;
4190
4191                         if (*argv == NULL) {
4192                                 printf("Not enough args\n");
4193                                 exit(1);
4194                         }
4195                         arg = cast(binode, arg->right);
4196                         /* parse_value() is given the type the variable already holds */
4197                         free_value(*slot);
4198                         *slot = parse_value(slot->type, *argv);
4199                         if (slot->type == NULL)
4200                                 exit(1);
4201                 }
4202                 result = interp_exec(top->right);
4203                 free_value(result);
4204         }
4205
4206 ###### interp binode cases
             /* Presumably unreachable: interp_prog() runs the Program node itself. */
4207         case Program: abort();  // NOTEST
4208
4209 ## And now to test it out.
4210
4211 Having a language requires having a "hello world" program.  I'll
4212 provide a little more than that: a program that prints "Hello world",
4213 finds the GCD of two numbers, prints the first few elements of the
4214 Fibonacci sequence, performs a binary search for a number, and does a
4215 few other things which will likely grow as the language grows.
4216
4217 ###### File: oceani.mk
4218         demos :: sayhello
4219         sayhello : oceani
4220                 @echo "===== DEMO ====="
4221                 ./oceani --section "demo: hello" oceani.mdc 55 33
4222
4223 ###### demo: hello
4224
4225         const:
4226                 pi ::= 3.141_592_6
4227                 four ::= 2 + 2 ; five ::= 10/2
4228         const pie ::= "I like Pie";
4229                 cake ::= "The cake is"
4230                   ++ " a lie"
4231
4232         struct fred:
4233                 size:[four]number
4234                 name:string
4235                 alive:Boolean
4236
4237         program A B:
4238                 print "Hello World, what lovely oceans you have!"
4239                 print "Are there", five, "?"
4240                 print pi, pie, "but", cake
4241
4242                 /* When a variable is defined in both branches of an 'if',
4243                  * and used afterwards, the variables are merged.
4244                  */
4245                 if A > B:
4246                         bigger := "yes"
4247                 else:
4248                         bigger := "no"
4249                 print "Is", A, "bigger than", B,"? ", bigger
4250                 /* If a variable is not used after the 'if', no
4251                  * merge happens, so types can be different
4252                  */
4253                 if A > B * 2:
4254                         double:string = "yes"
4255                         print A, "is more than twice", B, "?", double
4256                 else:
4257                         double := B*2
4258                         print "double", B, "is", double
4259
4260                 a : number
4261                 a = A;
4262                 b:number = B
4263                 if a > 0 and then b > 0:
4264                         while a != b:
4265                                 if a < b:
4266                                         b = b - a
4267                                 else:
4268                                         a = a - b
4269                         print "GCD of", A, "and", B,"is", a
4270                 else if a <= 0:
4271                         print a, "is not positive, cannot calculate GCD"
4272                 else:
4273                         print b, "is not positive, cannot calculate GCD"
4274
4275                 for:
4276                         togo := 10
4277                         f1 := 1; f2 := 1
4278                         print "Fibonacci:", f1,f2,
4279                 then togo = togo - 1
4280                 while togo > 0:
4281                         f3 := f1 + f2
4282                         print "", f3,
4283                         f1 = f2
4284                         f2 = f3
4285                 print ""
4286
4287                 /* Binary search... */
4288                 for:
4289                         lo:= 0; hi := 100
4290                         target := 77
4291                 while:
4292                         mid := (lo + hi) / 2
4293                         if mid == target:
4294                                 use Found
4295                         if mid < target:
4296                                 lo = mid
4297                         else:
4298                                 hi = mid
4299                         if hi - lo < 1:
4300                                 use GiveUp
4301                         use True
4302                 do: pass
4303                 case Found:
4304                         print "Yay, I found", target
4305                 case GiveUp:
4306                         print "Closest I found was", mid
4307
4308                 size::= 10
4309                 list:[size]number
4310                 list[0] = 1234
4311                 // "middle square" PRNG.  Not particularly good, but one my
4312                 // Dad taught me - the first one I ever heard of.
4313                 for i:=1; then i = i + 1; while i < size:
4314                         n := list[i-1] * list[i-1]
4315                         list[i] = (n / 100) % 10 000
4316
4317                 print "Before sort:",
4318                 for i:=0; then i = i + 1; while i < size:
4319                         print "", list[i],
4320                 print
4321
4322                 for i := 1; then i=i+1; while i < size:
4323                         for j:=i-1; then j=j-1; while j >= 0:
4324                                 if list[j] > list[j+1]:
4325                                         t:= list[j]
4326                                         list[j] = list[j+1]
4327                                         list[j+1] = t
4328                 print " After sort:",
4329                 for i:=0; then i = i + 1; while i < size:
4330                         print "", list[i],
4331                 print
4332
4333                 bob:fred
4334                 bob.name = "Hello"
4335                 bob.alive = (bob.name == "Hello")
4336                 print "bob", "is" if  bob.alive else "isn't", "alive"