ocean-lang.org Git - ocean/blob - csrc/oceani.mdc

   1 # Ocean Interpreter - Falls Creek version
   2
   3 Ocean is intended to be an compiled language, so this interpreter is
   4 not targeted at being the final product.  It is very much an intermediate
   5 stage, and fills that role in two distinct ways.
   6
   7 Firstly, it exists as a platform to experiment with the early language
   8 design.  An interpreter is easy to write and easy to get working, so
   9 the barrier for entry is lower if I aim to start with an interpreter.
  10
  11 Secondly, the plan for the Ocean compiler is to write it in the
  12 [Ocean language](http://ocean-lang.org).  To achieve this we naturally
  13 need some sort of boot-strap process and this interpreter - written in
  14 portable C - will fill that role.  It will be used to bootstrap the
  15 Ocean compiler.
  16
  17 Two features that are not needed to fill either of these roles are
  18 performance and completeness.  The interpreter only needs to be fast
  19 enough to run small test programs and occasionally to run the compiler
  20 on itself.  It only needs to be complete enough to test aspects of the
  21 design which are developed before the compiler is working, and to run
  22 the compiler on itself.  Any features not used by the compiler when
  23 compiling itself are superfluous.  They may be included anyway, but
  24 they may not.
  25
  26 Nonetheless, the interpreter should end up being reasonably complete,
  27 and any performance bottlenecks which appear and are easily fixed, will
  28 be.
  29
  30 ## Current version
  31
  32 This initial version of the interpreter exists to test out the
  33 structured statement providing conditions and iteration.  Clearly we
  34 need some minimal other functionality so that values can be tested and
  35 instructions iterated over.  All that functionality is clearly not
  36 normative at this stage (not that anything is **really** normative
  37 yet) and will change, so early test code will certainly break.
  38
  39 Beyond the structured statement and the `use` statement which is
  40 intimately related to it we have:
  41
  42  - "blocks" of multiple statements.
  43  - `pass`: a statement which does nothing.
  44  - variables: any identifier is assumed to store a number, string,
  45    or Boolean.
  46  - expressions: `+`, `-`, `*`, `/` can apply to integers and `++` can
  47    catenate strings.  `and`, `or`, `not` manipulate Booleans, and
  48    normal comparison operators can work on all three types.
  49  - assignments: can assign the value of an expression to a variable.
  50  - `print`: will print the values in a list of expressions.
  51  - `program`: is given a list of identifiers to initialize from
  52    arguments.
  53
  54 ## Naming
  55
  56 Versions of the interpreter which obviously do not support a complete
  57 language will be named after creeks and streams.  This one is Falls
  58 Creek.
  59
  60 Once we have something reasonably resembling a complete language, the
  61 names of rivers will be used.
  62 Early versions of the compiler will be named after seas.  Major
  63 releases of the compiler will be named after oceans.  Hopefully I will
  64 be finished once I get to the Pacific Ocean release.
  65
  66 ## Outline
  67
  68 As well as parsing and executing a program, the interpreter can print
  69 out the program from the parsed internal structure.  This is useful
  70 for validating the parsing.
  71 So the main requirements of the interpreter are:
  72
  73 - Parse the program
  74 - Analyse the parsed program to ensure consistency
  75 - print the program
  76 - execute the program
  77
  78 This is all performed by a single C program extracted with
  79 `parsergen`.
  80
  81 There will be two formats for printing the program a default and one
  82 that uses bracketing.  So an extra command line option is needed for
  83 that.
  84
  85 ###### File: oceani.mk
  86
  87         myCFLAGS := -Wall -g -fplan9-extensions
  88         CFLAGS := $(filter-out $(myCFLAGS),$(CFLAGS)) $(myCFLAGS)
  89         myLDLIBS:= libparser.o libscanner.o libmdcode.o -licuuc
  90         LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
  91         ## libs
  92         all :: oceani
  93         oceani.c oceani.h : oceani.mdc parsergen
  94                 ./parsergen -o oceani --LALR --tag Parser oceani.mdc
  95         oceani.mk: oceani.mdc md2c
  96                 ./md2c oceani.mdc
  97
  98         oceani: oceani.o
  99
 100 ###### Parser: header
 101         ## macros
 102         ## ast
 103         struct parse_context {
 104                 struct token_config config;
 105                 ## parse context
 106         };
 107
 108 ###### Parser: code
 109
 110         #include <unistd.h>
 111         #include <stdlib.h>
 112         #include <fcntl.h>
 113         #include <errno.h>
 114         #include <sys/mman.h>
 115         #include <string.h>
 116         #include <stdio.h>
 117         #include <locale.h>
 118         #include <malloc.h>
 119         #include "mdcode.h"
 120         #include "scanner.h"
 121         #include "parser.h"
 122
 123         ## includes
 124
 125         #include "oceani.h"
 126
 127         ## forward decls
 128         ## value functions
 129         ## ast functions
 130         ## core functions
 131
 132         #include <getopt.h>
 133         static char Usage[] = "Usage: oceani --trace --print --noexec prog.ocn\n";
 134         static const struct option long_options[] = {
 135                 {"trace",     0, NULL, 't'},
 136                 {"print",     0, NULL, 'p'},
 137                 {"noexec",    0, NULL, 'n'},
 138                 {"brackets",  0, NULL, 'b'},
 139                 {NULL,        0, NULL, 0},
 140         };
 141         const char *options = "tpnb";
 142         int main(int argc, char *argv[])
 143         {
 144                 int fd;
 145                 int len;
 146                 char *file;
 147                 struct section *s;
 148                 struct parse_context context = {
 149                         .config = {
 150                                 .ignored = (1 << TK_line_comment)
 151                                          | (1 << TK_block_comment),
 152                                 .number_chars = ".,_+-",
 153                                 .word_start = "_",
 154                                 .word_cont = "_",
 155                         },
 156                 };
 157                 int doprint=0, dotrace=0, doexec=1, brackets=0;
 158                 struct exec **prog;
 159                 int opt;
 160                 while ((opt = getopt_long(argc, argv, options, long_options, NULL))
 161                        != -1) {
 162                         switch(opt) {
 163                         case 't': dotrace=1; break;
 164                         case 'p': doprint=1; break;
 165                         case 'n': doexec=0; break;
 166                         case 'b': brackets=1; break;
 167                         default: fprintf(stderr, Usage);
 168                                 exit(1);
 169                         }
 170                 }
 171                 if (optind >= argc) {
 172                         fprintf(stderr, "oceani: no input file given\n");
 173                         exit(1);
 174                 }
 175                 fd = open(argv[optind], O_RDONLY);
 176                 if (fd < 0) {
 177                         fprintf(stderr, "oceani: cannot open %s\n", argv[optind]);
 178                         exit(1);
 179                 }
 180                 len = lseek(fd, 0, 2);
 181                 file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
 182                 s = code_extract(file, file+len, NULL);
 183                 if (!s) {
 184                         fprintf(stderr, "oceani: could not find any code in %s\n",
 185                                 argv[optind]);
 186                         exit(1);
 187                 }
 188                 prog = parse_oceani(s->code, &context.config,
 189                                     dotrace ? stderr : NULL);
 190                 if (prog && doprint)
 191                         print_exec(*prog, 0, brackets);
 192                 if (prog && doexec) {
 193                         if (!analyse_prog(*prog, &context)) {
 194                                 fprintf(stderr, "oceani: type error in program\n");
 195                                 exit(1);
 196                         }
 197                         interp_prog(*prog, argv+optind+1);
 198                 }
 199                 if (prog) {
 200                         free_exec(*prog);
 201                         free(prog);
 202                 }
 203                 while (s) {
 204                         struct section *t = s->next;
 205                         code_free(s->code);
 206                         free(s);
 207                         s = t;
 208                 }
 209                 ## free context
 210                 exit(0);
 211         }
 212
 213 ### Analysis
 214
 215 These four requirements of parse, analyse, print, interpret apply to
 216 each language element individually so that is how most of the code
 217 will be structured.
 218
 219 Three of the four are fairly self explanatory.  The one that requires
 220 a little explanation is the analysis step.
 221
 222 The current language design does not require variables to be declared,
 223 but they must have a single type.  Different operations impose
 224 different requirements on the variables, for example addition requires
 225 both arguments to be numeric, and assignment requires the variable on
 226 the left to have the same type as the expression on the right.
 227
 228 Analysis involves propagating these type requirements around
 229 consequently setting the type of each variable.  If any requirements
 230 are violated (e.g. a string is compared with a number) or if a
 231 variable needs to have two different types, then an error is raised
 232 and the program will not run.
 233
 234 Determining the types of all variables early is important for
 235 processing command line arguments.  These can be assigned to any type
 236 of variable, but we must first know the correct type so any required
 237 conversion can happen.  If a variable is associated with a command
 238 line argument but no type can be interpreted (e.g. the variable is
 239 only ever used in a `print` statement), then the type is set to
 240 'string'.
 241
 242 If the type of a variable cannot be determined at all, then it is set
 243 to be a number and given a unique value.  This allows it to fill the
 244 role of a name in an enumerated type, which is useful for testing the
 245 `switch` statement.
 246
 247 ## Data Structures
 248
 249 One last introductory step before detailing the language elements and
 250 providing their four requirements is to establish the data structures
 251 to store these elements.
 252
 253 There are two key objects that we need to work with: executable
 254 elements which comprise the program, and values which the program
 255 works with.  Between these is the set of variables which hold the
 256 values.
 257
 258 ### Values
 259
 260 Values can be numbers, which we represent as multi-precision
 261 fractions, strings and Booleans.  When analysing the program we also
 262 need to allow for places where no value is meaningful (`Vnone`) and
 263 where we don't know what type to expect yet (`Vunknown`).
 264 A 2 character 'tail' is included in each value as the scanner wants
 265 to parse that from the end of numbers and we need somewhere to put
 266 it.  It is currently ignored but one day might allow for
 267 e.g. "imaginary" numbers.
 268
 269 Values are never shared, they are always copied when used, and freed
 270 when no longer needed.
 271
 272 ###### includes
 273         #include <gmp.h>
 274         #include "string.h"
 275         #include "number.h"
 276
 277 ###### libs
 278         myLDLIBS := libnumber.o libstring.o -lgmp
 279         LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
 280
 281 ###### ast
 282         struct value {
 283                 enum vtype {Vunknown, Vnone, Vstr, Vnum, Vbool} vtype;
 284                 union {
 285                         struct text str;
 286                         mpq_t num;
 287                         int bool;
 288                 };
 289                 char tail[2];
 290         };
 291
 292 ###### ast functions
 293         void free_value(struct value v)
 294         {
 295                 switch (v.vtype) {
 296                 case Vnone:
 297                 case Vunknown: break;
 298                 case Vstr: free(v.str.txt); break;
 299                 case Vnum: mpq_clear(v.num); break;
 300                 case Vbool: break;
 301                 }
 302         }
 303
 304 ###### value functions
 305
 306         static void val_init(struct value *val, enum vtype type)
 307         {
 308                 val->vtype = type;
 309                 switch(type) {
 310                 case Vnone:abort();
 311                 case Vunknown: break;
 312                 case Vnum:
 313                         mpq_init(val->num); break;
 314                 case Vstr:
 315                         val->str.txt = malloc(1);
 316                         val->str.len = 0;
 317                         break;
 318                 case Vbool:
 319                         val->bool = 0;
 320                         break;
 321                 }
 322         }
 323
 324         static struct value dup_value(struct value v)
 325         {
 326                 struct value rv;
 327                 rv.vtype = v.vtype;
 328                 switch (rv.vtype) {
 329                 case Vnone:
 330                 case Vunknown: break;
 331                 case Vbool:
 332                         rv.bool = v.bool;
 333                         break;
 334                 case Vnum:
 335                         mpq_init(rv.num);
 336                         mpq_set(rv.num, v.num);
 337                         break;
 338                 case Vstr:
 339                         rv.str.len = v.str.len;
 340                         rv.str.txt = malloc(rv.str.len);
 341                         memcpy(rv.str.txt, v.str.txt, v.str.len);
 342                         break;
 343                 }
 344                 return rv;
 345         }
 346
 347         static int value_cmp(struct value left, struct value right)
 348         {
 349                 int cmp;
 350                 if (left.vtype != right.vtype)
 351                         return left.vtype - right.vtype;
 352                 switch (left.vtype) {
 353                 case Vnum: cmp = mpq_cmp(left.num, right.num); break;
 354                 case Vstr: cmp = text_cmp(left.str, right.str); break;
 355                 case Vbool: cmp = left.bool - right.bool; break;
 356                 case Vnone:
 357                 case Vunknown: cmp = 0;
 358                 }
 359                 return cmp;
 360         }
 361
 362         static struct text text_join(struct text a, struct text b)
 363         {
 364                 struct text rv;
 365                 rv.len = a.len + b.len;
 366                 rv.txt = malloc(rv.len);
 367                 memcpy(rv.txt, a.txt, a.len);
 368                 memcpy(rv.txt+a.len, b.txt, b.len);
 369                 return rv;
 370         }
 371
 372         static void print_value(struct value v)
 373         {
 374                 switch (v.vtype) {
 375                 case Vunknown:
 376                         printf("*Unknown*"); break;
 377                 case Vnone:
 378                         printf("*no-value*"); break;
 379                 case Vstr:
 380                         printf("%.*s", v.str.len, v.str.txt); break;
 381                 case Vbool:
 382                         printf("%s", v.bool ? "True":"False"); break;
 383                 case Vnum:
 384                         {
 385                         mpf_t fl;
 386                         mpf_init2(fl, 20);
 387                         mpf_set_q(fl, v.num);
 388                         gmp_printf("%Fg", fl);
 389                         mpf_clear(fl);
 390                         break;
 391                         }
 392                 }
 393         }
 394
 395         static int parse_value(struct value *vl, char *arg)
 396         {
 397                 struct text tx;
 398                 switch(vl->vtype) {
 399                 case Vunknown:
 400                 case Vnone:
 401                         return 0;
 402                 case Vstr:
 403                         vl->str.txt = arg;
 404                         vl->str.len = strlen(arg);
 405                         break;
 406                 case Vnum:
 407                         tx.txt = arg; tx.len = strlen(tx.txt);
 408                         if (number_parse(vl->num, vl->tail, tx) == 0)
 409                                 mpq_init(vl->num);
 410                         break;
 411                 case Vbool:
 412                         if (strcasecmp(arg, "true") == 0 ||
 413                             strcmp(arg, "1") == 0)
 414                                 vl->bool = 1;
 415                         else if (strcasecmp(arg, "false") == 0 ||
 416                             strcmp(arg, "0") == 0)
 417                                 vl->bool = 2;
 418                         else {
 419                                 printf("Bad bool: %s\n", arg);
 420                                 return 0;
 421                         }
 422                         break;
 423                 }
 424                 return 1;
 425         }
 426
 427 ### Variables
 428
 429 Variables are simply named values.  We store them in a linked list
 430 sorted by name and use sequential search and insertion sort.
 431
 432 This linked list is stored in the parse context so that reduce
 433 functions can find or add variables, and so the analysis phase can
 434 ensure that every variable gets a type.
 435
 436 ###### ast
 437
 438         struct variable {
 439                 struct text name;
 440                 struct variable *next;
 441                 struct value val;
 442         };
 443
 444 ###### macros
 445
 446         #define container_of(ptr, type, member) ({                      \
 447                 const typeof( ((type *)0)->member ) *__mptr = (ptr);    \
 448                 (type *)( (char *)__mptr - offsetof(type,member) );})
 449
 450 ###### parse context
 451
 452         struct variable *varlist;
 453
 454 ###### free context
 455         while (context.varlist) {
 456                 struct variable *v = context.varlist;
 457                 context.varlist = v->next;
 458                 free_value(v->val);
 459                 free(v);
 460         }
 461
 462 ###### ast functions
 463
 464         static struct variable *find_variable(struct token_config *conf, struct text s)
 465         {
 466                 struct variable **l = &container_of(conf, struct parse_context,
 467                                                     config)->varlist;
 468                 struct variable *n;
 469                 int cmp = 1;
 470
 471                 while (*l &&
 472                         (cmp = text_cmp((*l)->name, s)) < 0)
 473                                 l = & (*l)->next;
 474                 if (cmp == 0)
 475                         return *l;
 476                 n = calloc(1, sizeof(*n));
 477                 n->name = s;
 478                 n->val.vtype = Vunknown;
 479                 n->next = *l;
 480                 *l = n;
 481                 return n;
 482         }
 483
 484 ### Executables
 485
 486 Executables can be lots of different things.  In many cases an
 487 executable is just an operation combined with one or two other
 488 executables.  This allows for expressions and lists etc.  Other times
 489 an executable is something quite specific like a constant or variable
 490 name.  So we define a `struct exec` to be a general executable with a
 491 type, and a `struct binode` which is a subclass of `exec` and forms a
 492 node in a binary tree and holding an operation. There will be other
 493 subclasses, and to access these we need to be able to `cast` the
 494 `exec` into the various other types.
 495
 496 ###### macros
 497         #define cast(structname, pointer) ({            \
 498                 const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \
 499                 if (__mptr && *__mptr != X##structname) abort();                \
 500                 (struct structname *)( (char *)__mptr);})
 501
 502         #define new(structname) ({                      \
 503                 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
 504                 __ptr->type = X##structname;            \
 505                 __ptr;})
 506
 507 ###### ast
 508         enum exec_types {
 509                 Xbinode,
 510                 ## exec type
 511         };
 512         struct exec {
 513                 enum exec_types type;
 514         };
 515         struct binode {
 516                 struct exec;
 517                 enum Btype {
 518                         ## Binode types
 519                 } op;
 520                 struct exec *left, *right;
 521         };
 522
 523 Each different type of `exec` node needs a number of functions
 524 defined, a bit like methods.  We must be able to be able to free it,
 525 print it, analyse it and execute it.  Once we have specific `exec`
 526 types we will need to parse them to.  Let's take this a bit more
 527 slowly.
 528
 529 #### Freeing
 530
 531 The parser generator requires as `free_foo` function for each struct
 532 that stores attributes and they will be `exec`s of subtypes there-of.
 533 So we need `free_exec` which can handle all the subtypes, and we need
 534 `free_binode`.
 535
 536 ###### ast functions
 537
 538         static void free_binode(struct binode *b)
 539         {
 540                 if (!b)
 541                         return;
 542                 free_exec(b->left);
 543                 free_exec(b->right);
 544                 free(b);
 545         }
 546
 547 ###### core functions
 548         static void free_exec(struct exec *e)
 549         {
 550                 if (!e)
 551                         return;
 552                 switch(e->type) {
 553                         ## free exec cases
 554                 }
 555         }
 556
 557 ###### forward decls
 558
 559         static void free_exec(struct exec *e);
 560
 561 ###### free exec cases
 562         case Xbinode: free_binode(cast(binode, e)); break;
 563
 564 #### Printing
 565
 566 Printing an `exec` requires that we know the current indent level for
 567 printing line-oriented components.  As will become clear later, we
 568 also want to know what sort of bracketing to use.
 569
 570 ###### ast functions
 571
 572         static void do_indent(int i, char *str)
 573         {
 574                 while (i--)
 575                         printf("    ");
 576                 printf("%s", str);
 577         }
 578
 579 ###### core functions
 580         static void print_binode(struct binode *b, int indent, int bracket)
 581         {
 582                 struct binode *b2;
 583                 switch(b->op) {
 584                 ## print binode cases
 585                 }
 586         }
 587
 588         static void print_exec(struct exec *e, int indent, int bracket)
 589         {
 590                 switch (e->type) {
 591                 case Xbinode:
 592                         print_binode(cast(binode, e), indent, bracket); break;
 593                 ## print exec cases
 594                 }
 595         }
 596
 597 ###### forward decls
 598
 599         static void print_exec(struct exec *e, int indent, int bracket);
 600
 601 #### Analysing
 602
 603 As discusses, analysis involves propagating type requirements around
 604 the program and looking for errors.
 605
 606 So propagate_types is passed a type that the `exec` is expected to return,
 607 and returns the type that it does return, either of which can be `Vunknown`.
 608 An `ok` flag is passed by reference. It is set to `0` when an error is
 609 found, and `2` when any change is made.  If it remains unchanged at
 610 `1`, then no more propagation is needed.
 611
 612 ###### core functions
 613
 614         static enum vtype propagate_types(struct exec *prog, enum vtype type,
 615                                           int *ok)
 616         {
 617                 enum vtype t;
 618
 619                 if (!prog) {
 620                         if (type != Vunknown && type != Vnone)
 621                                 *ok = 0;
 622                         return Vnone;
 623                 }
 624
 625                 switch (prog->type) {
 626                 case Xbinode:
 627                 {
 628                         struct binode *b = cast(binode, prog);
 629                         switch (b->op) {
 630                         ## propagate binode cases
 631                         }
 632                         break;
 633                 }
 634                 ## propagate exec cases
 635                 }
 636                 return Vnone;
 637         }
 638
 639 #### Interpreting
 640
 641 Interpreting an `exec` doesn't require anything but the `exec`.  State
 642 is stored in variables and each variable will be directly linked from
 643 within the `exec` tree.  The exception to this is the whole `program`
 644 which needs to look at command line arguments.  The `program` will be
 645 interpreted separately.
 646
 647 Each `exec` can return a value, which may be `Vnone` but shouldn't be `Vunknown`.
 648
 649 ###### core functions
 650
 651         static struct value interp_exec(struct exec *e)
 652         {
 653                 struct value rv;
 654                 rv.vtype = Vnone;
 655                 if (!e)
 656                         return rv;
 657
 658                 switch(e->type) {
 659                 case Xbinode:
 660                 {
 661                         struct binode *b = cast(binode, e);
 662                         struct value left, right;
 663                         left.vtype = right.vtype = Vnone;
 664                         switch (b->op) {
 665                         ## interp binode cases
 666                         }
 667                         free_value(left); free_value(right);
 668                         break;
 669                 }
 670                 ## interp exec cases
 671                 }
 672                 return rv;
 673         }
 674
 675 ## Language elements
 676
 677 Each language element needs to be parsed, printed, analysed,
 678 interpreted, and freed.  There are several, so let's just start with
 679 the easy ones and work our way up.
 680
 681 ### Values
 682
 683 We have already met values and separate objects.  When manifest
 684 constants appear in the program text that must result in an executable
 685 which has a constant value.  So the `val` structure embeds a value in
 686 an executable.
 687
 688 ###### exec type
 689         Xval,
 690
 691 ###### ast
 692         struct val {
 693                 struct exec;
 694                 struct value val;
 695         };
 696
 697 ###### Grammar
 698
 699         $*val
 700         Value ->  True ${
 701                         $0 = new(val);
 702                         $0->val.vtype = Vbool;
 703                         $0->val.bool = 1;
 704                         }$
 705                 | False ${
 706                         $0 = new(val);
 707                         $0->val.vtype = Vbool;
 708                         $0->val.bool = 0;
 709                         }$
 710                 | NUMBER ${
 711                         $0 = new(val);
 712                         $0->val.vtype = Vnum;
 713                         if (number_parse($0->val.num, $0->val.tail, $1.txt) == 0)
 714                                 mpq_init($0->val.num);
 715                         }$
 716                 | STRING ${
 717                         $0 = new(val);
 718                         $0->val.vtype = Vstr;
 719                         string_parse(&$1, '\\', &$0->val.str, $0->val.tail);
 720                         }$
 721
 722 ###### print exec cases
 723         case Xval:
 724         {
 725                 struct val *v = cast(val, e);
 726                 if (v->val.vtype == Vstr)
 727                         printf("\"");
 728                 print_value(v->val);
 729                 if (v->val.vtype == Vstr)
 730                         printf("\"");
 731                 break;
 732         }
 733
 734 ###### propagate exec cases
 735                 case Xval:
 736                 {
 737                         struct val *val = cast(val, prog);
 738                         if (type != Vunknown &&
 739                             type != val->val.vtype)
 740                                 *ok = 0;
 741                         return val->val.vtype;
 742                 }
 743
 744 ###### interp exec cases
 745         case Xval:
 746                 return dup_value(cast(val, e)->val);
 747
 748 ###### ast functions
 749         void free_val(struct val *v)
 750         {
 751                 if (!v)
 752                         return;
 753                 free_value(v->val);
 754                 free(v);
 755         }
 756
 757 ###### free exec cases
 758         case Xval: free_val(cast(val, e)); break;
 759
 760 ###### ast functions
 761         // Move all nodes from 'b' to 'rv', reversing the order.
 762         // In 'b' 'left' is a list, and 'right' is the last node.
 763         // In 'rv', left' is the first node and 'right' is a list.
 764         struct binode *reorder_bilist(struct binode *b)
 765         {
 766                 struct binode *rv = NULL;
 767
 768                 while (b) {
 769                         struct exec *t = b->right;
 770                         b->right = rv;
 771                         rv = b;
 772                         if (b->left)
 773                                 b = cast(binode, b->left);
 774                         else
 775                                 b = NULL;
 776                         rv->left = t;
 777                 }
 778                 return rv;
 779         }
 780
 781 ### Variables
 782
 783 Just as we used as `val` to wrap a value into an `exec`, we similarly
 784 need a `var` to wrap a `variable` into an exec.  While each `val`
 785 contained a copy of the value, each `var` hold a link to the variable
 786 because it really is the same variable no matter where it appears.
 787
 788 ###### exec type
 789         Xvar,
 790
 791 ###### ast
 792         struct var {
 793                 struct exec;
 794                 struct variable *var;
 795         };
 796
 797 ###### Grammar
 798         $*var
 799         Variable -> IDENTIFIER ${
 800                 $0 = new(var);
 801                 $0->var = find_variable(config, $1.txt);
 802         }$
 803
 804 ###### print exec cases
 805         case Xvar:
 806         {
 807                 struct var *v = cast(var, e);
 808                 printf("%.*s", v->var->name.len, v->var->name.txt);
 809                 break;
 810         }
 811
 812 ###### propagate exec cases
 813
 814         case Xvar:
 815         {
 816                 struct var *var = cast(var, prog);
 817                 if (var->var->val.vtype == Vunknown) {
 818                         if (type != Vunknown && *ok != 0) {
 819                                 val_init(&var->var->val, type);
 820                                 *ok = 2;
 821                         }
 822                         return type;
 823                 }
 824                 if (type == Vunknown)
 825                         return var->var->val.vtype;
 826                 if (type != var->var->val.vtype)
 827                         *ok = 0;
 828                 return type;
 829         }
 830
 831 ###### interp exec cases
 832         case Xvar:
 833                 return dup_value(cast(var, e)->var->val);
 834
 835 ###### ast functions
 836
 837         void free_var(struct var *v)
 838         {
 839                 free(v);
 840         }
 841
 842 ###### free exec cases
 843         case Xvar: free_var(cast(var, e)); break;
 844
 845 ### Expressions: Boolean
 846
 847 Our first user of the `binode` will be expressions, and particularly
 848 Boolean expressions.  As I haven't implemented precedence in the
 849 parser generator yet, we need different names from each precedence
 850 level used by expressions.  The outer most or lowest level precedence
 851 are Boolean `or` `and`, and `not` which form and `Expression` our of `BTerm`s
 852 and `BFact`s.
 853
 854 ###### Binode types
 855         And,
 856         Or,
 857         Not,
 858
 859 ####### Grammar
 860
 861         $*binode
 862         Expression -> Expression or BTerm ${
 863                         $0 = new(binode);
 864                         $0->op = Or;
 865                         $0->left = $<1;
 866                         $0->right = $<3;
 867                 }$
 868                 | BTerm ${ $0 = $<1; }$
 869
 870         BTerm -> BTerm and BFact ${
 871                         $0 = new(binode);
 872                         $0->op = And;
 873                         $0->left = $<1;
 874                         $0->right = $<3;
 875                 }$
 876                 | BFact ${ $0 = $<1; }$
 877
 878         BFact -> not BFact ${
 879                         $0 = new(binode);
 880                         $0->op = Not;
 881                         $0->right = $<2;
 882                         }$
 883                 ## other BFact
 884
 885 ###### print binode cases
 886         case And:
 887                 print_exec(b->left, -1, 0);
 888                 printf(" and ");
 889                 print_exec(b->right, -1, 0);
 890                 break;
 891         case Or:
 892                 print_exec(b->left, -1, 0);
 893                 printf(" or ");
 894                 print_exec(b->right, -1, 0);
 895                 break;
 896         case Not:
 897                 printf("not ");
 898                 print_exec(b->right, -1, 0);
 899                 break;
 900
 901 ###### propagate binode cases
 902         case And:
 903         case Or:
 904         case Not:
 905                 /* both must be Vbool, result is Vbool */
 906                 propagate_types(b->left, Vbool, ok);
 907                 propagate_types(b->right, Vbool, ok);
 908                 if (type != Vbool && type != Vunknown)
 909                         *ok = 0;
 910                 return Vbool;
 911
 912 ###### interp binode cases
 913         case And:
 914                 rv = interp_exec(b->left);
 915                 right = interp_exec(b->right);
 916                 rv.bool = rv.bool && right.bool;
 917                 break;
 918         case Or:
 919                 rv = interp_exec(b->left);
 920                 right = interp_exec(b->right);
 921                 rv.bool = rv.bool || right.bool;
 922                 break;
 923         case Not:
 924                 rv = interp_exec(b->right);
 925                 rv.bool = !rv.bool;
 926                 break;
 927
 928 ### Expressions: Comparison
 929
 930 Of slightly higher precedence that Boolean expressions are
 931 Comparisons.
 932 A comparison takes arguments of any type, but the two types must be
 933 the same.
 934
 935 To simplify the parsing we introduce an `eop` which can return an
 936 expression operator.
 937
 938 ###### ast
 939         struct eop {
 940                 enum Btype op;
 941         };
 942
 943 ###### ast functions
 944         static void free_eop(struct eop *e)
 945         {
 946                 if (e)
 947                         free(e);
 948         }
 949
 950 ###### Binode types
 951         Less,
 952         Gtr,
 953         LessEq,
 954         GtrEq,
 955         Eql,
 956         NEql,
 957
 958 ###### other BFact
 959         | Expr CMPop Expr ${
 960                         $0 = new(binode);
 961                         $0->op = $2.op;
 962                         $0->left = $<1;
 963                         $0->right = $<3;
 964                 }$
 965                 | Expr ${ $0 = $<1; }$
 966
 967 ###### Grammar
 968
 969         $eop
 970         CMPop ->   < ${ $0.op = Less; }$
 971                 |  > ${ $0.op = Gtr; }$
 972                 |  <= ${ $0.op = LessEq; }$
 973                 |  >= ${ $0.op = GtrEq; }$
 974                 |  == ${ $0.op = Eql; }$
 975                 |  != ${ $0.op = NEql; }$
 976
 977 ###### print binode cases
 978
 979         case Less:
 980         case LessEq:
 981         case Gtr:
 982         case GtrEq:
 983         case Eql:
 984         case NEql:
 985                 print_exec(b->left, -1, 0);
 986                 switch(b->op) {
 987                 case Less:   printf(" < "); break;
 988                 case LessEq: printf(" <= "); break;
 989                 case Gtr:    printf(" > "); break;
 990                 case GtrEq:  printf(" >= "); break;
 991                 case Eql:    printf(" == "); break;
 992                 case NEql:   printf(" != "); break;
 993                 default: abort();
 994                 }
 995                 print_exec(b->right, -1, 0);
 996                 break;
 997
 998 ###### propagate binode cases
 999         case Less:
1000         case LessEq:
1001         case Gtr:
1002         case GtrEq:
1003         case Eql:
1004         case NEql:
1005                 /* Both must match, result is Vbool */
1006                 t = propagate_types(b->left, Vunknown, ok);
1007                 if (t != Vunknown)
1008                         propagate_types(b->right, t, ok);
1009                 else {
1010                         t = propagate_types(b->right, Vunknown, ok);
1011                         if (t != Vunknown)
1012                                 t = propagate_types(b->left, t, ok);
1013                 }
1014                 if (type != Vbool && type != Vunknown)
1015                         *ok = 0;
1016                 return Vbool;
1017
1018 ###### interp binode cases
1019         case Less:
1020         case LessEq:
1021         case Gtr:
1022         case GtrEq:
1023         case Eql:
1024         case NEql:
1025         {
1026                 int cmp;
1027                 left = interp_exec(b->left);
1028                 right = interp_exec(b->right);
1029                 cmp = value_cmp(left, right);
1030                 rv.vtype = Vbool;
1031                 switch (b->op) {
1032                 case Less:      rv.bool = cmp <  0; break;
1033                 case LessEq:    rv.bool = cmp <= 0; break;
1034                 case Gtr:       rv.bool = cmp >  0; break;
1035                 case GtrEq:     rv.bool = cmp >= 0; break;
1036                 case Eql:       rv.bool = cmp == 0; break;
1037                 case NEql:      rv.bool = cmp != 0; break;
1038                 default: rv.bool = 0; break;
1039                 }
1040                 break;
1041         }
1042
1043 ### Expressions: The rest
1044
1045 The remaining expressions with the highest precedence are arithmetic
1046 and string concatenation.  There are `Expr`, `Term`, and `Factor`.
1047 The `Factor` is where the `Value` and `Variable` that we already have
1048 are included.
1049
1050 `+` and `-` are both infix and prefix operations (where they are
1051 absolute value and negation).  These have different operator names.
1052
1053 We also have a 'Bracket' operator which records where parentheses were
1054 found.  This make it easy to reproduce these when printing.  Once
1055 precedence is handled better I might be able to discard this.
1056
1057 ###### Binode types
1058         Plus, Minus,
1059         Times, Divide,
1060         Concat,
1061         Absolute, Negate,
1062         Bracket,
1063
1064 ###### Grammar
1065
1066         $*binode
1067         Expr -> Expr Eop Term ${
1068                         $0 = new(binode);
1069                         $0->op = $2.op;
1070                         $0->left = $<1;
1071                         $0->right = $<3;
1072                 }$
1073                 | Term ${ $0 = $<1; }$
1074
1075         Term -> Term Top Factor ${
1076                         $0 = new(binode);
1077                         $0->op = $2.op;
1078                         $0->left = $<1;
1079                         $0->right = $<3;
1080                 }$
1081                 | Factor ${ $0 = $<1; }$
1082
1083         Factor -> ( Expression ) ${
1084                         $0 = new(binode);
1085                         $0->op = Bracket;
1086                         $0->right = $<2;
1087                 }$
1088                 | Uop Factor ${
1089                         $0 = new(binode);
1090                         $0->op = $1.op;
1091                         $0->right = $<2;
1092                 }$
1093                 | Value ${ $0 = (struct binode *)$<1; }$
1094                 | Variable ${ $0 = (struct binode *)$<1; }$
1095
1096         $eop
1097         Eop ->    + ${ $0.op = Plus; }$
1098                 | - ${ $0.op = Minus; }$
1099
1100         Uop ->    + ${ $0.op = Absolute; }$
1101                 | - ${ $0.op = Negate; }$
1102
1103         Top ->    * ${ $0.op = Times; }$
1104                 | / ${ $0.op = Divide; }$
1105                 | ++ ${ $0.op = Concat; }$
1106
1107 ###### print binode cases
1108         case Plus:
1109         case Minus:
1110         case Times:
1111         case Divide:
1112         case Concat:
1113                 print_exec(b->left, indent, 0);
1114                 switch(b->op) {
1115                 case Plus:   printf(" + "); break;
1116                 case Minus:  printf(" - "); break;
1117                 case Times:  printf(" * "); break;
1118                 case Divide: printf(" / "); break;
1119                 case Concat: printf(" ++ "); break;
1120                 default: abort();
1121                 }
1122                 print_exec(b->right, indent, 0);
1123                 break;
1124         case Absolute:
1125                 printf("+");
1126                 print_exec(b->right, indent, 0);
1127                 break;
1128         case Negate:
1129                 printf("-");
1130                 print_exec(b->right, indent, 0);
1131                 break;
1132         case Bracket:
1133                 printf("(");
1134                 print_exec(b->right, indent, 0);
1135                 printf(")");
1136                 break;
1137
1138 ###### propagate binode cases
1139         case Plus:
1140         case Minus:
1141         case Times:
1142         case Divide:
1143                 /* both must be numbers, result is Vnum */
1144         case Absolute:
1145         case Negate:
1146                 /* as propagate_types ignores a NULL,
1147                  * unary ops fit here too */
1148                 propagate_types(b->left, Vnum, ok);
1149                 propagate_types(b->right, Vnum, ok);
1150                 if (type != Vnum && type != Vunknown)
1151                         *ok = 0;
1152                 return Vnum;
1153
1154         case Concat:
1155                 /* both must be Vstr, result is Vstr */
1156                 propagate_types(b->left, Vstr, ok);
1157                 propagate_types(b->right, Vstr, ok);
1158                 if (type != Vstr && type != Vunknown)
1159                         *ok = 0;
1160                 return Vstr;
1161
1162         case Bracket:
1163                 return propagate_types(b->right, type, ok);
1164
1165 ###### interp binode cases
1166
1167         case Plus:
1168                 rv = interp_exec(b->left);
1169                 right = interp_exec(b->right);
1170                 mpq_add(rv.num, rv.num, right.num);
1171                 break;
1172         case Minus:
1173                 rv = interp_exec(b->left);
1174                 right = interp_exec(b->right);
1175                 mpq_sub(rv.num, rv.num, right.num);
1176                 break;
1177         case Times:
1178                 rv = interp_exec(b->left);
1179                 right = interp_exec(b->right);
1180                 mpq_mul(rv.num, rv.num, right.num);
1181                 break;
1182         case Divide:
1183                 rv = interp_exec(b->left);
1184                 right = interp_exec(b->right);
1185                 mpq_div(rv.num, rv.num, right.num);
1186                 break;
1187         case Negate:
1188                 rv = interp_exec(b->right);
1189                 mpq_neg(rv.num, rv.num);
1190                 break;
1191         case Absolute:
1192                 rv = interp_exec(b->right);
1193                 mpq_abs(rv.num, rv.num);
1194                 break;
1195         case Bracket:
1196                 rv = interp_exec(b->right);
1197                 break;
1198         case Concat:
1199                 left = interp_exec(b->left);
1200                 right = interp_exec(b->right);
1201                 rv.vtype = Vstr;
1202                 rv.str = text_join(left.str, right.str);
1203                 break;
1204
1205 ### Blocks, Statements, and Statement lists.
1206
1207 Now that we have expressions out of the way we need to turn to
1208 statements.  There are simple statements and more complex statements.
1209 Simple statements do not contain newlines, complex statements do.
1210
1211 Statements often come in sequences and we have corresponding simple
1212 statement lists and complex statement lists.
1213 The former comprise only simple statements separated by semicolons.
1214 The later comprise complex statements and simple statement lists.  They are
1215 separated by newlines.  Thus the semicolon is only used to separate
1216 simple statements on the one line.  This may be overly restrictive,
1217 but I'm not sure I every want a complex statement to share a line with
1218 anything else.
1219
1220 Note that a simple statement list can still use multiple lines if
1221 subsequent lines are indented, so
1222
1223 ###### Example: wrapped simple statement list
1224
1225         a = b; c = d;
1226            e = f; print g
1227
1228 is a single simple statement list.  This might allow room for
1229 confusion, so I'm not set on it yet.
1230
1231 A simple statement list needs no extra syntax.  A complex statement
1232 list has two syntactic forms.  It can be enclosed in braces (much like
1233 C blocks), or it can be introduced by a colon and continue until an
1234 unindented newline (much like Python blocks).  With this extra syntax
1235 it is referred to as a block.
1236
1237 Note that a block does not have to include any newlines if it only
1238 contains simple statements.  So both of:
1239
1240         if condition: a=b; d=f
1241
1242         if condition { a=b; print f }
1243
1244 are valid.
1245
1246 In either case the list is constructed from a `binode` list with
1247 `Block` as the operator.  When parsing the list it is most convenient
1248 to append to the end, so a list is a list and a statement.  When using
1249 the list it is more convenient to consider a list to be a statement
1250 and a list.  So we need a function to re-order a list.
1251 `reorder_bilist` serves this purpose.
1252
1253 The only stand-alone statement we introduce at this stage is `pass`
1254 which does nothing and is represented as a `NULL` pointer in a `Block`
1255 list.
1256
1257 ###### Binode types
1258         Block,
1259
1260 ###### Grammar
1261
1262         $void
1263         OptNL -> Newlines
1264                 |
1265
1266         Newlines -> NEWLINE
1267                 | Newlines NEWLINE
1268
1269         $*binode
1270         Open -> {
1271                 | NEWLINE {
1272         Close -> }
1273                 | NEWLINE }
1274         Block -> Open Statementlist Close ${ $0 = $<2; }$
1275                 | Open SimpleStatements } ${ $0 = reorder_bilist($<2); }$
1276                 | : Statementlist ${ $0 = $<2; }$
1277                 | : SimpleStatements ${ $0 = reorder_bilist($<2); }$
1278
1279         Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<1); }$
1280
1281         ComplexStatements -> ComplexStatements ComplexStatement ${
1282                 $0 = new(binode);
1283                 $0->op = Block;
1284                 $0->left = $<1;
1285                 $0->right = $<2;
1286                 }$
1287                 | ComplexStatements NEWLINE ${ $0 = $<1; }$
1288                 | ComplexStatement ${
1289                 $0 = new(binode);
1290                 $0->op = Block;
1291                 $0->left = NULL;
1292                 $0->right = $<1;
1293                 }$
1294
1295         $*exec
1296         ComplexStatement -> SimpleStatements NEWLINE ${
1297                         $0 = reorder_bilist($<1);
1298                         }$
1299                 ## ComplexStatement Grammar
1300
1301         $*binode
1302         SimpleStatements -> SimpleStatements ; SimpleStatement ${
1303                         $0 = new(binode);
1304                         $0->op = Block;
1305                         $0->left = $<1;
1306                         $0->right = $<3;
1307                         }$
1308                 | SimpleStatement ${
1309                         $0 = new(binode);
1310                         $0->op = Block;
1311                         $0->left = NULL;
1312                         $0->right = $<1;
1313                         }$
1314                 | SimpleStatements ; ${ $0 = $<1; }$
1315
1316         SimpleStatement -> pass ${ $0 = NULL; }$
1317                 ## SimpleStatement Grammar
1318
1319 ###### print binode cases
1320         case Block:
1321                 if (indent < 0) {
1322                         // simple statement
1323                         if (b->left == NULL)
1324                                 printf("pass");
1325                         else
1326                                 print_exec(b->left, indent, 0);
1327                         if (b->right) {
1328                                 printf("; ");
1329                                 print_exec(b->right, indent, 0);
1330                         }
1331                 } else {
1332                         // block, one per line
1333                         if (b->left == NULL)
1334                                 do_indent(indent, "pass\n");
1335                         else
1336                                 print_exec(b->left, indent, bracket);
1337                         if (b->right)
1338                                 print_exec(b->right, indent, bracket);
1339                 }
1340                 break;
1341
1342 ###### propagate binode cases
1343         case Block:
1344         {
1345                 /* If any statement returns something other then Vnone
1346                  * then all such must return same type.
1347                  * As each statement may be Vnone or something else,
1348                  * we must always pass Vunknown down, otherwise an incorrect
1349                  * error might occur.
1350                  */
1351                 struct binode *e;
1352
1353                 for (e = b; e; e = cast(binode, e->right)) {
1354                         t = propagate_types(e->left, Vunknown, ok);
1355                         if (t != Vunknown && t != Vnone) {
1356                                 if (type == Vunknown)
1357                                         type = t;
1358                                 else if (t != type)
1359                                         *ok = 0;
1360                         }
1361                 }
1362                 return type;
1363         }
1364
1365 ###### interp binode cases
1366         case Block:
1367                 while (rv.vtype == Vnone &&
1368                        b) {
1369                         if (b->left)
1370                                 rv = interp_exec(b->left);
1371                         b = cast(binode, b->right);
1372                 }
1373                 break;
1374
1375 ### The Print statement
1376
1377 `print` is a simple statement that takes a comma-separated list of
1378 expressions and prints the values separated by spaces and terminated
1379 by a newline.  No control of formatting is possible.
1380
1381 `print` faces the same list-ordering issue as blocks, and uses the
1382 same solution.
1383
1384 ###### Binode types
1385         Print,
1386
1387 ###### SimpleStatement Grammar
1388
1389         | print ExpressionList ${
1390                 $0 = reorder_bilist($<2);
1391         }$
1392         | print ExpressionList , ${
1393                 $0 = new(binode);
1394                 $0->op = Print;
1395                 $0->right = NULL;
1396                 $0->left = $<2;
1397                 $0 = reorder_bilist($0);
1398         }$
1399         | print ${
1400                 $0 = new(binode);
1401                 $0->op = Print;
1402                 $0->right = NULL;
1403         }$
1404
1405 ###### Grammar
1406
1407         $*binode
1408         ExpressionList -> ExpressionList , Expression ${
1409                 $0 = new(binode);
1410                 $0->op = Print;
1411                 $0->left = $<1;
1412                 $0->right = $<3;
1413                 }$
1414                 | Expression ${
1415                         $0 = new(binode);
1416                         $0->op = Print;
1417                         $0->left = NULL;
1418                         $0->right = $<1;
1419                 }$
1420
1421 ###### print binode cases
1422
1423         case Print:
1424                 do_indent(indent, "print");
1425                 while (b) {
1426                         if (b->left) {
1427                                 printf(" ");
1428                                 print_exec(b->left, -1, 0);
1429                                 if (b->right)
1430                                         printf(",");
1431                         }
1432                         b = cast(binode, b->right);
1433                 }
1434                 if (indent >= 0)
1435                         printf("\n");
1436                 break;
1437
1438 ###### propagate binode cases
1439
1440         case Print:
1441                 /* don't care but all must be consistent */
1442                 propagate_types(b->left, Vunknown, ok);
1443                 propagate_types(b->right, Vunknown, ok);
1444                 break;
1445
1446 ###### interp binode cases
1447
1448         case Print:
1449         {
1450                 char sep = 0;
1451                 int eol = 1;
1452                 for ( ; b; b = cast(binode, b->right))
1453                         if (b->left) {
1454                                 if (sep)
1455                                         putchar(sep);
1456                                 left = interp_exec(b->left);
1457                                 print_value(left);
1458                                 free_value(left);
1459                                 if (b->right)
1460                                         sep = ' ';
1461                         } else if (sep)
1462                                 eol = 0;
1463                 left.vtype = Vnone;
1464                 if (eol)
1465                         printf("\n");
1466                 break;
1467         }
1468
1469 ###### Assignment statement
1470
1471 An assignment will assign a value to a variable.  The analysis phase
1472 ensures that the type will be correct so the interpreted just needs to
1473 perform the calculation.
1474
1475 ###### Binode types
1476         Assign,
1477
1478 ###### SimpleStatement Grammar
1479         | Variable = Expression ${
1480                         $0 = new(binode);
1481                         $0->op = Assign;
1482                         $0->left = $<1;
1483                         $0->right =$<3;
1484                 }$
1485
1486 ###### print binode cases
1487
1488         case Assign:
1489                 do_indent(indent, "");
1490                 print_exec(b->left, indent, 0);
1491                 printf(" = ");
1492                 print_exec(b->right, indent, 0);
1493                 if (indent >= 0)
1494                         printf("\n");
1495                 break;
1496
1497 ###### propagate binode cases
1498
1499         case Assign:
1500                 /* Both must match, result is Vnone */
1501                 t = propagate_types(b->left, Vunknown, ok);
1502                 if (t != Vunknown)
1503                         propagate_types(b->right, t, ok);
1504                 else {
1505                         t = propagate_types(b->right, Vunknown, ok);
1506                         if (t != Vunknown)
1507                                 t = propagate_types(b->left, t, ok);
1508                 }
1509                 return Vnone;
1510
1511 ###### interp binode cases
1512
1513         case Assign:
1514         {
1515                 struct variable *v = cast(var, b->left)->var;
1516                 right = interp_exec(b->right);
1517                 free_value(v->val);
1518                 v->val = right;
1519                 right.vtype = Vunknown;
1520                 break;
1521         }
1522
1523 ### The `use` statement
1524
1525 The `use` statement is the last "simple" statement.  It is needed when
1526 the condition in a conditional statement is a block.  `use` works much
1527 like `return` in C, but only completes the `condition`, not the whole
1528 function.
1529
1530 ###### Binode types
1531         Use,
1532
1533 ###### SimpleStatement Grammar
1534         | use Expression ${
1535                 $0 = new(binode);
1536                 $0->op = Use;
1537                 $0->right = $<2;
1538         }$
1539
1540 ###### print binode cases
1541
1542         case Use:
1543                 do_indent(indent, "use ");
1544                 print_exec(b->right, -1, 0);
1545                 if (indent >= 0)
1546                         printf("\n");
1547                 break;
1548
1549 ###### propagate binode cases
1550
1551         case Use:
1552                 /* result matches value */
1553                 return propagate_types(b->right, type, ok);
1554
1555 ###### interp binode cases
1556
1557         case Use:
1558                 rv = interp_exec(b->right);
1559                 break;
1560
1561 ### The Conditional Statement
1562
1563 This is the biggy and currently the only complex statement.
1564 This subsumes `if`, `while`, `do/while`, `switch`, and some part of
1565 `for`.  It is comprised of a number of parts, all of which are
1566 optional though set combinations apply.
1567
1568 If there is a `forpart`, it is executed first, only once.
1569 If there is a `dopart`, then it is executed repeatedly providing
1570 always that the `condpart` or `cond`, if present, does not return a non-True
1571 value.  `condpart` can fail to return any value if it simply executes
1572 to completion.  This is treated the same as returning True.
1573
1574 If there is a `thenpart` it will be executed whenever the `condpart`
1575 or `cond` returns True (or does not return), but this will happen
1576 *after* `dopart` (when present).
1577
1578 If `elsepart` is present it will be executed at most once when the
1579 condition returns False.  If there are any `casepart`s, they will be
1580 executed when the condition returns a matching value.
1581
1582 The particular sorts of values allowed in case parts has not yet been
1583 determined in the language design.
1584
1585 The cond_statement cannot fit into a `binode` so a new `exec` is
1586 defined.
1587
1588 ###### exec type
1589         Xcond_statement,
1590
1591 ###### ast
1592         struct casepart {
1593                 struct exec *value;
1594                 struct exec *action;
1595                 struct casepart *next;
1596         };
1597         struct cond_statement {
1598                 struct exec;
1599                 struct exec *forpart, *condpart, *dopart, *thenpart, *elsepart;
1600                 struct casepart *casepart;
1601         };
1602
1603 ###### ast functions
1604
1605         static void free_casepart(struct casepart *cp)
1606         {
1607                 while (cp) {
1608                         struct casepart *t;
1609                         free_exec(cp->value);
1610                         free_exec(cp->action);
1611                         t = cp->next;
1612                         free(cp);
1613                         cp = t;
1614                 }
1615         }
1616
1617         void free_cond_statement(struct cond_statement *s)
1618         {
1619                 if (!s)
1620                         return;
1621                 free_exec(s->forpart);
1622                 free_exec(s->condpart);
1623                 free_exec(s->dopart);
1624                 free_exec(s->thenpart);
1625                 free_exec(s->elsepart);
1626                 free_casepart(s->casepart);
1627                 free(s);
1628         }
1629
1630 ###### free exec cases
1631         case Xcond_statement: free_cond_statement(cast(cond_statement, e)); break;
1632
1633 ###### ComplexStatement Grammar
1634         | CondStatement ${ $0 = $<1; }$
1635
1636 ###### Grammar
1637
1638         $*cond_statement
1639         CondStatement -> ForThen WhilePart CondSuffix ${
1640                         $0 = $<3;
1641                         $0->forpart = $1.forpart; $1.forpart = NULL;
1642                         $0->thenpart = $1.thenpart; $1.thenpart = NULL;
1643                         $0->condpart = $2.condpart; $2.condpart = NULL;
1644                         $0->dopart = $2.dopart; $2.dopart = NULL;
1645                         }$
1646                 | WhilePart CondSuffix ${
1647                         $0 = $<2;
1648                         $0->condpart = $1.condpart; $1.condpart = NULL;
1649                         $0->dopart = $1.dopart; $1.dopart = NULL;
1650                         }$
1651                 | SwitchPart CondSuffix ${
1652                         $0 = $<2;
1653                         $0->condpart = $<1;
1654                         }$
1655                 | IfPart IfSuffix ${
1656                         $0 = $<2;
1657                         $0->condpart = $1.condpart; $1.condpart = NULL;
1658                         $0->thenpart = $1.thenpart; $1.thenpart = NULL;
1659                         }$
1660
1661         CondSuffix -> IfSuffix ${ $0 = $<1; }$
1662                 | Newlines case Expression Block CondSuffix ${ {
1663                         struct casepart *cp = calloc(1, sizeof(*cp));
1664                         $0 = $<5;
1665                         cp->value = $<3;
1666                         cp->action = $<4;
1667                         cp->next = $0->casepart;
1668                         $0->casepart = cp;
1669                 } }$
1670                 | case Expression Block CondSuffix ${ {
1671                         struct casepart *cp = calloc(1, sizeof(*cp));
1672                         $0 = $<4;
1673                         cp->value = $<2;
1674                         cp->action = $<3;
1675                         cp->next = $0->casepart;
1676                         $0->casepart = cp;
1677                 } }$
1678
1679         IfSuffix -> Newlines ${ $0 = new(cond_statement); }$
1680                 | Newlines else Block ${
1681                         $0 = new(cond_statement);
1682                         $0->elsepart = $<3;
1683                 }$
1684                 | else Block ${
1685                         $0 = new(cond_statement);
1686                         $0->elsepart = $<2;
1687                 }$
1688                 | Newlines else CondStatement ${
1689                         $0 = new(cond_statement);
1690                         $0->elsepart = $<3;
1691                 }$
1692                 | else CondStatement ${
1693                         $0 = new(cond_statement);
1694                         $0->elsepart = $<2;
1695                 }$
1696
1697
1698         $*exec
1699         ForPart -> for SimpleStatements ${
1700                         $0 = reorder_bilist($<2);
1701                 }$
1702                 |  for Block ${
1703                         $0 = $<2;
1704                 }$
1705
1706         ThenPart -> then SimpleStatements ${
1707                         $0 = reorder_bilist($<2);
1708                 }$
1709                 |  then Block ${
1710                         $0 = $<2;
1711                 }$
1712
1713         ThenPartNL -> ThenPart OptNL ${
1714                         $0 = $<1;
1715                 }$
1716
1717         WhileHead -> while Block ${
1718                 $0 = $<2;
1719                 }$
1720
1721         $cond_statement
1722         ForThen -> ForPart OptNL ThenPartNL ${
1723                         $0.forpart = $<1;
1724                         $0.thenpart = $<3;
1725                 }$
1726                 | ForPart OptNL ${
1727                         $0.forpart = $<1;
1728                 }$
1729
1730         WhilePart -> while Expression Block ${
1731                         $0.type = Xcond_statement;
1732                         $0.condpart = $<2;
1733                         $0.dopart = $<3;
1734                 }$
1735                 |    WhileHead OptNL do Block ${
1736                         $0.type = Xcond_statement;
1737                         $0.condpart = $<1;
1738                         $0.dopart = $<4;
1739                 }$
1740
1741         IfPart -> if Expression Block ${
1742                         $0.type = Xcond_statement;
1743                         $0.condpart = $<2;
1744                         $0.thenpart = $<3;
1745                 }$
1746                 | if Block OptNL then Block ${
1747                         $0.type = Xcond_statement;
1748                         $0.condpart = $<2;
1749                         $0.thenpart = $<5;
1750                 }$
1751
1752         $*exec
1753         SwitchPart -> switch Expression ${
1754                         $0 = $<2;
1755                 }$
1756                 | switch Block ${
1757                         $0 = $<2;
1758                 }$
1759
1760 ###### print exec cases
1761
1762         case Xcond_statement:
1763         {
1764                 struct cond_statement *cs = cast(cond_statement, e);
1765                 struct casepart *cp;
1766                 if (cs->forpart) {
1767                         do_indent(indent, "for");
1768                         if (bracket) printf(" {\n"); else printf(":\n");
1769                         print_exec(cs->forpart, indent+1, bracket);
1770                         if (cs->thenpart) {
1771                                 if (bracket)
1772                                         do_indent(indent, "} then {\n");
1773                                 else
1774                                         do_indent(indent, "then:\n");
1775                                 print_exec(cs->thenpart, indent+1, bracket);
1776                         }
1777                         if (bracket) do_indent(indent, "}\n");
1778                 }
1779                 if (cs->dopart) {
1780                         // a loop
1781                         if (cs->condpart && cs->condpart->type == Xbinode &&
1782                             cast(binode, cs->condpart)->op == Block) {
1783                                 if (bracket)
1784                                         do_indent(indent, "while {\n");
1785                                 else
1786                                         do_indent(indent, "while:\n");
1787                                 print_exec(cs->condpart, indent+1, bracket);
1788                                 if (bracket)
1789                                         do_indent(indent, "} do {\n");
1790                                 else
1791                                         do_indent(indent, "do:\n");
1792                                 print_exec(cs->dopart, indent+1, bracket);
1793                                 if (bracket)
1794                                         do_indent(indent, "}\n");
1795                         } else {
1796                                 do_indent(indent, "while ");
1797                                 print_exec(cs->condpart, 0, bracket);
1798                                 if (bracket)
1799                                         printf(" {\n");
1800                                 else
1801                                         printf(":\n");
1802                                 print_exec(cs->dopart, indent+1, bracket);
1803                                 if (bracket)
1804                                         do_indent(indent, "}\n");
1805                         }
1806                 } else {
1807                         // a condition
1808                         if (cs->casepart)
1809                                 do_indent(indent, "switch");
1810                         else
1811                                 do_indent(indent, "if");
1812                         if (cs->condpart && cs->condpart->type == Xbinode &&
1813                             cast(binode, cs->condpart)->op == Block) {
1814                                 printf(":\n");
1815                                 print_exec(cs->condpart, indent+1, bracket);
1816                                 do_indent(indent, "then:\n");
1817                                 print_exec(cs->thenpart, indent+1, bracket);
1818                         } else {
1819                                 printf(" ");
1820                                 print_exec(cs->condpart, 0, bracket);
1821                                 printf(":\n");
1822                                 print_exec(cs->thenpart, indent+1, bracket);
1823                         }
1824                 }
1825                 for (cp = cs->casepart; cp; cp = cp->next) {
1826                         do_indent(indent, "case ");
1827                         print_exec(cp->value, -1, 0);
1828                         printf(":\n");
1829                         print_exec(cp->action, indent+1, bracket);
1830                 }
1831                 if (cs->elsepart) {
1832                         do_indent(indent, "else:\n");
1833                         print_exec(cs->elsepart, indent+1, bracket);
1834                 }
1835                 break;
1836         }
1837
1838 ###### propagate exec cases
1839         case Xcond_statement:
1840         {
1841                 // forpart and dopart must return Vnone
1842                 // condpart must be bool or match casepart->values
1843                 // thenpart, elsepart, casepart->action must match
1844                 // or be Vnone
1845                 struct cond_statement *cs = cast(cond_statement, prog);
1846                 struct casepart *c;
1847
1848                 t = propagate_types(cs->forpart, Vnone, ok);
1849                 if (t != Vunknown && t != Vnone)
1850                         *ok = 0;
1851                 t = propagate_types(cs->dopart, Vnone, ok);
1852                 if (t != Vunknown && t != Vnone)
1853                         *ok = 0;
1854                 if (cs->casepart == NULL)
1855                         propagate_types(cs->condpart, Vbool, ok);
1856                 else {
1857                         t = Vunknown;
1858                         for (c = cs->casepart;
1859                              c && (t == Vunknown); c = c->next)
1860                                 t = propagate_types(c->value, Vunknown, ok);
1861                         if (t == Vunknown && cs->condpart)
1862                                 t = propagate_types(cs->condpart, Vunknown, ok);
1863                         // Now we have a type (I hope) push it down
1864                         if (t != Vunknown) {
1865                                 for (c = cs->casepart; c; c = c->next)
1866                                         propagate_types(c->value, t, ok);
1867                                 propagate_types(cs->condpart, t, ok);
1868                         }
1869                 }
1870                 if (type == Vunknown || type == Vnone)
1871                         type = propagate_types(cs->thenpart, Vunknown, ok);
1872                 if (type == Vunknown || type == Vnone)
1873                         type = propagate_types(cs->elsepart, Vunknown, ok);
1874                 for (c = cs->casepart;
1875                      c && (type == Vunknown || type == Vnone);
1876                      c = c->next)
1877                         type = propagate_types(c->action, Vunknown, ok);
1878                 if (type != Vunknown && type != Vnone) {
1879                         propagate_types(cs->thenpart, type, ok);
1880                         propagate_types(cs->elsepart, type, ok);
1881                         for (c = cs->casepart; c ; c = c->next)
1882                                 propagate_types(c->action, type, ok);
1883                         return type;
1884                 } else
1885                         return Vunknown;
1886         }
1887
1888 ###### interp exec cases
1889         case Xcond_statement:
1890         {
1891                 struct value v, cnd;
1892                 struct casepart *cp;
1893                 struct cond_statement *c = cast(cond_statement, e);
1894                 if (c->forpart)
1895                         interp_exec(c->forpart);
1896                 do {
1897                         if (c->condpart)
1898                                 cnd = interp_exec(c->condpart);
1899                         else
1900                                 cnd.vtype = Vnone;
1901                         if (!(cnd.vtype == Vnone ||
1902                               (cnd.vtype == Vbool && cnd.bool != 0)))
1903                                 break;
1904                         if (c->dopart) {
1905                                 free_value(cnd);
1906                                 interp_exec(c->dopart);
1907                         }
1908                         if (c->thenpart) {
1909                                 v = interp_exec(c->thenpart);
1910                                 if (v.vtype != Vnone || !c->dopart)
1911                                         return v;
1912                                 free_value(v);
1913                         }
1914                 } while (c->dopart);
1915
1916                 for (cp = c->casepart; cp; cp = cp->next) {
1917                         v = interp_exec(cp->value);
1918                         if (value_cmp(v, cnd) == 0) {
1919                                 free_value(v);
1920                                 free_value(cnd);
1921                                 return interp_exec(cp->action);
1922                         }
1923                         free_value(v);
1924                 }
1925                 free_value(cnd);
1926                 if (c->elsepart)
1927                         return interp_exec(c->elsepart);
1928                 v.vtype = Vnone;
1929                 return v;
1930         }
1931
1932 ### Finally the whole program.
1933
1934 Somewhat reminiscent of Pascal a (current) Ocean program starts with
1935 the keyword "program" and list of variable names which are assigned
1936 values from command line arguments.  Following this is a `block` which
1937 is the code to execute.
1938
1939 As this is the top level, several things are handled a bit
1940 differently.
1941 The whole program is not interpreted by `interp_exec` as that isn't
1942 passed the argument list which the program requires.  Similarly type
1943 analysis is a bit more interesting at this level.
1944
1945 ###### Binode types
1946         Program,
1947
1948 ###### Parser: grammar
1949
1950         $*binode
1951         Program -> program Varlist Block OptNL ${
1952                 $0 = new(binode);
1953                 $0->op = Program;
1954                 $0->left = reorder_bilist($<2);
1955                 $0->right = $<3;
1956         }$
1957
1958         Varlist -> Varlist Variable ${
1959                         $0 = new(binode);
1960                         $0->op = Program;
1961                         $0->left = $<1;
1962                         $0->right = $<2;
1963                 }$
1964                 | ${ $0 = NULL; }$
1965         ## Grammar
1966
1967 ###### print binode cases
1968         case Program:
1969                 do_indent(indent, "program");
1970                 for (b2 = cast(binode, b->left); b2; b2 = cast(binode, b2->right)) {
1971                         printf(" ");
1972                         print_exec(b2->left, 0, 0);
1973                 }
1974                 if (bracket)
1975                         printf(" {\n");
1976                 else
1977                         printf(":\n");
1978                 print_exec(b->right, indent+1, bracket);
1979                 if (bracket)
1980                         do_indent(indent, "}\n");
1981                 break;
1982
1983 ###### propagate binode cases
1984         case Program: abort();
1985
1986 ###### core functions
1987
1988         static int analyse_prog(struct exec *prog, struct parse_context *c)
1989         {
1990                 struct binode *b = cast(binode, prog);
1991                 struct variable *v;
1992                 int ok = 1;
1993                 int uniq = 314159;
1994                 do {
1995                         ok = 1;
1996                         propagate_types(b->right, Vnone, &ok);
1997                 } while (ok == 2);
1998                 if (!ok)
1999                         return 0;
2000
2001                 for (b = cast(binode, b->left); b; b = cast(binode, b->right)) {
2002                         struct var *v = cast(var, b->left);
2003                         if (v->var->val.vtype == Vunknown)
2004                                 val_init(&v->var->val, Vstr);
2005                 }
2006                 b = cast(binode, prog);
2007                 do {
2008                         ok = 1;
2009                         propagate_types(b->right, Vnone, &ok);
2010                 } while (ok == 2);
2011                 if (!ok)
2012                         return 0;
2013
2014                 for (v = c->varlist; v; v = v->next)
2015                         if (v->val.vtype == Vunknown) {
2016                                 v->val.vtype = Vnum;
2017                                 mpq_init(v->val.num);
2018                                 mpq_set_ui(v->val.num, uniq, 1);
2019                                 uniq++;
2020                         }
2021                 /* Make sure everything is still consistent */
2022                 propagate_types(b->right, Vnone, &ok);
2023                 return !!ok;
2024         }
2025
2026         static void interp_prog(struct exec *prog, char **argv)
2027         {
2028                 struct binode *p = cast(binode, prog);
2029                 struct binode *al = cast(binode, p->left);
2030                 struct value v;
2031
2032                 while (al) {
2033                         struct var *v = cast(var, al->left);
2034                         struct value *vl = &v->var->val;
2035
2036                         if (argv[0] == NULL) {
2037                                 printf("Not enough args\n");
2038                                 exit(1);
2039                         }
2040                         al = cast(binode, al->right);
2041                         free_value(*vl);
2042                         if (!parse_value(vl, argv[0]))
2043                                 exit(1);
2044                         argv++;
2045                 }
2046                 v = interp_exec(p->right);
2047                 free_value(v);
2048         }
2049
2050 ###### interp binode cases
2051         case Program: abort();