ocean-lang.org Git - ocean/blob - csrc/oceani.mdc

   1 # Ocean Interpreter - Falls Creek version
   2
   3 Ocean is intended to be an compiled language, so this interpreter is
   4 not targeted at being the final product.  It is very much an intermediate
   5 stage, and fills that role in two distinct ways.
   6
   7 Firstly, it exists as a platform to experiment with the early language
   8 design.  An interpreter is easy to write and easy to get working, so
   9 the barrier for entry is lower if I aim to start with an interpreter.
  10
  11 Secondly, the plan for the Ocean compiler is to write it in the
  12 [Ocean language](http://ocean-lang.org).  To achieve this we naturally
  13 need some sort of boot-strap process and this interpreter - written in
  14 portable C - will fill that role.  It will be used to bootstrap the
  15 Ocean compiler.
  16
  17 Two features that are not needed to fill either of these roles are
  18 performance and completeness.  The interpreter only needs to be fast
  19 enough to run small test programs and occasionally to run the compiler
  20 on itself.  It only needs to be complete enough to test aspects of the
  21 design which are developed before the compiler is working, and to run
  22 the compiler on itself.  Any features not used by the compiler when
  23 compiling itself are superfluous.  They may be included anyway, but
  24 they may not.
  25
  26 Nonetheless, the interpreter should end up being reasonably complete,
  27 and any performance bottlenecks which appear and are easily fixed, will
  28 be.
  29
  30 ## Current version
  31
  32 This initial version of the interpreter exists to test out the
  33 structured statement providing conditions and iteration.  Clearly we
  34 need some minimal other functionality so that values can be tested and
  35 instructions iterated over.  All that functionality is clearly not
  36 normative at this stage (not that anything is **really** normative
  37 yet) and will change, so early test code will certainly break.
  38
  39 Beyond the structured statement and the `use` statement which is
  40 intimately related to it we have:
  41
  42  - "blocks" of multiple statements.
  43  - `pass`: a statement which does nothing.
  44  - variables: any identifier is assumed to store a number, string,
  45    or Boolean.
  46  - expressions: `+`, `-`, `*`, `/` can apply to integers and `++` can
  47    catenate strings.  `and`, `or`, `not` manipulate Booleans, and
  48    normal comparison operators can work on all three types.
  49  - assignments: can assign the value of an expression to a variable.
  50  - `print`: will print the values in a list of expressions.
  51  - `program`: is given a list of identifiers to initialize from
  52    arguments.
  53
  54 ## Naming
  55
  56 Versions of the interpreter which obviously do not support a complete
  57 language will be named after creeks and streams.  This one is Falls
  58 Creek.
  59
  60 Once we have something reasonably resembling a complete language, the
  61 names of rivers will be used.
  62 Early versions of the compiler will be named after seas.  Major
  63 releases of the compiler will be named after oceans.  Hopefully I will
  64 be finished once I get to the Pacific Ocean release.
  65
  66 ## Outline
  67
  68 As well as parsing and executing a program, the interpreter can print
  69 out the program from the parsed internal structure.  This is useful
  70 for validating the parsing.
  71 So the main requirements of the interpreter are:
  72
  73 - Parse the program
  74 - Analyse the parsed program to ensure consistency
  75 - print the program
  76 - execute the program
  77
  78 This is all performed by a single C program extracted with
  79 `parsergen`.
  80
  81 There will be two formats for printing the program a default and one
  82 that uses bracketing.  So an extra command line option is needed for
  83 that.
  84
  85 ###### File: oceani.mk
  86
  87         myCFLAGS := -Wall -g -fplan9-extensions
  88         CFLAGS := $(filter-out $(myCFLAGS),$(CFLAGS)) $(myCFLAGS)
  89         myLDLIBS:= libparser.o libscanner.o libmdcode.o -licuuc
  90         LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
  91         ## libs
  92         all :: oceani
  93         oceani.c oceani.h : oceani.mdc parsergen
  94                 ./parsergen -o oceani --LALR --tag Parser oceani.mdc
  95         oceani.mk: oceani.mdc md2c
  96                 ./md2c oceani.mdc
  97
  98         oceani: oceani.o
  99
 100 ###### Parser: header
 101         ## macros
 102         ## ast
 103         struct parse_context {
 104                 struct token_config config;
 105                 ## parse context
 106         };
 107
 108 ###### Parser: code
 109
 110         #include <unistd.h>
 111         #include <stdlib.h>
 112         #include <fcntl.h>
 113         #include <errno.h>
 114         #include <sys/mman.h>
 115         #include <string.h>
 116         #include <stdio.h>
 117         #include <locale.h>
 118         #include <malloc.h>
 119         #include "mdcode.h"
 120         #include "scanner.h"
 121         #include "parser.h"
 122
 123         ## includes
 124
 125         #include "oceani.h"
 126
 127         ## forward decls
 128         ## value functions
 129         ## ast functions
 130         ## core functions
 131
 132         #include <getopt.h>
 133         static char Usage[] = "Usage: oceani --trace --print --noexec prog.ocn\n";
 134         static const struct option long_options[] = {
 135                 {"trace",     0, NULL, 't'},
 136                 {"print",     0, NULL, 'p'},
 137                 {"noexec",    0, NULL, 'n'},
 138                 {"brackets",  0, NULL, 'b'},
 139                 {NULL,        0, NULL, 0},
 140         };
 141         const char *options = "tpnb";
 142         int main(int argc, char *argv[])
 143         {
 144                 int fd;
 145                 int len;
 146                 char *file;
 147                 struct section *s;
 148                 struct parse_context context = {
 149                         .config = {
 150                                 .ignored = (1 << TK_line_comment)
 151                                          | (1 << TK_block_comment),
 152                                 .number_chars = ".,_+-",
 153                                 .word_start = "_",
 154                                 .word_cont = "_",
 155                         },
 156                 };
 157                 int doprint=0, dotrace=0, doexec=1, brackets=0;
 158                 struct exec **prog;
 159                 int opt;
 160                 while ((opt = getopt_long(argc, argv, options, long_options, NULL))
 161                        != -1) {
 162                         switch(opt) {
 163                         case 't': dotrace=1; break;
 164                         case 'p': doprint=1; break;
 165                         case 'n': doexec=0; break;
 166                         case 'b': brackets=1; break;
 167                         default: fprintf(stderr, Usage);
 168                                 exit(1);
 169                         }
 170                 }
 171                 if (optind >= argc) {
 172                         fprintf(stderr, "oceani: no input file given\n");
 173                         exit(1);
 174                 }
 175                 fd = open(argv[optind], O_RDONLY);
 176                 if (fd < 0) {
 177                         fprintf(stderr, "oceani: cannot open %s\n", argv[optind]);
 178                         exit(1);
 179                 }
 180                 len = lseek(fd, 0, 2);
 181                 file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
 182                 s = code_extract(file, file+len, NULL);
 183                 if (!s) {
 184                         fprintf(stderr, "oceani: could not find any code in %s\n",
 185                                 argv[optind]);
 186                         exit(1);
 187                 }
 188                 prog = parse_oceani(s->code, &context.config,
 189                                     dotrace ? stderr : NULL);
 190                 if (prog && doprint)
 191                         print_exec(*prog, 0, brackets);
 192                 if (prog && doexec) {
 193                         if (!analyse_prog(*prog, &context)) {
 194                                 fprintf(stderr, "oceani: type error in program\n");
 195                                 exit(1);
 196                         }
 197                         interp_prog(*prog, argv+optind+1);
 198                 }
 199                 if (prog) {
 200                         free_exec(*prog);
 201                         free(prog);
 202                 }
 203                 while (s) {
 204                         struct section *t = s->next;
 205                         code_free(s->code);
 206                         free(s);
 207                         s = t;
 208                 }
 209                 ## free context
 210                 exit(0);
 211         }
 212
 213 ### Analysis
 214
 215 These four requirements of parse, analyse, print, interpret apply to
 216 each language element individually so that is how most of the code
 217 will be structured.
 218
 219 Three of the four are fairly self explanatory.  The one that requires
 220 a little explanation is the analysis step.
 221
 222 The current language design does not require variables to be declared,
 223 but they must have a single type.  Different operations impose
 224 different requirements on the variables, for example addition requires
 225 both arguments to be numeric, and assignment requires the variable on
 226 the left to have the same type as the expression on the right.
 227
 228 Analysis involves propagating these type requirements around
 229 consequently setting the type of each variable.  If any requirements
 230 are violated (e.g. a string is compared with a number) or if a
 231 variable needs to have two different types, then an error is raised
 232 and the program will not run.
 233
 234 Determining the types of all variables early is important for
 235 processing command line arguments.  These can be assigned to any type
 236 of variable, but we must first know the correct type so any required
 237 conversion can happen.  If a variable is associated with a command
 238 line argument but no type can be interpreted (e.g. the variable is
 239 only ever used in a `print` statement), then the type is set to
 240 'string'.
 241
 242 If the type of a variable cannot be determined at all, then it is set
 243 to be a number and given a unique value.  This allows it to fill the
 244 role of a name in an enumerated type, which is useful for testing the
 245 `switch` statement.
 246
 247 ## Data Structures
 248
 249 One last introductory step before detailing the language elements and
 250 providing their four requirements is to establish the data structures
 251 to store these elements.
 252
 253 There are two key objects that we need to work with: executable
 254 elements which comprise the program, and values which the program
 255 works with.  Between these is the set of variables which hold the
 256 values.
 257
 258 ### Values
 259
 260 Values can be numbers, which we represent as multi-precision
 261 fractions, strings and Booleans.  When analysing the program we also
 262 need to allow for places where no value is meaningful (`Vnone`) and
 263 where we don't know what type to expect yet (`Vunknown`).
 264 A 2 character 'tail' is included in each value as the scanner wants
 265 to parse that from the end of numbers and we need somewhere to put
 266 it.  It is currently ignored but one day might allow for
 267 e.g. "imaginary" numbers.
 268
 269 Values are never shared, they are always copied when used, and freed
 270 when no longer needed.
 271
 272 ###### includes
 273         #include <gmp.h>
 274         #include "string.h"
 275         #include "number.h"
 276
 277 ###### libs
 278         myLDLIBS := libnumber.o libstring.o -lgmp
 279         LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
 280
 281 ###### ast
 282         struct value {
 283                 enum vtype {Vunknown, Vnone, Vstr, Vnum, Vbool} vtype;
 284                 union {
 285                         struct text str;
 286                         mpq_t num;
 287                         int bool;
 288                 };
 289                 char tail[2];
 290         };
 291
 292 ###### ast functions
 293         void free_value(struct value v)
 294         {
 295                 switch (v.vtype) {
 296                 case Vnone:
 297                 case Vunknown: break;
 298                 case Vstr: free(v.str.txt); break;
 299                 case Vnum: mpq_clear(v.num); break;
 300                 case Vbool: break;
 301                 }
 302         }
 303
 304 ###### value functions
 305
 306         static void val_init(struct value *val, enum vtype type)
 307         {
 308                 val->vtype = type;
 309                 switch(type) {
 310                 case Vnone:abort();
 311                 case Vunknown: break;
 312                 case Vnum:
 313                         mpq_init(val->num); break;
 314                 case Vstr:
 315                         val->str.txt = malloc(1);
 316                         val->str.len = 0;
 317                         break;
 318                 case Vbool:
 319                         val->bool = 0;
 320                         break;
 321                 }
 322         }
 323
 324         static struct value dup_value(struct value v)
 325         {
 326                 struct value rv;
 327                 rv.vtype = v.vtype;
 328                 switch (rv.vtype) {
 329                 case Vnone:
 330                 case Vunknown: break;
 331                 case Vbool:
 332                         rv.bool = v.bool;
 333                         break;
 334                 case Vnum:
 335                         mpq_init(rv.num);
 336                         mpq_set(rv.num, v.num);
 337                         break;
 338                 case Vstr:
 339                         rv.str.len = v.str.len;
 340                         rv.str.txt = malloc(rv.str.len);
 341                         memcpy(rv.str.txt, v.str.txt, v.str.len);
 342                         break;
 343                 }
 344                 return rv;
 345         }
 346
 347         static int value_cmp(struct value left, struct value right)
 348         {
 349                 int cmp;
 350                 if (left.vtype != right.vtype)
 351                         return left.vtype - right.vtype;
 352                 switch (left.vtype) {
 353                 case Vnum: cmp = mpq_cmp(left.num, right.num); break;
 354                 case Vstr: cmp = text_cmp(left.str, right.str); break;
 355                 case Vbool: cmp = left.bool - right.bool; break;
 356                 case Vnone:
 357                 case Vunknown: cmp = 0;
 358                 }
 359                 return cmp;
 360         }
 361
 362         static struct text text_join(struct text a, struct text b)
 363         {
 364                 struct text rv;
 365                 rv.len = a.len + b.len;
 366                 rv.txt = malloc(rv.len);
 367                 memcpy(rv.txt, a.txt, a.len);
 368                 memcpy(rv.txt+a.len, b.txt, b.len);
 369                 return rv;
 370         }
 371
 372         static void print_value(struct value v)
 373         {
 374                 switch (v.vtype) {
 375                 case Vunknown:
 376                         printf("*Unknown*"); break;
 377                 case Vnone:
 378                         printf("*no-value*"); break;
 379                 case Vstr:
 380                         printf("%.*s", v.str.len, v.str.txt); break;
 381                 case Vbool:
 382                         printf("%s", v.bool ? "True":"False"); break;
 383                 case Vnum:
 384                         {
 385                         mpf_t fl;
 386                         mpf_init2(fl, 20);
 387                         mpf_set_q(fl, v.num);
 388                         gmp_printf("%Fg", fl);
 389                         mpf_clear(fl);
 390                         break;
 391                         }
 392                 }
 393         }
 394
 395         static int parse_value(struct value *vl, char *arg)
 396         {
 397                 struct text tx;
 398                 switch(vl->vtype) {
 399                 case Vunknown:
 400                 case Vnone:
 401                         return 0;
 402                 case Vstr:
 403                         vl->str.len = strlen(arg);
 404                         vl->str.txt = malloc(vl->str.len);
 405                         memcpy(vl->str.txt, arg, vl->str.len);
 406                         break;
 407                 case Vnum:
 408                         tx.txt = arg; tx.len = strlen(tx.txt);
 409                         if (number_parse(vl->num, vl->tail, tx) == 0)
 410                                 mpq_init(vl->num);
 411                         break;
 412                 case Vbool:
 413                         if (strcasecmp(arg, "true") == 0 ||
 414                             strcmp(arg, "1") == 0)
 415                                 vl->bool = 1;
 416                         else if (strcasecmp(arg, "false") == 0 ||
 417                             strcmp(arg, "0") == 0)
 418                                 vl->bool = 2;
 419                         else {
 420                                 printf("Bad bool: %s\n", arg);
 421                                 return 0;
 422                         }
 423                         break;
 424                 }
 425                 return 1;
 426         }
 427
 428 ### Variables
 429
 430 Variables are simply named values.  We store them in a linked list
 431 sorted by name and use sequential search and insertion sort.
 432
 433 This linked list is stored in the parse context so that reduce
 434 functions can find or add variables, and so the analysis phase can
 435 ensure that every variable gets a type.
 436
 437 ###### ast
 438
 439         struct variable {
 440                 struct text name;
 441                 struct variable *next;
 442                 struct value val;
 443         };
 444
 445 ###### macros
 446
 447         #define container_of(ptr, type, member) ({                      \
 448                 const typeof( ((type *)0)->member ) *__mptr = (ptr);    \
 449                 (type *)( (char *)__mptr - offsetof(type,member) );})
 450
 451 ###### parse context
 452
 453         struct variable *varlist;
 454
 455 ###### free context
 456         while (context.varlist) {
 457                 struct variable *v = context.varlist;
 458                 context.varlist = v->next;
 459                 free_value(v->val);
 460                 free(v);
 461         }
 462
 463 ###### ast functions
 464
 465         static struct variable *find_variable(struct token_config *conf, struct text s)
 466         {
 467                 struct variable **l = &container_of(conf, struct parse_context,
 468                                                     config)->varlist;
 469                 struct variable *n;
 470                 int cmp = 1;
 471
 472                 while (*l &&
 473                         (cmp = text_cmp((*l)->name, s)) < 0)
 474                                 l = & (*l)->next;
 475                 if (cmp == 0)
 476                         return *l;
 477                 n = calloc(1, sizeof(*n));
 478                 n->name = s;
 479                 n->val.vtype = Vunknown;
 480                 n->next = *l;
 481                 *l = n;
 482                 return n;
 483         }
 484
 485 ### Executables
 486
 487 Executables can be lots of different things.  In many cases an
 488 executable is just an operation combined with one or two other
 489 executables.  This allows for expressions and lists etc.  Other times
 490 an executable is something quite specific like a constant or variable
 491 name.  So we define a `struct exec` to be a general executable with a
 492 type, and a `struct binode` which is a subclass of `exec` and forms a
 493 node in a binary tree and holding an operation. There will be other
 494 subclasses, and to access these we need to be able to `cast` the
 495 `exec` into the various other types.
 496
 497 ###### macros
 498         #define cast(structname, pointer) ({            \
 499                 const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \
 500                 if (__mptr && *__mptr != X##structname) abort();                \
 501                 (struct structname *)( (char *)__mptr);})
 502
 503         #define new(structname) ({                      \
 504                 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
 505                 __ptr->type = X##structname;            \
 506                 __ptr;})
 507
 508 ###### ast
 509         enum exec_types {
 510                 Xbinode,
 511                 ## exec type
 512         };
 513         struct exec {
 514                 enum exec_types type;
 515         };
 516         struct binode {
 517                 struct exec;
 518                 enum Btype {
 519                         ## Binode types
 520                 } op;
 521                 struct exec *left, *right;
 522         };
 523
 524 Each different type of `exec` node needs a number of functions
 525 defined, a bit like methods.  We must be able to be able to free it,
 526 print it, analyse it and execute it.  Once we have specific `exec`
 527 types we will need to parse them to.  Let's take this a bit more
 528 slowly.
 529
 530 #### Freeing
 531
 532 The parser generator requires as `free_foo` function for each struct
 533 that stores attributes and they will be `exec`s of subtypes there-of.
 534 So we need `free_exec` which can handle all the subtypes, and we need
 535 `free_binode`.
 536
 537 ###### ast functions
 538
 539         static void free_binode(struct binode *b)
 540         {
 541                 if (!b)
 542                         return;
 543                 free_exec(b->left);
 544                 free_exec(b->right);
 545                 free(b);
 546         }
 547
 548 ###### core functions
 549         static void free_exec(struct exec *e)
 550         {
 551                 if (!e)
 552                         return;
 553                 switch(e->type) {
 554                         ## free exec cases
 555                 }
 556         }
 557
 558 ###### forward decls
 559
 560         static void free_exec(struct exec *e);
 561
 562 ###### free exec cases
 563         case Xbinode: free_binode(cast(binode, e)); break;
 564
 565 #### Printing
 566
 567 Printing an `exec` requires that we know the current indent level for
 568 printing line-oriented components.  As will become clear later, we
 569 also want to know what sort of bracketing to use.
 570
 571 ###### ast functions
 572
 573         static void do_indent(int i, char *str)
 574         {
 575                 while (i--)
 576                         printf("    ");
 577                 printf("%s", str);
 578         }
 579
 580 ###### core functions
 581         static void print_binode(struct binode *b, int indent, int bracket)
 582         {
 583                 struct binode *b2;
 584                 switch(b->op) {
 585                 ## print binode cases
 586                 }
 587         }
 588
 589         static void print_exec(struct exec *e, int indent, int bracket)
 590         {
 591                 switch (e->type) {
 592                 case Xbinode:
 593                         print_binode(cast(binode, e), indent, bracket); break;
 594                 ## print exec cases
 595                 }
 596         }
 597
 598 ###### forward decls
 599
 600         static void print_exec(struct exec *e, int indent, int bracket);
 601
 602 #### Analysing
 603
 604 As discusses, analysis involves propagating type requirements around
 605 the program and looking for errors.
 606
 607 So propagate_types is passed a type that the `exec` is expected to return,
 608 and returns the type that it does return, either of which can be `Vunknown`.
 609 An `ok` flag is passed by reference. It is set to `0` when an error is
 610 found, and `2` when any change is made.  If it remains unchanged at
 611 `1`, then no more propagation is needed.
 612
 613 ###### core functions
 614
 615         static enum vtype propagate_types(struct exec *prog, enum vtype type,
 616                                           int *ok)
 617         {
 618                 enum vtype t;
 619
 620                 if (!prog) {
 621                         if (type != Vunknown && type != Vnone)
 622                                 *ok = 0;
 623                         return Vnone;
 624                 }
 625
 626                 switch (prog->type) {
 627                 case Xbinode:
 628                 {
 629                         struct binode *b = cast(binode, prog);
 630                         switch (b->op) {
 631                         ## propagate binode cases
 632                         }
 633                         break;
 634                 }
 635                 ## propagate exec cases
 636                 }
 637                 return Vnone;
 638         }
 639
 640 #### Interpreting
 641
 642 Interpreting an `exec` doesn't require anything but the `exec`.  State
 643 is stored in variables and each variable will be directly linked from
 644 within the `exec` tree.  The exception to this is the whole `program`
 645 which needs to look at command line arguments.  The `program` will be
 646 interpreted separately.
 647
 648 Each `exec` can return a value, which may be `Vnone` but shouldn't be `Vunknown`.
 649
 650 ###### core functions
 651
 652         static struct value interp_exec(struct exec *e)
 653         {
 654                 struct value rv;
 655                 rv.vtype = Vnone;
 656                 if (!e)
 657                         return rv;
 658
 659                 switch(e->type) {
 660                 case Xbinode:
 661                 {
 662                         struct binode *b = cast(binode, e);
 663                         struct value left, right;
 664                         left.vtype = right.vtype = Vnone;
 665                         switch (b->op) {
 666                         ## interp binode cases
 667                         }
 668                         free_value(left); free_value(right);
 669                         break;
 670                 }
 671                 ## interp exec cases
 672                 }
 673                 return rv;
 674         }
 675
 676 ## Language elements
 677
 678 Each language element needs to be parsed, printed, analysed,
 679 interpreted, and freed.  There are several, so let's just start with
 680 the easy ones and work our way up.
 681
 682 ### Values
 683
 684 We have already met values and separate objects.  When manifest
 685 constants appear in the program text that must result in an executable
 686 which has a constant value.  So the `val` structure embeds a value in
 687 an executable.
 688
 689 ###### exec type
 690         Xval,
 691
 692 ###### ast
 693         struct val {
 694                 struct exec;
 695                 struct value val;
 696         };
 697
 698 ###### Grammar
 699
 700         $*val
 701         Value ->  True ${
 702                         $0 = new(val);
 703                         $0->val.vtype = Vbool;
 704                         $0->val.bool = 1;
 705                         }$
 706                 | False ${
 707                         $0 = new(val);
 708                         $0->val.vtype = Vbool;
 709                         $0->val.bool = 0;
 710                         }$
 711                 | NUMBER ${
 712                         $0 = new(val);
 713                         $0->val.vtype = Vnum;
 714                         if (number_parse($0->val.num, $0->val.tail, $1.txt) == 0)
 715                                 mpq_init($0->val.num);
 716                         }$
 717                 | STRING ${
 718                         $0 = new(val);
 719                         $0->val.vtype = Vstr;
 720                         string_parse(&$1, '\\', &$0->val.str, $0->val.tail);
 721                         }$
 722
 723 ###### print exec cases
 724         case Xval:
 725         {
 726                 struct val *v = cast(val, e);
 727                 if (v->val.vtype == Vstr)
 728                         printf("\"");
 729                 print_value(v->val);
 730                 if (v->val.vtype == Vstr)
 731                         printf("\"");
 732                 break;
 733         }
 734
 735 ###### propagate exec cases
 736                 case Xval:
 737                 {
 738                         struct val *val = cast(val, prog);
 739                         if (type != Vunknown &&
 740                             type != val->val.vtype)
 741                                 *ok = 0;
 742                         return val->val.vtype;
 743                 }
 744
 745 ###### interp exec cases
 746         case Xval:
 747                 return dup_value(cast(val, e)->val);
 748
 749 ###### ast functions
 750         void free_val(struct val *v)
 751         {
 752                 if (!v)
 753                         return;
 754                 free_value(v->val);
 755                 free(v);
 756         }
 757
 758 ###### free exec cases
 759         case Xval: free_val(cast(val, e)); break;
 760
 761 ###### ast functions
 762         // Move all nodes from 'b' to 'rv', reversing the order.
 763         // In 'b' 'left' is a list, and 'right' is the last node.
 764         // In 'rv', left' is the first node and 'right' is a list.
 765         struct binode *reorder_bilist(struct binode *b)
 766         {
 767                 struct binode *rv = NULL;
 768
 769                 while (b) {
 770                         struct exec *t = b->right;
 771                         b->right = rv;
 772                         rv = b;
 773                         if (b->left)
 774                                 b = cast(binode, b->left);
 775                         else
 776                                 b = NULL;
 777                         rv->left = t;
 778                 }
 779                 return rv;
 780         }
 781
 782 ### Variables
 783
 784 Just as we used as `val` to wrap a value into an `exec`, we similarly
 785 need a `var` to wrap a `variable` into an exec.  While each `val`
 786 contained a copy of the value, each `var` hold a link to the variable
 787 because it really is the same variable no matter where it appears.
 788
 789 ###### exec type
 790         Xvar,
 791
 792 ###### ast
 793         struct var {
 794                 struct exec;
 795                 struct variable *var;
 796         };
 797
 798 ###### Grammar
 799         $*var
 800         Variable -> IDENTIFIER ${
 801                 $0 = new(var);
 802                 $0->var = find_variable(config, $1.txt);
 803         }$
 804
 805 ###### print exec cases
 806         case Xvar:
 807         {
 808                 struct var *v = cast(var, e);
 809                 printf("%.*s", v->var->name.len, v->var->name.txt);
 810                 break;
 811         }
 812
 813 ###### propagate exec cases
 814
 815         case Xvar:
 816         {
 817                 struct var *var = cast(var, prog);
 818                 if (var->var->val.vtype == Vunknown) {
 819                         if (type != Vunknown && *ok != 0) {
 820                                 val_init(&var->var->val, type);
 821                                 *ok = 2;
 822                         }
 823                         return type;
 824                 }
 825                 if (type == Vunknown)
 826                         return var->var->val.vtype;
 827                 if (type != var->var->val.vtype)
 828                         *ok = 0;
 829                 return type;
 830         }
 831
 832 ###### interp exec cases
 833         case Xvar:
 834                 return dup_value(cast(var, e)->var->val);
 835
 836 ###### ast functions
 837
 838         void free_var(struct var *v)
 839         {
 840                 free(v);
 841         }
 842
 843 ###### free exec cases
 844         case Xvar: free_var(cast(var, e)); break;
 845
 846 ### Expressions: Boolean
 847
 848 Our first user of the `binode` will be expressions, and particularly
 849 Boolean expressions.  As I haven't implemented precedence in the
 850 parser generator yet, we need different names from each precedence
 851 level used by expressions.  The outer most or lowest level precedence
 852 are Boolean `or` `and`, and `not` which form and `Expression` our of `BTerm`s
 853 and `BFact`s.
 854
 855 ###### Binode types
 856         And,
 857         Or,
 858         Not,
 859
 860 ####### Grammar
 861
 862         $*binode
 863         Expression -> Expression or BTerm ${
 864                         $0 = new(binode);
 865                         $0->op = Or;
 866                         $0->left = $<1;
 867                         $0->right = $<3;
 868                 }$
 869                 | BTerm ${ $0 = $<1; }$
 870
 871         BTerm -> BTerm and BFact ${
 872                         $0 = new(binode);
 873                         $0->op = And;
 874                         $0->left = $<1;
 875                         $0->right = $<3;
 876                 }$
 877                 | BFact ${ $0 = $<1; }$
 878
 879         BFact -> not BFact ${
 880                         $0 = new(binode);
 881                         $0->op = Not;
 882                         $0->right = $<2;
 883                         }$
 884                 ## other BFact
 885
 886 ###### print binode cases
 887         case And:
 888                 print_exec(b->left, -1, 0);
 889                 printf(" and ");
 890                 print_exec(b->right, -1, 0);
 891                 break;
 892         case Or:
 893                 print_exec(b->left, -1, 0);
 894                 printf(" or ");
 895                 print_exec(b->right, -1, 0);
 896                 break;
 897         case Not:
 898                 printf("not ");
 899                 print_exec(b->right, -1, 0);
 900                 break;
 901
 902 ###### propagate binode cases
 903         case And:
 904         case Or:
 905         case Not:
 906                 /* both must be Vbool, result is Vbool */
 907                 propagate_types(b->left, Vbool, ok);
 908                 propagate_types(b->right, Vbool, ok);
 909                 if (type != Vbool && type != Vunknown)
 910                         *ok = 0;
 911                 return Vbool;
 912
 913 ###### interp binode cases
 914         case And:
 915                 rv = interp_exec(b->left);
 916                 right = interp_exec(b->right);
 917                 rv.bool = rv.bool && right.bool;
 918                 break;
 919         case Or:
 920                 rv = interp_exec(b->left);
 921                 right = interp_exec(b->right);
 922                 rv.bool = rv.bool || right.bool;
 923                 break;
 924         case Not:
 925                 rv = interp_exec(b->right);
 926                 rv.bool = !rv.bool;
 927                 break;
 928
 929 ### Expressions: Comparison
 930
 931 Of slightly higher precedence that Boolean expressions are
 932 Comparisons.
 933 A comparison takes arguments of any type, but the two types must be
 934 the same.
 935
 936 To simplify the parsing we introduce an `eop` which can return an
 937 expression operator.
 938
 939 ###### ast
 940         struct eop {
 941                 enum Btype op;
 942         };
 943
 944 ###### ast functions
 945         static void free_eop(struct eop *e)
 946         {
 947                 if (e)
 948                         free(e);
 949         }
 950
 951 ###### Binode types
 952         Less,
 953         Gtr,
 954         LessEq,
 955         GtrEq,
 956         Eql,
 957         NEql,
 958
 959 ###### other BFact
 960         | Expr CMPop Expr ${
 961                         $0 = new(binode);
 962                         $0->op = $2.op;
 963                         $0->left = $<1;
 964                         $0->right = $<3;
 965                 }$
 966                 | Expr ${ $0 = $<1; }$
 967
 968 ###### Grammar
 969
 970         $eop
 971         CMPop ->   < ${ $0.op = Less; }$
 972                 |  > ${ $0.op = Gtr; }$
 973                 |  <= ${ $0.op = LessEq; }$
 974                 |  >= ${ $0.op = GtrEq; }$
 975                 |  == ${ $0.op = Eql; }$
 976                 |  != ${ $0.op = NEql; }$
 977
 978 ###### print binode cases
 979
 980         case Less:
 981         case LessEq:
 982         case Gtr:
 983         case GtrEq:
 984         case Eql:
 985         case NEql:
 986                 print_exec(b->left, -1, 0);
 987                 switch(b->op) {
 988                 case Less:   printf(" < "); break;
 989                 case LessEq: printf(" <= "); break;
 990                 case Gtr:    printf(" > "); break;
 991                 case GtrEq:  printf(" >= "); break;
 992                 case Eql:    printf(" == "); break;
 993                 case NEql:   printf(" != "); break;
 994                 default: abort();
 995                 }
 996                 print_exec(b->right, -1, 0);
 997                 break;
 998
 999 ###### propagate binode cases
1000         case Less:
1001         case LessEq:
1002         case Gtr:
1003         case GtrEq:
1004         case Eql:
1005         case NEql:
1006                 /* Both must match, result is Vbool */
1007                 t = propagate_types(b->left, Vunknown, ok);
1008                 if (t != Vunknown)
1009                         propagate_types(b->right, t, ok);
1010                 else {
1011                         t = propagate_types(b->right, Vunknown, ok);
1012                         if (t != Vunknown)
1013                                 t = propagate_types(b->left, t, ok);
1014                 }
1015                 if (type != Vbool && type != Vunknown)
1016                         *ok = 0;
1017                 return Vbool;
1018
1019 ###### interp binode cases
1020         case Less:
1021         case LessEq:
1022         case Gtr:
1023         case GtrEq:
1024         case Eql:
1025         case NEql:
1026         {
1027                 int cmp;
1028                 left = interp_exec(b->left);
1029                 right = interp_exec(b->right);
1030                 cmp = value_cmp(left, right);
1031                 rv.vtype = Vbool;
1032                 switch (b->op) {
1033                 case Less:      rv.bool = cmp <  0; break;
1034                 case LessEq:    rv.bool = cmp <= 0; break;
1035                 case Gtr:       rv.bool = cmp >  0; break;
1036                 case GtrEq:     rv.bool = cmp >= 0; break;
1037                 case Eql:       rv.bool = cmp == 0; break;
1038                 case NEql:      rv.bool = cmp != 0; break;
1039                 default: rv.bool = 0; break;
1040                 }
1041                 break;
1042         }
1043
1044 ### Expressions: The rest
1045
1046 The remaining expressions with the highest precedence are arithmetic
1047 and string concatenation.  There are `Expr`, `Term`, and `Factor`.
1048 The `Factor` is where the `Value` and `Variable` that we already have
1049 are included.
1050
1051 `+` and `-` are both infix and prefix operations (where they are
1052 absolute value and negation).  These have different operator names.
1053
1054 We also have a 'Bracket' operator which records where parentheses were
1055 found.  This make it easy to reproduce these when printing.  Once
1056 precedence is handled better I might be able to discard this.
1057
1058 ###### Binode types
1059         Plus, Minus,
1060         Times, Divide,
1061         Concat,
1062         Absolute, Negate,
1063         Bracket,
1064
1065 ###### Grammar
1066
1067         $*binode
1068         Expr -> Expr Eop Term ${
1069                         $0 = new(binode);
1070                         $0->op = $2.op;
1071                         $0->left = $<1;
1072                         $0->right = $<3;
1073                 }$
1074                 | Term ${ $0 = $<1; }$
1075
1076         Term -> Term Top Factor ${
1077                         $0 = new(binode);
1078                         $0->op = $2.op;
1079                         $0->left = $<1;
1080                         $0->right = $<3;
1081                 }$
1082                 | Factor ${ $0 = $<1; }$
1083
1084         Factor -> ( Expression ) ${
1085                         $0 = new(binode);
1086                         $0->op = Bracket;
1087                         $0->right = $<2;
1088                 }$
1089                 | Uop Factor ${
1090                         $0 = new(binode);
1091                         $0->op = $1.op;
1092                         $0->right = $<2;
1093                 }$
1094                 | Value ${ $0 = (struct binode *)$<1; }$
1095                 | Variable ${ $0 = (struct binode *)$<1; }$
1096
1097         $eop
1098         Eop ->    + ${ $0.op = Plus; }$
1099                 | - ${ $0.op = Minus; }$
1100
1101         Uop ->    + ${ $0.op = Absolute; }$
1102                 | - ${ $0.op = Negate; }$
1103
1104         Top ->    * ${ $0.op = Times; }$
1105                 | / ${ $0.op = Divide; }$
1106                 | ++ ${ $0.op = Concat; }$
1107
1108 ###### print binode cases
1109         case Plus:
1110         case Minus:
1111         case Times:
1112         case Divide:
1113         case Concat:
1114                 print_exec(b->left, indent, 0);
1115                 switch(b->op) {
1116                 case Plus:   printf(" + "); break;
1117                 case Minus:  printf(" - "); break;
1118                 case Times:  printf(" * "); break;
1119                 case Divide: printf(" / "); break;
1120                 case Concat: printf(" ++ "); break;
1121                 default: abort();
1122                 }
1123                 print_exec(b->right, indent, 0);
1124                 break;
1125         case Absolute:
1126                 printf("+");
1127                 print_exec(b->right, indent, 0);
1128                 break;
1129         case Negate:
1130                 printf("-");
1131                 print_exec(b->right, indent, 0);
1132                 break;
1133         case Bracket:
1134                 printf("(");
1135                 print_exec(b->right, indent, 0);
1136                 printf(")");
1137                 break;
1138
1139 ###### propagate binode cases
1140         case Plus:
1141         case Minus:
1142         case Times:
1143         case Divide:
1144                 /* both must be numbers, result is Vnum */
1145         case Absolute:
1146         case Negate:
1147                 /* as propagate_types ignores a NULL,
1148                  * unary ops fit here too */
1149                 propagate_types(b->left, Vnum, ok);
1150                 propagate_types(b->right, Vnum, ok);
1151                 if (type != Vnum && type != Vunknown)
1152                         *ok = 0;
1153                 return Vnum;
1154
1155         case Concat:
1156                 /* both must be Vstr, result is Vstr */
1157                 propagate_types(b->left, Vstr, ok);
1158                 propagate_types(b->right, Vstr, ok);
1159                 if (type != Vstr && type != Vunknown)
1160                         *ok = 0;
1161                 return Vstr;
1162
1163         case Bracket:
1164                 return propagate_types(b->right, type, ok);
1165
1166 ###### interp binode cases
1167
1168         case Plus:
1169                 rv = interp_exec(b->left);
1170                 right = interp_exec(b->right);
1171                 mpq_add(rv.num, rv.num, right.num);
1172                 break;
1173         case Minus:
1174                 rv = interp_exec(b->left);
1175                 right = interp_exec(b->right);
1176                 mpq_sub(rv.num, rv.num, right.num);
1177                 break;
1178         case Times:
1179                 rv = interp_exec(b->left);
1180                 right = interp_exec(b->right);
1181                 mpq_mul(rv.num, rv.num, right.num);
1182                 break;
1183         case Divide:
1184                 rv = interp_exec(b->left);
1185                 right = interp_exec(b->right);
1186                 mpq_div(rv.num, rv.num, right.num);
1187                 break;
1188         case Negate:
1189                 rv = interp_exec(b->right);
1190                 mpq_neg(rv.num, rv.num);
1191                 break;
1192         case Absolute:
1193                 rv = interp_exec(b->right);
1194                 mpq_abs(rv.num, rv.num);
1195                 break;
1196         case Bracket:
1197                 rv = interp_exec(b->right);
1198                 break;
1199         case Concat:
1200                 left = interp_exec(b->left);
1201                 right = interp_exec(b->right);
1202                 rv.vtype = Vstr;
1203                 rv.str = text_join(left.str, right.str);
1204                 break;
1205
1206 ### Blocks, Statements, and Statement lists.
1207
1208 Now that we have expressions out of the way we need to turn to
1209 statements.  There are simple statements and more complex statements.
1210 Simple statements do not contain newlines, complex statements do.
1211
1212 Statements often come in sequences and we have corresponding simple
1213 statement lists and complex statement lists.
1214 The former comprise only simple statements separated by semicolons.
1215 The later comprise complex statements and simple statement lists.  They are
1216 separated by newlines.  Thus the semicolon is only used to separate
1217 simple statements on the one line.  This may be overly restrictive,
1218 but I'm not sure I every want a complex statement to share a line with
1219 anything else.
1220
1221 Note that a simple statement list can still use multiple lines if
1222 subsequent lines are indented, so
1223
1224 ###### Example: wrapped simple statement list
1225
1226         a = b; c = d;
1227            e = f; print g
1228
1229 is a single simple statement list.  This might allow room for
1230 confusion, so I'm not set on it yet.
1231
1232 A simple statement list needs no extra syntax.  A complex statement
1233 list has two syntactic forms.  It can be enclosed in braces (much like
1234 C blocks), or it can be introduced by a colon and continue until an
1235 unindented newline (much like Python blocks).  With this extra syntax
1236 it is referred to as a block.
1237
1238 Note that a block does not have to include any newlines if it only
1239 contains simple statements.  So both of:
1240
1241         if condition: a=b; d=f
1242
1243         if condition { a=b; print f }
1244
1245 are valid.
1246
1247 In either case the list is constructed from a `binode` list with
1248 `Block` as the operator.  When parsing the list it is most convenient
1249 to append to the end, so a list is a list and a statement.  When using
1250 the list it is more convenient to consider a list to be a statement
1251 and a list.  So we need a function to re-order a list.
1252 `reorder_bilist` serves this purpose.
1253
1254 The only stand-alone statement we introduce at this stage is `pass`
1255 which does nothing and is represented as a `NULL` pointer in a `Block`
1256 list.
1257
1258 ###### Binode types
1259         Block,
1260
1261 ###### Grammar
1262
1263         $void
1264         OptNL -> Newlines
1265                 |
1266
1267         Newlines -> NEWLINE
1268                 | Newlines NEWLINE
1269
1270         $*binode
1271         Open -> {
1272                 | NEWLINE {
1273         Close -> }
1274                 | NEWLINE }
1275         Block -> Open Statementlist Close ${ $0 = $<2; }$
1276                 | Open SimpleStatements } ${ $0 = reorder_bilist($<2); }$
1277                 | : Statementlist ${ $0 = $<2; }$
1278                 | : SimpleStatements ${ $0 = reorder_bilist($<2); }$
1279
1280         Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<1); }$
1281
1282         ComplexStatements -> ComplexStatements ComplexStatement ${
1283                 $0 = new(binode);
1284                 $0->op = Block;
1285                 $0->left = $<1;
1286                 $0->right = $<2;
1287                 }$
1288                 | ComplexStatements NEWLINE ${ $0 = $<1; }$
1289                 | ComplexStatement ${
1290                 $0 = new(binode);
1291                 $0->op = Block;
1292                 $0->left = NULL;
1293                 $0->right = $<1;
1294                 }$
1295
1296         $*exec
1297         ComplexStatement -> SimpleStatements NEWLINE ${
1298                         $0 = reorder_bilist($<1);
1299                         }$
1300                 ## ComplexStatement Grammar
1301
1302         $*binode
1303         SimpleStatements -> SimpleStatements ; SimpleStatement ${
1304                         $0 = new(binode);
1305                         $0->op = Block;
1306                         $0->left = $<1;
1307                         $0->right = $<3;
1308                         }$
1309                 | SimpleStatement ${
1310                         $0 = new(binode);
1311                         $0->op = Block;
1312                         $0->left = NULL;
1313                         $0->right = $<1;
1314                         }$
1315                 | SimpleStatements ; ${ $0 = $<1; }$
1316
1317         SimpleStatement -> pass ${ $0 = NULL; }$
1318                 ## SimpleStatement Grammar
1319
1320 ###### print binode cases
1321         case Block:
1322                 if (indent < 0) {
1323                         // simple statement
1324                         if (b->left == NULL)
1325                                 printf("pass");
1326                         else
1327                                 print_exec(b->left, indent, 0);
1328                         if (b->right) {
1329                                 printf("; ");
1330                                 print_exec(b->right, indent, 0);
1331                         }
1332                 } else {
1333                         // block, one per line
1334                         if (b->left == NULL)
1335                                 do_indent(indent, "pass\n");
1336                         else
1337                                 print_exec(b->left, indent, bracket);
1338                         if (b->right)
1339                                 print_exec(b->right, indent, bracket);
1340                 }
1341                 break;
1342
1343 ###### propagate binode cases
1344         case Block:
1345         {
1346                 /* If any statement returns something other then Vnone
1347                  * then all such must return same type.
1348                  * As each statement may be Vnone or something else,
1349                  * we must always pass Vunknown down, otherwise an incorrect
1350                  * error might occur.
1351                  */
1352                 struct binode *e;
1353
1354                 for (e = b; e; e = cast(binode, e->right)) {
1355                         t = propagate_types(e->left, Vunknown, ok);
1356                         if (t != Vunknown && t != Vnone) {
1357                                 if (type == Vunknown)
1358                                         type = t;
1359                                 else if (t != type)
1360                                         *ok = 0;
1361                         }
1362                 }
1363                 return type;
1364         }
1365
1366 ###### interp binode cases
1367         case Block:
1368                 while (rv.vtype == Vnone &&
1369                        b) {
1370                         if (b->left)
1371                                 rv = interp_exec(b->left);
1372                         b = cast(binode, b->right);
1373                 }
1374                 break;
1375
1376 ### The Print statement
1377
1378 `print` is a simple statement that takes a comma-separated list of
1379 expressions and prints the values separated by spaces and terminated
1380 by a newline.  No control of formatting is possible.
1381
1382 `print` faces the same list-ordering issue as blocks, and uses the
1383 same solution.
1384
1385 ###### Binode types
1386         Print,
1387
1388 ###### SimpleStatement Grammar
1389
1390         | print ExpressionList ${
1391                 $0 = reorder_bilist($<2);
1392         }$
1393         | print ExpressionList , ${
1394                 $0 = new(binode);
1395                 $0->op = Print;
1396                 $0->right = NULL;
1397                 $0->left = $<2;
1398                 $0 = reorder_bilist($0);
1399         }$
1400         | print ${
1401                 $0 = new(binode);
1402                 $0->op = Print;
1403                 $0->right = NULL;
1404         }$
1405
1406 ###### Grammar
1407
1408         $*binode
1409         ExpressionList -> ExpressionList , Expression ${
1410                 $0 = new(binode);
1411                 $0->op = Print;
1412                 $0->left = $<1;
1413                 $0->right = $<3;
1414                 }$
1415                 | Expression ${
1416                         $0 = new(binode);
1417                         $0->op = Print;
1418                         $0->left = NULL;
1419                         $0->right = $<1;
1420                 }$
1421
1422 ###### print binode cases
1423
1424         case Print:
1425                 do_indent(indent, "print");
1426                 while (b) {
1427                         if (b->left) {
1428                                 printf(" ");
1429                                 print_exec(b->left, -1, 0);
1430                                 if (b->right)
1431                                         printf(",");
1432                         }
1433                         b = cast(binode, b->right);
1434                 }
1435                 if (indent >= 0)
1436                         printf("\n");
1437                 break;
1438
1439 ###### propagate binode cases
1440
1441         case Print:
1442                 /* don't care but all must be consistent */
1443                 propagate_types(b->left, Vunknown, ok);
1444                 propagate_types(b->right, Vunknown, ok);
1445                 break;
1446
1447 ###### interp binode cases
1448
1449         case Print:
1450         {
1451                 char sep = 0;
1452                 int eol = 1;
1453                 for ( ; b; b = cast(binode, b->right))
1454                         if (b->left) {
1455                                 if (sep)
1456                                         putchar(sep);
1457                                 left = interp_exec(b->left);
1458                                 print_value(left);
1459                                 free_value(left);
1460                                 if (b->right)
1461                                         sep = ' ';
1462                         } else if (sep)
1463                                 eol = 0;
1464                 left.vtype = Vnone;
1465                 if (eol)
1466                         printf("\n");
1467                 break;
1468         }
1469
1470 ###### Assignment statement
1471
1472 An assignment will assign a value to a variable.  The analysis phase
1473 ensures that the type will be correct so the interpreted just needs to
1474 perform the calculation.
1475
1476 ###### Binode types
1477         Assign,
1478
1479 ###### SimpleStatement Grammar
1480         | Variable = Expression ${
1481                         $0 = new(binode);
1482                         $0->op = Assign;
1483                         $0->left = $<1;
1484                         $0->right =$<3;
1485                 }$
1486
1487 ###### print binode cases
1488
1489         case Assign:
1490                 do_indent(indent, "");
1491                 print_exec(b->left, indent, 0);
1492                 printf(" = ");
1493                 print_exec(b->right, indent, 0);
1494                 if (indent >= 0)
1495                         printf("\n");
1496                 break;
1497
1498 ###### propagate binode cases
1499
1500         case Assign:
1501                 /* Both must match, result is Vnone */
1502                 t = propagate_types(b->left, Vunknown, ok);
1503                 if (t != Vunknown)
1504                         propagate_types(b->right, t, ok);
1505                 else {
1506                         t = propagate_types(b->right, Vunknown, ok);
1507                         if (t != Vunknown)
1508                                 t = propagate_types(b->left, t, ok);
1509                 }
1510                 return Vnone;
1511
1512 ###### interp binode cases
1513
1514         case Assign:
1515         {
1516                 struct variable *v = cast(var, b->left)->var;
1517                 right = interp_exec(b->right);
1518                 free_value(v->val);
1519                 v->val = right;
1520                 right.vtype = Vunknown;
1521                 break;
1522         }
1523
1524 ### The `use` statement
1525
1526 The `use` statement is the last "simple" statement.  It is needed when
1527 the condition in a conditional statement is a block.  `use` works much
1528 like `return` in C, but only completes the `condition`, not the whole
1529 function.
1530
1531 ###### Binode types
1532         Use,
1533
1534 ###### SimpleStatement Grammar
1535         | use Expression ${
1536                 $0 = new(binode);
1537                 $0->op = Use;
1538                 $0->right = $<2;
1539         }$
1540
1541 ###### print binode cases
1542
1543         case Use:
1544                 do_indent(indent, "use ");
1545                 print_exec(b->right, -1, 0);
1546                 if (indent >= 0)
1547                         printf("\n");
1548                 break;
1549
1550 ###### propagate binode cases
1551
1552         case Use:
1553                 /* result matches value */
1554                 return propagate_types(b->right, type, ok);
1555
1556 ###### interp binode cases
1557
1558         case Use:
1559                 rv = interp_exec(b->right);
1560                 break;
1561
1562 ### The Conditional Statement
1563
1564 This is the biggy and currently the only complex statement.
1565 This subsumes `if`, `while`, `do/while`, `switch`, and some part of
1566 `for`.  It is comprised of a number of parts, all of which are
1567 optional though set combinations apply.
1568
1569 If there is a `forpart`, it is executed first, only once.
1570 If there is a `dopart`, then it is executed repeatedly providing
1571 always that the `condpart` or `cond`, if present, does not return a non-True
1572 value.  `condpart` can fail to return any value if it simply executes
1573 to completion.  This is treated the same as returning True.
1574
1575 If there is a `thenpart` it will be executed whenever the `condpart`
1576 or `cond` returns True (or does not return), but this will happen
1577 *after* `dopart` (when present).
1578
1579 If `elsepart` is present it will be executed at most once when the
1580 condition returns False.  If there are any `casepart`s, they will be
1581 executed when the condition returns a matching value.
1582
1583 The particular sorts of values allowed in case parts has not yet been
1584 determined in the language design.
1585
1586 The cond_statement cannot fit into a `binode` so a new `exec` is
1587 defined.
1588
1589 ###### exec type
1590         Xcond_statement,
1591
1592 ###### ast
1593         struct casepart {
1594                 struct exec *value;
1595                 struct exec *action;
1596                 struct casepart *next;
1597         };
1598         struct cond_statement {
1599                 struct exec;
1600                 struct exec *forpart, *condpart, *dopart, *thenpart, *elsepart;
1601                 struct casepart *casepart;
1602         };
1603
1604 ###### ast functions
1605
1606         static void free_casepart(struct casepart *cp)
1607         {
1608                 while (cp) {
1609                         struct casepart *t;
1610                         free_exec(cp->value);
1611                         free_exec(cp->action);
1612                         t = cp->next;
1613                         free(cp);
1614                         cp = t;
1615                 }
1616         }
1617
1618         void free_cond_statement(struct cond_statement *s)
1619         {
1620                 if (!s)
1621                         return;
1622                 free_exec(s->forpart);
1623                 free_exec(s->condpart);
1624                 free_exec(s->dopart);
1625                 free_exec(s->thenpart);
1626                 free_exec(s->elsepart);
1627                 free_casepart(s->casepart);
1628                 free(s);
1629         }
1630
1631 ###### free exec cases
1632         case Xcond_statement: free_cond_statement(cast(cond_statement, e)); break;
1633
1634 ###### ComplexStatement Grammar
1635         | CondStatement ${ $0 = $<1; }$
1636
1637 ###### Grammar
1638
1639         $*cond_statement
1640         CondStatement -> ForThen WhilePart CondSuffix ${
1641                         $0 = $<3;
1642                         $0->forpart = $1.forpart; $1.forpart = NULL;
1643                         $0->thenpart = $1.thenpart; $1.thenpart = NULL;
1644                         $0->condpart = $2.condpart; $2.condpart = NULL;
1645                         $0->dopart = $2.dopart; $2.dopart = NULL;
1646                         }$
1647                 | WhilePart CondSuffix ${
1648                         $0 = $<2;
1649                         $0->condpart = $1.condpart; $1.condpart = NULL;
1650                         $0->dopart = $1.dopart; $1.dopart = NULL;
1651                         }$
1652                 | SwitchPart CondSuffix ${
1653                         $0 = $<2;
1654                         $0->condpart = $<1;
1655                         }$
1656                 | IfPart IfSuffix ${
1657                         $0 = $<2;
1658                         $0->condpart = $1.condpart; $1.condpart = NULL;
1659                         $0->thenpart = $1.thenpart; $1.thenpart = NULL;
1660                         }$
1661
1662         CondSuffix -> IfSuffix ${ $0 = $<1; }$
1663                 | Newlines case Expression Block CondSuffix ${ {
1664                         struct casepart *cp = calloc(1, sizeof(*cp));
1665                         $0 = $<5;
1666                         cp->value = $<3;
1667                         cp->action = $<4;
1668                         cp->next = $0->casepart;
1669                         $0->casepart = cp;
1670                 } }$
1671                 | case Expression Block CondSuffix ${ {
1672                         struct casepart *cp = calloc(1, sizeof(*cp));
1673                         $0 = $<4;
1674                         cp->value = $<2;
1675                         cp->action = $<3;
1676                         cp->next = $0->casepart;
1677                         $0->casepart = cp;
1678                 } }$
1679
1680         IfSuffix -> Newlines ${ $0 = new(cond_statement); }$
1681                 | Newlines else Block ${
1682                         $0 = new(cond_statement);
1683                         $0->elsepart = $<3;
1684                 }$
1685                 | else Block ${
1686                         $0 = new(cond_statement);
1687                         $0->elsepart = $<2;
1688                 }$
1689                 | Newlines else CondStatement ${
1690                         $0 = new(cond_statement);
1691                         $0->elsepart = $<3;
1692                 }$
1693                 | else CondStatement ${
1694                         $0 = new(cond_statement);
1695                         $0->elsepart = $<2;
1696                 }$
1697
1698
1699         $*exec
1700         ForPart -> for SimpleStatements ${
1701                         $0 = reorder_bilist($<2);
1702                 }$
1703                 |  for Block ${
1704                         $0 = $<2;
1705                 }$
1706
1707         ThenPart -> then SimpleStatements ${
1708                         $0 = reorder_bilist($<2);
1709                 }$
1710                 |  then Block ${
1711                         $0 = $<2;
1712                 }$
1713
1714         ThenPartNL -> ThenPart OptNL ${
1715                         $0 = $<1;
1716                 }$
1717
1718         WhileHead -> while Block ${
1719                 $0 = $<2;
1720                 }$
1721
1722         $cond_statement
1723         ForThen -> ForPart OptNL ThenPartNL ${
1724                         $0.forpart = $<1;
1725                         $0.thenpart = $<3;
1726                 }$
1727                 | ForPart OptNL ${
1728                         $0.forpart = $<1;
1729                 }$
1730
1731         WhilePart -> while Expression Block ${
1732                         $0.type = Xcond_statement;
1733                         $0.condpart = $<2;
1734                         $0.dopart = $<3;
1735                 }$
1736                 |    WhileHead OptNL do Block ${
1737                         $0.type = Xcond_statement;
1738                         $0.condpart = $<1;
1739                         $0.dopart = $<4;
1740                 }$
1741
1742         IfPart -> if Expression Block ${
1743                         $0.type = Xcond_statement;
1744                         $0.condpart = $<2;
1745                         $0.thenpart = $<3;
1746                 }$
1747                 | if Block OptNL then Block ${
1748                         $0.type = Xcond_statement;
1749                         $0.condpart = $<2;
1750                         $0.thenpart = $<5;
1751                 }$
1752
1753         $*exec
1754         SwitchPart -> switch Expression ${
1755                         $0 = $<2;
1756                 }$
1757                 | switch Block ${
1758                         $0 = $<2;
1759                 }$
1760
1761 ###### print exec cases
1762
1763         case Xcond_statement:
1764         {
1765                 struct cond_statement *cs = cast(cond_statement, e);
1766                 struct casepart *cp;
1767                 if (cs->forpart) {
1768                         do_indent(indent, "for");
1769                         if (bracket) printf(" {\n"); else printf(":\n");
1770                         print_exec(cs->forpart, indent+1, bracket);
1771                         if (cs->thenpart) {
1772                                 if (bracket)
1773                                         do_indent(indent, "} then {\n");
1774                                 else
1775                                         do_indent(indent, "then:\n");
1776                                 print_exec(cs->thenpart, indent+1, bracket);
1777                         }
1778                         if (bracket) do_indent(indent, "}\n");
1779                 }
1780                 if (cs->dopart) {
1781                         // a loop
1782                         if (cs->condpart && cs->condpart->type == Xbinode &&
1783                             cast(binode, cs->condpart)->op == Block) {
1784                                 if (bracket)
1785                                         do_indent(indent, "while {\n");
1786                                 else
1787                                         do_indent(indent, "while:\n");
1788                                 print_exec(cs->condpart, indent+1, bracket);
1789                                 if (bracket)
1790                                         do_indent(indent, "} do {\n");
1791                                 else
1792                                         do_indent(indent, "do:\n");
1793                                 print_exec(cs->dopart, indent+1, bracket);
1794                                 if (bracket)
1795                                         do_indent(indent, "}\n");
1796                         } else {
1797                                 do_indent(indent, "while ");
1798                                 print_exec(cs->condpart, 0, bracket);
1799                                 if (bracket)
1800                                         printf(" {\n");
1801                                 else
1802                                         printf(":\n");
1803                                 print_exec(cs->dopart, indent+1, bracket);
1804                                 if (bracket)
1805                                         do_indent(indent, "}\n");
1806                         }
1807                 } else {
1808                         // a condition
1809                         if (cs->casepart)
1810                                 do_indent(indent, "switch");
1811                         else
1812                                 do_indent(indent, "if");
1813                         if (cs->condpart && cs->condpart->type == Xbinode &&
1814                             cast(binode, cs->condpart)->op == Block) {
1815                                 printf(":\n");
1816                                 print_exec(cs->condpart, indent+1, bracket);
1817                                 do_indent(indent, "then:\n");
1818                                 print_exec(cs->thenpart, indent+1, bracket);
1819                         } else {
1820                                 printf(" ");
1821                                 print_exec(cs->condpart, 0, bracket);
1822                                 printf(":\n");
1823                                 print_exec(cs->thenpart, indent+1, bracket);
1824                         }
1825                 }
1826                 for (cp = cs->casepart; cp; cp = cp->next) {
1827                         do_indent(indent, "case ");
1828                         print_exec(cp->value, -1, 0);
1829                         printf(":\n");
1830                         print_exec(cp->action, indent+1, bracket);
1831                 }
1832                 if (cs->elsepart) {
1833                         do_indent(indent, "else:\n");
1834                         print_exec(cs->elsepart, indent+1, bracket);
1835                 }
1836                 break;
1837         }
1838
1839 ###### propagate exec cases
1840         case Xcond_statement:
1841         {
1842                 // forpart and dopart must return Vnone
1843                 // condpart must be bool or match casepart->values
1844                 // thenpart, elsepart, casepart->action must match
1845                 // or be Vnone
1846                 struct cond_statement *cs = cast(cond_statement, prog);
1847                 struct casepart *c;
1848
1849                 t = propagate_types(cs->forpart, Vnone, ok);
1850                 if (t != Vunknown && t != Vnone)
1851                         *ok = 0;
1852                 t = propagate_types(cs->dopart, Vnone, ok);
1853                 if (t != Vunknown && t != Vnone)
1854                         *ok = 0;
1855                 if (cs->casepart == NULL)
1856                         propagate_types(cs->condpart, Vbool, ok);
1857                 else {
1858                         t = Vunknown;
1859                         for (c = cs->casepart;
1860                              c && (t == Vunknown); c = c->next)
1861                                 t = propagate_types(c->value, Vunknown, ok);
1862                         if (t == Vunknown && cs->condpart)
1863                                 t = propagate_types(cs->condpart, Vunknown, ok);
1864                         // Now we have a type (I hope) push it down
1865                         if (t != Vunknown) {
1866                                 for (c = cs->casepart; c; c = c->next)
1867                                         propagate_types(c->value, t, ok);
1868                                 propagate_types(cs->condpart, t, ok);
1869                         }
1870                 }
1871                 if (type == Vunknown || type == Vnone)
1872                         type = propagate_types(cs->thenpart, Vunknown, ok);
1873                 if (type == Vunknown || type == Vnone)
1874                         type = propagate_types(cs->elsepart, Vunknown, ok);
1875                 for (c = cs->casepart;
1876                      c && (type == Vunknown || type == Vnone);
1877                      c = c->next)
1878                         type = propagate_types(c->action, Vunknown, ok);
1879                 if (type != Vunknown && type != Vnone) {
1880                         propagate_types(cs->thenpart, type, ok);
1881                         propagate_types(cs->elsepart, type, ok);
1882                         for (c = cs->casepart; c ; c = c->next)
1883                                 propagate_types(c->action, type, ok);
1884                         return type;
1885                 } else
1886                         return Vunknown;
1887         }
1888
1889 ###### interp exec cases
1890         case Xcond_statement:
1891         {
1892                 struct value v, cnd;
1893                 struct casepart *cp;
1894                 struct cond_statement *c = cast(cond_statement, e);
1895                 if (c->forpart)
1896                         interp_exec(c->forpart);
1897                 do {
1898                         if (c->condpart)
1899                                 cnd = interp_exec(c->condpart);
1900                         else
1901                                 cnd.vtype = Vnone;
1902                         if (!(cnd.vtype == Vnone ||
1903                               (cnd.vtype == Vbool && cnd.bool != 0)))
1904                                 break;
1905                         if (c->dopart) {
1906                                 free_value(cnd);
1907                                 interp_exec(c->dopart);
1908                         }
1909                         if (c->thenpart) {
1910                                 v = interp_exec(c->thenpart);
1911                                 if (v.vtype != Vnone || !c->dopart)
1912                                         return v;
1913                                 free_value(v);
1914                         }
1915                 } while (c->dopart);
1916
1917                 for (cp = c->casepart; cp; cp = cp->next) {
1918                         v = interp_exec(cp->value);
1919                         if (value_cmp(v, cnd) == 0) {
1920                                 free_value(v);
1921                                 free_value(cnd);
1922                                 return interp_exec(cp->action);
1923                         }
1924                         free_value(v);
1925                 }
1926                 free_value(cnd);
1927                 if (c->elsepart)
1928                         return interp_exec(c->elsepart);
1929                 v.vtype = Vnone;
1930                 return v;
1931         }
1932
1933 ### Finally the whole program.
1934
1935 Somewhat reminiscent of Pascal a (current) Ocean program starts with
1936 the keyword "program" and list of variable names which are assigned
1937 values from command line arguments.  Following this is a `block` which
1938 is the code to execute.
1939
1940 As this is the top level, several things are handled a bit
1941 differently.
1942 The whole program is not interpreted by `interp_exec` as that isn't
1943 passed the argument list which the program requires.  Similarly type
1944 analysis is a bit more interesting at this level.
1945
1946 ###### Binode types
1947         Program,
1948
1949 ###### Parser: grammar
1950
1951         $*binode
1952         Program -> program Varlist Block OptNL ${
1953                 $0 = new(binode);
1954                 $0->op = Program;
1955                 $0->left = reorder_bilist($<2);
1956                 $0->right = $<3;
1957         }$
1958
1959         Varlist -> Varlist Variable ${
1960                         $0 = new(binode);
1961                         $0->op = Program;
1962                         $0->left = $<1;
1963                         $0->right = $<2;
1964                 }$
1965                 | ${ $0 = NULL; }$
1966         ## Grammar
1967
1968 ###### print binode cases
1969         case Program:
1970                 do_indent(indent, "program");
1971                 for (b2 = cast(binode, b->left); b2; b2 = cast(binode, b2->right)) {
1972                         printf(" ");
1973                         print_exec(b2->left, 0, 0);
1974                 }
1975                 if (bracket)
1976                         printf(" {\n");
1977                 else
1978                         printf(":\n");
1979                 print_exec(b->right, indent+1, bracket);
1980                 if (bracket)
1981                         do_indent(indent, "}\n");
1982                 break;
1983
1984 ###### propagate binode cases
1985         case Program: abort();
1986
1987 ###### core functions
1988
1989         static int analyse_prog(struct exec *prog, struct parse_context *c)
1990         {
1991                 struct binode *b = cast(binode, prog);
1992                 struct variable *v;
1993                 int ok = 1;
1994                 int uniq = 314159;
1995                 do {
1996                         ok = 1;
1997                         propagate_types(b->right, Vnone, &ok);
1998                 } while (ok == 2);
1999                 if (!ok)
2000                         return 0;
2001
2002                 for (b = cast(binode, b->left); b; b = cast(binode, b->right)) {
2003                         struct var *v = cast(var, b->left);
2004                         if (v->var->val.vtype == Vunknown)
2005                                 val_init(&v->var->val, Vstr);
2006                 }
2007                 b = cast(binode, prog);
2008                 do {
2009                         ok = 1;
2010                         propagate_types(b->right, Vnone, &ok);
2011                 } while (ok == 2);
2012                 if (!ok)
2013                         return 0;
2014
2015                 for (v = c->varlist; v; v = v->next)
2016                         if (v->val.vtype == Vunknown) {
2017                                 v->val.vtype = Vnum;
2018                                 mpq_init(v->val.num);
2019                                 mpq_set_ui(v->val.num, uniq, 1);
2020                                 uniq++;
2021                         }
2022                 /* Make sure everything is still consistent */
2023                 propagate_types(b->right, Vnone, &ok);
2024                 return !!ok;
2025         }
2026
2027         static void interp_prog(struct exec *prog, char **argv)
2028         {
2029                 struct binode *p = cast(binode, prog);
2030                 struct binode *al = cast(binode, p->left);
2031                 struct value v;
2032
2033                 while (al) {
2034                         struct var *v = cast(var, al->left);
2035                         struct value *vl = &v->var->val;
2036
2037                         if (argv[0] == NULL) {
2038                                 printf("Not enough args\n");
2039                                 exit(1);
2040                         }
2041                         al = cast(binode, al->right);
2042                         free_value(*vl);
2043                         if (!parse_value(vl, argv[0]))
2044                                 exit(1);
2045                         argv++;
2046                 }
2047                 v = interp_exec(p->right);
2048                 free_value(v);
2049         }
2050
2051 ###### interp binode cases
2052         case Program: abort();