ocean-lang.org Git - ocean/blob - csrc/oceani.mdc

   1 # Ocean Interpreter - Stoney Creek version
   2
   3 Ocean is intended to be a compiled language, so this interpreter is
   4 not targeted at being the final product.  It is, rather, an intermediate
   5 stage and fills that role in two distinct ways.
   6
   7 Firstly, it exists as a platform to experiment with the early language
   8 design.  An interpreter is easy to write and easy to get working, so
   9 the barrier for entry is lower if I aim to start with an interpreter.
  10
  11 Secondly, the plan for the Ocean compiler is to write it in the
  12 [Ocean language](http://ocean-lang.org).  To achieve this we naturally
  13 need some sort of boot-strap process and this interpreter - written in
  14 portable C - will fill that role.  It will be used to bootstrap the
  15 Ocean compiler.
  16
  17 Two features that are not needed to fill either of these roles are
  18 performance and completeness.  The interpreter only needs to be fast
  19 enough to run small test programs and occasionally to run the compiler
  20 on itself.  It only needs to be complete enough to test aspects of the
  21 design which are developed before the compiler is working, and to run
  22 the compiler on itself.  Any features not used by the compiler when
  23 compiling itself are superfluous.  They may be included anyway, but
  24 they may not.
  25
  26 Nonetheless, the interpreter should end up being reasonably complete,
  27 and any performance bottlenecks which appear and are easily fixed, will
  28 be.
  29
  30 ## Current version
  31
  32 This second version of the interpreter exists to test out the
  33 structured statement providing conditions and iteration, and simple
  34 variable scoping.  Clearly we need some minimal other functionality so
  35 that values can be tested and instructions iterated over.  All that
  36 functionality is clearly not normative at this stage (not that
  37 anything is **really** normative yet) and will change, so early test
  38 code will certainly break in later versions.
  39
  40 The under-test parts of the language are:
  41
  42  - conditional/looping structured statements
  43  - the `use` statement which is needed for that
  44  - Variable binding using ":=" and "::=", and assignment using "=".
  45
  46 Elements which are present to make a usable language are:
  47
  48  - "blocks" of multiple statements.
  49  - `pass`: a statement which does nothing.
  50  - expressions: `+`, `-`, `*`, `/` can apply to numbers and `++` can
  51    catenate strings.  `and`, `or`, `not` manipulate Booleans, and
  52    normal comparison operators can work on all three types.
  53  - `print`: will print the values in a list of expressions.
  54  - `program`: is given a list of identifiers to initialize from
  55    arguments.
  56
  57 ## Naming
  58
  59 Versions of the interpreter which obviously do not support a complete
  60 language will be named after creeks and streams.  This one is Stoney
  61 Creek.
  62
  63 Once we have something reasonably resembling a complete language, the
  64 names of rivers will be used.
  65 Early versions of the compiler will be named after seas.  Major
  66 releases of the compiler will be named after oceans.  Hopefully I will
  67 be finished once I get to the Pacific Ocean release.
  68
  69 ## Outline
  70
  71 As well as parsing and executing a program, the interpreter can print
  72 out the program from the parsed internal structure.  This is useful
  73 for validating the parsing.
  74 So the main requirements of the interpreter are:
  75
  76 - Parse the program, possibly with tracing,
  77 - Analyse the parsed program to ensure consistency,
  78 - Print the program,
  79 - Execute the program.
  80
  81 This is all performed by a single C program extracted with
  82 `parsergen`.
  83
  84 There will be two formats for printing the program: a default and one
  85 that uses bracketing.  So a `--bracket` command line option is needed
  86 for that.  Normally the first code section found is used, however an
  87 alternate section can be requested so that a file (such as this one)
  88 can contain multiple programs This is effected with the `--section`
  89 option.
  90
  91 This code must be compiled with `-fplan9-extensions` so that anonymous
  92 structures can be used.
  93
  94 ###### File: oceani.mk
  95
  96         myCFLAGS := -Wall -g -fplan9-extensions
  97         CFLAGS := $(filter-out $(myCFLAGS),$(CFLAGS)) $(myCFLAGS)
  98         myLDLIBS:= libparser.o libscanner.o libmdcode.o -licuuc
  99         LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
 100         ## libs
 101         all :: $(LDLIBS) oceani
 102         oceani.c oceani.h : oceani.mdc parsergen
 103                 ./parsergen -o oceani --LALR --tag Parser oceani.mdc
 104         oceani.mk: oceani.mdc md2c
 105                 ./md2c oceani.mdc
 106
 107         oceani: oceani.o $(LDLIBS)
 108                 $(CC) $(CFLAGS) -o oceani oceani.o $(LDLIBS)
 109
 110 ###### Parser: header
 111         ## macros
 112         ## ast
 113         struct parse_context {
 114                 struct token_config config;
 115                 char *file_name;
 116                 int parse_error;
 117                 ## parse context
 118         };
 119
 120 ###### macros
 121
 122         #define container_of(ptr, type, member) ({                      \
 123                 const typeof( ((type *)0)->member ) *__mptr = (ptr);    \
 124                 (type *)( (char *)__mptr - offsetof(type,member) );})
 125
 126         #define config2context(_conf) container_of(_conf, struct parse_context, \
 127                 config)
 128
 129 ###### Parser: code
 130
 131         #include <unistd.h>
 132         #include <stdlib.h>
 133         #include <fcntl.h>
 134         #include <errno.h>
 135         #include <sys/mman.h>
 136         #include <string.h>
 137         #include <stdio.h>
 138         #include <locale.h>
 139         #include <malloc.h>
 140         #include "mdcode.h"
 141         #include "scanner.h"
 142         #include "parser.h"
 143
 144         ## includes
 145
 146         #include "oceani.h"
 147
 148         ## forward decls
 149         ## value functions
 150         ## ast functions
 151         ## core functions
 152
 153         #include <getopt.h>
 154         static char Usage[] = "Usage: oceani --trace --print --noexec --brackets"
 155                               "--section=SectionName prog.ocn\n";
 156         static const struct option long_options[] = {
 157                 {"trace",     0, NULL, 't'},
 158                 {"print",     0, NULL, 'p'},
 159                 {"noexec",    0, NULL, 'n'},
 160                 {"brackets",  0, NULL, 'b'},
 161                 {"section",   1, NULL, 's'},
 162                 {NULL,        0, NULL, 0},
 163         };
 164         const char *options = "tpnbs";
 165         int main(int argc, char *argv[])
 166         {
 167                 int fd;
 168                 int len;
 169                 char *file;
 170                 struct section *s;
 171                 char *section = NULL;
 172                 struct parse_context context = {
 173                         .config = {
 174                                 .ignored = (1 << TK_line_comment)
 175                                          | (1 << TK_block_comment),
 176                                 .number_chars = ".,_+-",
 177                                 .word_start = "_",
 178                                 .word_cont = "_",
 179                         },
 180                 };
 181                 int doprint=0, dotrace=0, doexec=1, brackets=0;
 182                 struct exec **prog;
 183                 int opt;
 184                 while ((opt = getopt_long(argc, argv, options, long_options, NULL))
 185                        != -1) {
 186                         switch(opt) {
 187                         case 't': dotrace=1; break;
 188                         case 'p': doprint=1; break;
 189                         case 'n': doexec=0; break;
 190                         case 'b': brackets=1; break;
 191                         case 's': section = optarg; break;
 192                         default: fprintf(stderr, Usage);
 193                                 exit(1);
 194                         }
 195                 }
 196                 if (optind >= argc) {
 197                         fprintf(stderr, "oceani: no input file given\n");
 198                         exit(1);
 199                 }
 200                 fd = open(argv[optind], O_RDONLY);
 201                 if (fd < 0) {
 202                         fprintf(stderr, "oceani: cannot open %s\n", argv[optind]);
 203                         exit(1);
 204                 }
 205                 context.file_name = argv[optind];
 206                 len = lseek(fd, 0, 2);
 207                 file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
 208                 s = code_extract(file, file+len, NULL);
 209                 if (!s) {
 210                         fprintf(stderr, "oceani: could not find any code in %s\n",
 211                                 argv[optind]);
 212                         exit(1);
 213                 }
 214
 215                 ## context initialization
 216
 217                 if (section) {
 218                         struct section *ss;
 219                         for (ss = s; ss; ss = ss->next) {
 220                                 struct text sec = ss->section;
 221                                 if (sec.len == strlen(section) &&
 222                                     strncmp(sec.txt, section, sec.len) == 0)
 223                                         break;
 224                         }
 225                         if (ss)
 226                                 prog = parse_oceani(ss->code, &context.config,
 227                                                     dotrace ? stderr : NULL);
 228                         else {
 229                                 fprintf(stderr, "oceani: cannot find section %s\n",
 230                                         section);
 231                                 exit(1);
 232                         }
 233                 } else
 234                         prog = parse_oceani(s->code, &context.config,
 235                                     dotrace ? stderr : NULL);
 236                 if (!prog) {
 237                         fprintf(stderr, "oceani: fatal parser error.\n");
 238                         context.parse_error = 1;
 239                 }
 240                 if (prog && doprint)
 241                         print_exec(*prog, 0, brackets);
 242                 if (prog && doexec && !context.parse_error) {
 243                         if (!analyse_prog(*prog, &context)) {
 244                                 fprintf(stderr, "oceani: type error in program - not running.\n");
 245                                 exit(1);
 246                         }
 247                         interp_prog(*prog, argv+optind+1);
 248                 }
 249                 if (prog) {
 250                         free_exec(*prog);
 251                         free(prog);
 252                 }
 253                 while (s) {
 254                         struct section *t = s->next;
 255                         code_free(s->code);
 256                         free(s);
 257                         s = t;
 258                 }
 259                 ## free context vars
 260                 ## free context types
 261                 exit(context.parse_error ? 1 : 0);
 262         }
 263
 264 ### Analysis
 265
 266 The four requirements of parse, analyse, print, interpret apply to
 267 each language element individually so that is how most of the code
 268 will be structured.
 269
 270 Three of the four are fairly self explanatory.  The one that requires
 271 a little explanation is the analysis step.
 272
 273 The current language design does not require the types of variables to
 274 be declared, but they must still have a single type.  Different
 275 operations impose different requirements on the variables, for example
 276 addition requires both arguments to be numeric, and assignment
 277 requires the variable on the left to have the same type as the
 278 expression on the right.
 279
 280 Analysis involves propagating these type requirements around and
 281 consequently setting the type of each variable.  If any requirements
 282 are violated (e.g. a string is compared with a number) or if a
 283 variable needs to have two different types, then an error is raised
 284 and the program will not run.
 285
 286 If the same variable is declared in both branchs of an 'if/else', or
 287 in all cases of a 'switch' then the multiple instances may be merged
 288 into just one variable if the variable is references after the
 289 conditional statement.  When this happens, the types must naturally be
 290 consistent across all the branches.  When the variable is not used
 291 outside the if, the variables in the different branches are distinct
 292 and can be of different types.
 293
 294 Determining the types of all variables early is important for
 295 processing command line arguments.  These can be assigned to any type
 296 of variable, but we must first know the correct type so any required
 297 conversion can happen.  If a variable is associated with a command
 298 line argument but no type can be interpreted (e.g. the variable is
 299 only ever used in a `print` statement), then the type is set to
 300 'string'.
 301
 302 Undeclared names may only appear in "use" statements and "case" expressions.
 303 These names are given a type of "label" and a unique value.
 304 This allows them to fill the role of a name in an enumerated type, which
 305 is useful for testing the `switch` statement.
 306
 307 As we will see, the condition part of a `while` statement can return
 308 either a Boolean or some other type.  This requires that the expect
 309 type that gets passed around comprises a type (`enum vtype`) and a
 310 flag to indicate that `Vbool` is also permitted.
 311
 312 As there are, as yet, no distinct types that are compatible, there
 313 isn't much subtlety in the analysis.  When we have distinct number
 314 types, this will become more interesting.
 315
 316 #### Error reporting
 317
 318 When analysis discovers an inconsistency it needs to report an error;
 319 just refusing to run the code ensures that the error doesn't cascade,
 320 but by itself it isn't very useful.  A clear understand of the sort of
 321 error message that are useful will help guide the process of analysis.
 322
 323 At a simplistic level, the only sort of error that type analysis can
 324 report is that the type of some construct doesn't match a contextual
 325 requirement.  For example, in `4 + "hello"` the addition provides a
 326 contextual requirement for numbers, but `"hello"` is not a number.  In
 327 this particular example no further information is needed as the types
 328 are obvious from local information.  When a variable is involved that
 329 isn't the case.  It may be helpful to explain why the variable has a
 330 particular type, by indicating the location where the type was set,
 331 whether by declaration or usage.
 332
 333 Using a recursive-descent analysis we can easily detect a problem at
 334 multiple locations. In "`hello:= "there"; 4 + hello`" the addition
 335 will detect that one argument is not a number and the usage of `hello`
 336 will detect that a number was wanted, but not provided.  In this
 337 (early) version of the language, we will generate error reports at
 338 multiple locations, so the use of `hello` will report an error and
 339 explain were the value was set, and the addition will report an error
 340 and say why numbers are needed.  To be able to report locations for
 341 errors, each language element will need to record a file location
 342 (line and column) and each variable will need to record the language
 343 element where its type was set.  For now we will assume that each line
 344 of an error message indicates one location in the file, and up to 2
 345 types.  So we provide a `printf`-like function which takes a format, a
 346 language (a `struct exec` which has not yet been introduced), and 2
 347 types. "`%1`" reports the first type, "`%2`" reports the second.  We
 348 will need a function to print the location, once we know how that is
 349 stored.  As will be explained later, there are sometimes extra rules for
 350 type matching and they might affect error messages, we need to pass those
 351 in too.
 352
 353 As well as type errors, we sometimes need to report problems with
 354 tokens, which might be unexpected or might name a type that has not
 355 been defined.  For these we have `tok_err()` which reports an error
 356 with a given token.  Each of the error functions sets the flag in the
 357 context so indicate that parsing failed.
 358
 359 ###### forward decls
 360
 361         static void fput_loc(struct exec *loc, FILE *f);
 362
 363 ###### core functions
 364
 365         static void type_err(struct parse_context *c,
 366                              char *fmt, struct exec *loc,
 367                              struct type *t1, int rules, struct type *t2)
 368         {
 369                 fprintf(stderr, "%s:", c->file_name);
 370                 fput_loc(loc, stderr);
 371                 for (; *fmt ; fmt++) {
 372                         if (*fmt != '%') {
 373                                 fputc(*fmt, stderr);
 374                                 continue;
 375                         }
 376                         fmt++;
 377                         switch (*fmt) {
 378                         case '%': fputc(*fmt, stderr); break;
 379                         default: fputc('?', stderr); break;
 380                         case '1':
 381                                 if (t1)
 382                                         fprintf(stderr, "%.*s", t1->name.len, t1->name.txt);
 383                                 else
 384                                         fputs("*unknown*", stderr);
 385                                 break;
 386                         case '2':
 387                                 if (t2)
 388                                         fprintf(stderr, "%.*s", t2->name.len, t2->name.txt);
 389                                 else
 390                                         fputs("*unknown*", stderr);
 391                                 break;
 392                                 break;
 393                         ## format cases
 394                         }
 395                 }
 396                 fputs("\n", stderr);
 397                 c->parse_error = 1;
 398         }
 399
 400         static void tok_err(struct parse_context *c, char *fmt, struct token *t)
 401         {
 402                 fprintf(stderr, "%s:%d:%d: %s: %.*s\n", c->file_name, t->line, t->col, fmt,
 403                         t->txt.len, t->txt.txt);
 404                 c->parse_error = 1;
 405         }
 406
 407 ## Data Structures
 408
 409 One last introductory step before detailing the language elements and
 410 providing their four requirements is to establish the data structures
 411 to store these elements.
 412
 413 There are two key objects that we need to work with: executable
 414 elements which comprise the program, and values which the program
 415 works with.  Between these are the variables in their various scopes
 416 which hold the values, and types which classify the values stored and
 417 manipulatd by executables.
 418
 419 ### Types
 420
 421 Values come in a wide range of types, with more likely to be added.
 422 Each type needs to be able to parse and print its own values (for
 423 convenience at least) as well as to compare two values, at least for
 424 equality and possibly for order.  For now, values might need to be
 425 duplicated and freed, though eventually such manipulations will be
 426 better integrated into the language.
 427
 428 Rather than requiring every numeric type to support all numeric
 429 operations (add, multiple, etc), we allow types to be able to present
 430 as one of a few standard types: integer, float, and fraction.  The
 431 existance of these conversion functions enable types to determine if
 432 they are compatible with other types.
 433
 434 Named type are stored in a simple linked list.  Objects of each type are "values"
 435 which are often passed around by value.
 436
 437 ###### ast
 438
 439         struct value {
 440                 struct type *type;
 441                 union {
 442                         ## value union fields
 443                 };
 444         };
 445
 446         struct type {
 447                 struct text name;
 448                 struct type *next;
 449                 struct value (*init)(struct type *type);
 450                 struct value (*prepare)(struct type *type);
 451                 struct value (*parse)(struct type *type, char *str);
 452                 void (*print)(struct value val);
 453                 int (*cmp_order)(struct value v1, struct value v2);
 454                 int (*cmp_eq)(struct value v1, struct value v2);
 455                 struct value (*dup)(struct value val);
 456                 void (*free)(struct value val);
 457                 int (*compat)(struct type *this, struct type *other);
 458                 long long (*to_int)(struct value *v);
 459                 double (*to_float)(struct value *v);
 460                 int (*to_mpq)(mpq_t *q, struct value *v);
 461                 union {
 462                         ## type union fields
 463                 };
 464         };
 465
 466 ###### parse context
 467
 468         struct type *typelist;
 469
 470 ###### ast functions
 471
 472         static struct type *find_type(struct parse_context *c, struct text s)
 473         {
 474                 struct type *l = c->typelist;
 475
 476                 while (l &&
 477                        text_cmp(l->name, s) != 0)
 478                                 l = l->next;
 479                 return l;
 480         }
 481
 482         static struct type *add_type(struct parse_context *c, struct text s,
 483                                      struct type *proto)
 484         {
 485                 struct type *n;
 486
 487                 n = calloc(1, sizeof(*n));
 488                 *n = *proto;
 489                 n->name = s;
 490                 n->next = c->typelist;
 491                 c->typelist = n;
 492                 return n;
 493         }
 494
 495         static void free_type(struct type *t)
 496         {
 497                 /* The type is always a reference to something in the
 498                  * context, so we don't need to free anything.
 499                  */
 500         }
 501
 502         static void free_value(struct value v)
 503         {
 504                 if (v.type)
 505                         v.type->free(v);
 506         }
 507
 508         static int type_compat(struct type *require, struct type *have, int rules)
 509         {
 510                 if ((rules & Rboolok) && have == Tbool)
 511                         return 1;
 512                 if ((rules & Rnolabel) && have == Tlabel)
 513                         return 0;
 514                 if (!require || !have)
 515                         return 1;
 516
 517                 if (require->compat)
 518                         return require->compat(require, have);
 519
 520                 return require == have;
 521         }
 522
 523         static struct value val_prepare(struct type *type)
 524         {
 525                 struct value rv;
 526
 527                 if (type)
 528                         return type->prepare(type);
 529                 rv.type = type;
 530                 return rv;
 531         }
 532
 533         static struct value val_init(struct type *type)
 534         {
 535                 struct value rv;
 536
 537                 if (type)
 538                         return type->init(type);
 539                 rv.type = type;
 540                 return rv;
 541         }
 542
 543         static struct value dup_value(struct value v)
 544         {
 545                 if (v.type)
 546                         return v.type->dup(v);
 547                 return v;
 548         }
 549
 550         static int value_cmp(struct value left, struct value right)
 551         {
 552                 if (left.type && left.type->cmp_order)
 553                         return left.type->cmp_order(left, right);
 554                 if (left.type && left.type->cmp_eq)
 555                         return left.type->cmp_eq(left, right);
 556                 return -1;
 557         }
 558
 559         static void print_value(struct value v)
 560         {
 561                 if (v.type && v.type->print)
 562                         v.type->print(v);
 563                 else
 564                         printf("*Unknown*");
 565         }
 566
 567         static struct value parse_value(struct type *type, char *arg)
 568         {
 569                 struct value rv;
 570
 571                 if (type && type->parse)
 572                         return type->parse(type, arg);
 573                 rv.type = NULL;
 574                 return rv;
 575         }
 576
 577 ###### free context types
 578
 579         while (context.typelist) {
 580                 struct type *t = context.typelist;
 581
 582                 context.typelist = t->next;
 583                 free(t);
 584         }
 585
 586 #### Base Types
 587
 588 Values of the base types can be numbers, which we represent as
 589 multi-precision fractions, strings, Booleans and labels.  When
 590 analysing the program we also need to allow for places where no value
 591 is meaningful (type `Tnone`) and where we don't know what type to
 592 expect yet (type is `NULL`).
 593
 594 Values are never shared, they are always copied when used, and freed
 595 when no longer needed.
 596
 597 When propagating type information around the program, we need to
 598 determine if two types are compatible, where type `NULL` is compatible
 599 with anything.  There are two special cases with type compatibility,
 600 both related to the Conditional Statement which will be described
 601 later.  In some cases a Boolean can be accepted as well as some other
 602 primary type, and in others any type is acceptable except a label (`Vlabel`).
 603 A separate function encode these cases will simplify some code later.
 604
 605 When assigning command line arguments to variables, we need to be able
 606 to parse each type from a string.
 607
 608 ###### includes
 609         #include <gmp.h>
 610         #include "string.h"
 611         #include "number.h"
 612
 613 ###### libs
 614         myLDLIBS := libnumber.o libstring.o -lgmp
 615         LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
 616
 617 ###### type union fields
 618         enum vtype {Vnone, Vstr, Vnum, Vbool, Vlabel} vtype;
 619
 620 ###### value union fields
 621         struct text str;
 622         mpq_t num;
 623         int bool;
 624         void *label;
 625
 626 ###### ast functions
 627         static void _free_value(struct value v)
 628         {
 629                 switch (v.type->vtype) {
 630                 case Vnone: break;
 631                 case Vstr: free(v.str.txt); break;
 632                 case Vnum: mpq_clear(v.num); break;
 633                 case Vlabel:
 634                 case Vbool: break;
 635                 }
 636         }
 637
 638 ###### value functions
 639
 640         static struct value _val_prepare(struct type *type)
 641         {
 642                 struct value rv;
 643
 644                 rv.type = type;
 645                 switch(type->vtype) {
 646                 case Vnone:
 647                         break;
 648                 case Vnum:
 649                         memset(&rv.num, 0, sizeof(rv.num));
 650                         break;
 651                 case Vstr:
 652                         rv.str.txt = NULL;
 653                         rv.str.len = 0;
 654                         break;
 655                 case Vbool:
 656                         rv.bool = 0;
 657                         break;
 658                 case Vlabel:
 659                         rv.label = NULL;
 660                         break;
 661                 }
 662                 return rv;
 663         }
 664
 665         static struct value _val_init(struct type *type)
 666         {
 667                 struct value rv;
 668
 669                 rv.type = type;
 670                 switch(type->vtype) {
 671                 case Vnone:
 672                         break;
 673                 case Vnum:
 674                         mpq_init(rv.num); break;
 675                 case Vstr:
 676                         rv.str.txt = malloc(1);
 677                         rv.str.len = 0;
 678                         break;
 679                 case Vbool:
 680                         rv.bool = 0;
 681                         break;
 682                 case Vlabel:
 683                         rv.label = NULL;
 684                         break;
 685                 }
 686                 return rv;
 687         }
 688
 689         static struct value _dup_value(struct value v)
 690         {
 691                 struct value rv;
 692                 rv.type = v.type;
 693                 switch (rv.type->vtype) {
 694                 case Vnone:
 695                         break;
 696                 case Vlabel:
 697                         rv.label = v.label;
 698                         break;
 699                 case Vbool:
 700                         rv.bool = v.bool;
 701                         break;
 702                 case Vnum:
 703                         mpq_init(rv.num);
 704                         mpq_set(rv.num, v.num);
 705                         break;
 706                 case Vstr:
 707                         rv.str.len = v.str.len;
 708                         rv.str.txt = malloc(rv.str.len);
 709                         memcpy(rv.str.txt, v.str.txt, v.str.len);
 710                         break;
 711                 }
 712                 return rv;
 713         }
 714
 715         static int _value_cmp(struct value left, struct value right)
 716         {
 717                 int cmp;
 718                 if (left.type != right.type)
 719                         return left.type - right.type;
 720                 switch (left.type->vtype) {
 721                 case Vlabel: cmp = left.label == right.label ? 0 : 1; break;
 722                 case Vnum: cmp = mpq_cmp(left.num, right.num); break;
 723                 case Vstr: cmp = text_cmp(left.str, right.str); break;
 724                 case Vbool: cmp = left.bool - right.bool; break;
 725                 case Vnone: cmp = 0;
 726                 }
 727                 return cmp;
 728         }
 729
 730         static void _print_value(struct value v)
 731         {
 732                 switch (v.type->vtype) {
 733                 case Vnone:
 734                         printf("*no-value*"); break;
 735                 case Vlabel:
 736                         printf("*label-%p*", v.label); break;
 737                 case Vstr:
 738                         printf("%.*s", v.str.len, v.str.txt); break;
 739                 case Vbool:
 740                         printf("%s", v.bool ? "True":"False"); break;
 741                 case Vnum:
 742                         {
 743                         mpf_t fl;
 744                         mpf_init2(fl, 20);
 745                         mpf_set_q(fl, v.num);
 746                         gmp_printf("%Fg", fl);
 747                         mpf_clear(fl);
 748                         break;
 749                         }
 750                 }
 751         }
 752
 753         static struct value _parse_value(struct type *type, char *arg)
 754         {
 755                 struct value val;
 756                 struct text tx;
 757                 int neg = 0;
 758                 char tail[3] = "";
 759
 760                 val.type = type;
 761                 switch(type->vtype) {
 762                 case Vlabel:
 763                 case Vnone:
 764                         val.type = NULL;
 765                         break;
 766                 case Vstr:
 767                         val.str.len = strlen(arg);
 768                         val.str.txt = malloc(val.str.len);
 769                         memcpy(val.str.txt, arg, val.str.len);
 770                         break;
 771                 case Vnum:
 772                         if (*arg == '-') {
 773                                 neg = 1;
 774                                 arg++;
 775                         }
 776                         tx.txt = arg; tx.len = strlen(tx.txt);
 777                         if (number_parse(val.num, tail, tx) == 0)
 778                                 mpq_init(val.num);
 779                         else if (neg)
 780                                 mpq_neg(val.num, val.num);
 781                         if (tail[0]) {
 782                                 printf("Unsupported suffix: %s\n", arg);
 783                                 val.type = NULL;
 784                         }
 785                         break;
 786                 case Vbool:
 787                         if (strcasecmp(arg, "true") == 0 ||
 788                             strcmp(arg, "1") == 0)
 789                                 val.bool = 1;
 790                         else if (strcasecmp(arg, "false") == 0 ||
 791                                  strcmp(arg, "0") == 0)
 792                                 val.bool = 0;
 793                         else {
 794                                 printf("Bad bool: %s\n", arg);
 795                                 val.type = NULL;
 796                         }
 797                         break;
 798                 }
 799                 return val;
 800         }
 801
 802         static void _free_value(struct value v);
 803
 804         static struct type base_prototype = {
 805                 .init = _val_init,
 806                 .prepare = _val_prepare,
 807                 .parse = _parse_value,
 808                 .print = _print_value,
 809                 .cmp_order = _value_cmp,
 810                 .cmp_eq = _value_cmp,
 811                 .dup = _dup_value,
 812                 .free = _free_value,
 813         };
 814
 815         static struct type *Tbool, *Tstr, *Tnum, *Tnone, *Tlabel;
 816
 817 ###### ast functions
 818         static struct type *add_base_type(struct parse_context *c, char *n, enum vtype vt)
 819         {
 820                 struct text txt = { n, strlen(n) };
 821                 struct type *t;
 822
 823                 t = add_type(c, txt, &base_prototype);
 824                 t->vtype = vt;
 825                 return t;
 826         }
 827
 828 ###### context initialization
 829
 830         Tbool  = add_base_type(&context, "Boolean", Vbool);
 831         Tstr   = add_base_type(&context, "string", Vstr);
 832         Tnum   = add_base_type(&context, "number", Vnum);
 833         Tnone  = add_base_type(&context, "none", Vnone);
 834         Tlabel = add_base_type(&context, "label", Vlabel);
 835
 836 ### Variables
 837
 838 Variables are scoped named values.  We store the names in a linked
 839 list of "bindings" sorted lexically, and use sequential search and
 840 insertion sort.
 841
 842 ###### ast
 843
 844         struct binding {
 845                 struct text name;
 846                 struct binding *next;   // in lexical order
 847                 ## binding fields
 848         };
 849
 850 This linked list is stored in the parse context so that "reduce"
 851 functions can find or add variables, and so the analysis phase can
 852 ensure that every variable gets a type.
 853
 854 ###### parse context
 855
 856         struct binding *varlist;  // In lexical order
 857
 858 ###### ast functions
 859
 860         static struct binding *find_binding(struct parse_context *c, struct text s)
 861         {
 862                 struct binding **l = &c->varlist;
 863                 struct binding *n;
 864                 int cmp = 1;
 865
 866                 while (*l &&
 867                         (cmp = text_cmp((*l)->name, s)) < 0)
 868                                 l = & (*l)->next;
 869                 if (cmp == 0)
 870                         return *l;
 871                 n = calloc(1, sizeof(*n));
 872                 n->name = s;
 873                 n->next = *l;
 874                 *l = n;
 875                 return n;
 876         }
 877
 878 Each name can be linked to multiple variables defined in different
 879 scopes.  Each scope starts where the name is declared and continues
 880 until the end of the containing code block.  Scopes of a given name
 881 cannot nest, so a declaration while a name is in-scope is an error.
 882
 883 ###### binding fields
 884         struct variable *var;
 885
 886 ###### ast
 887         struct variable {
 888                 struct variable *previous;
 889                 struct value val;
 890                 struct binding *name;
 891                 struct exec *where_decl;// where name was declared
 892                 struct exec *where_set; // where type was set
 893                 ## variable fields
 894         };
 895
 896 While the naming seems strange, we include local constants in the
 897 definition of variables.  A name declared `var := value` can
 898 subsequently be changed, but a name declared `var ::= value` cannot -
 899 it is constant
 900
 901 ###### variable fields
 902         int constant;
 903
 904 Scopes in parallel branches can be partially merged.  More
 905 specifically, if a given name is declared in both branches of an
 906 if/else then its scope is a candidate for merging.  Similarly if
 907 every branch of an exhaustive switch (e.g. has an "else" clause)
 908 declares a given name, then the scopes from the branches are
 909 candidates for merging.
 910
 911 Note that names declared inside a loop (which is only parallel to
 912 itself) are never visible after the loop.  Similarly names defined in
 913 scopes which are not parallel, such as those started by `for` and
 914 `switch`, are never visible after the scope.  Only variables defined in
 915 both `then` and `else` (including the implicit then after an `if`, and
 916 excluding `then` used with `for`) and in all `case`s and `else` of a
 917 `switch` or `while` can be visible beyond the `if`/`switch`/`while`.
 918
 919 Labels, which are a bit like variables, follow different rules.
 920 Labels are not explicitly declared, but if an undeclared name appears
 921 in a context where a label is legal, that effectively declares the
 922 name as a label.  The declaration remains in force (or in scope) at
 923 least to the end of the immediately containing block and conditionally
 924 in any larger containing block which does not declare the name in some
 925 other way.  Importantly, the conditional scope extension happens even
 926 if the label is only used in one parallel branch of a conditional --
 927 when used in one branch it is treated as having been declared in all
 928 branches.
 929
 930 Merge candidates are tentatively visible beyond the end of the
 931 branching statement which creates them.  If the name is used, the
 932 merge is affirmed and they become a single variable visible at the
 933 outer layer.  If not - if it is redeclared first - the merge lapses.
 934
 935 To track scopes we have an extra stack, implemented as a linked list,
 936 which roughly parallels the parse stack and which is used exclusively
 937 for scoping.  When a new scope is opened, a new frame is pushed and
 938 the child-count of the parent frame is incremented.  This child-count
 939 is used to distinguish between the first of a set of parallel scopes,
 940 in which declared variables must not be in scope, and subsequent
 941 branches, whether they must already be conditionally scoped.
 942
 943 To push a new frame *before* any code in the frame is parsed, we need a
 944 grammar reduction.  This is most easily achieved with a grammar
 945 element which derives the empty string, and creates the new scope when
 946 it is recognized.  This can be placed, for example, between a keyword
 947 like "if" and the code following it.
 948
 949 ###### ast
 950         struct scope {
 951                 struct scope *parent;
 952                 int child_count;
 953         };
 954
 955 ###### parse context
 956         int scope_depth;
 957         struct scope *scope_stack;
 958
 959 ###### ast functions
 960         static void scope_pop(struct parse_context *c)
 961         {
 962                 struct scope *s = c->scope_stack;
 963
 964                 c->scope_stack = s->parent;
 965                 free(s);
 966                 c->scope_depth -= 1;
 967         }
 968
 969         static void scope_push(struct parse_context *c)
 970         {
 971                 struct scope *s = calloc(1, sizeof(*s));
 972                 if (c->scope_stack)
 973                         c->scope_stack->child_count += 1;
 974                 s->parent = c->scope_stack;
 975                 c->scope_stack = s;
 976                 c->scope_depth += 1;
 977         }
 978
 979 ###### Grammar
 980
 981         $void
 982         OpenScope -> ${ scope_push(config2context(config)); }$
 983
 984
 985 Each variable records a scope depth and is in one of four states:
 986
 987 - "in scope".  This is the case between the declaration of the
 988   variable and the end of the containing block, and also between
 989   the usage with affirms a merge and the end of that block.
 990
 991   The scope depth is not greater than the current parse context scope
 992   nest depth.  When the block of that depth closes, the state will
 993   change.  To achieve this, all "in scope" variables are linked
 994   together as a stack in nesting order.
 995
 996 - "pending".  The "in scope" block has closed, but other parallel
 997   scopes are still being processed.  So far, every parallel block at
 998   the same level that has closed has declared the name.
 999
1000   The scope depth is the depth of the last parallel block that
1001   enclosed the declaration, and that has closed.
1002
1003 - "conditionally in scope".  The "in scope" block and all parallel
1004   scopes have closed, and no further mention of the name has been
1005   seen.  This state includes a secondary nest depth which records the
1006   outermost scope seen since the variable became conditionally in
1007   scope.  If a use of the name is found, the variable becomes "in
1008   scope" and that secondary depth becomes the recorded scope depth.
1009   If the name is declared as a new variable, the old variable becomes
1010   "out of scope" and the recorded scope depth stays unchanged.
1011
1012 - "out of scope".  The variable is neither in scope nor conditionally
1013   in scope.  It is permanently out of scope now and can be removed from
1014   the "in scope" stack.
1015
1016
1017 ###### variable fields
1018         int depth, min_depth;
1019         enum { OutScope, PendingScope, CondScope, InScope } scope;
1020         struct variable *in_scope;
1021
1022 ###### parse context
1023
1024         struct variable *in_scope;
1025
1026 All variables with the same name are linked together using the
1027 'previous' link.  Those variable that have
1028 been affirmatively merged all have a 'merged' pointer that points to
1029 one primary variable - the most recently declared instance. When
1030 merging variables, we need to also adjust the 'merged' pointer on any
1031 other variables that had previously been merged with the one that will
1032 no longer be primary.
1033
1034 ###### variable fields
1035         struct variable *merged;
1036
1037 ###### ast functions
1038
1039         static void variable_merge(struct variable *primary, struct variable *secondary)
1040         {
1041                 struct variable *v;
1042
1043                 if (primary->merged)
1044                         // shouldn't happen
1045                         primary = primary->merged;
1046
1047                 for (v = primary->previous; v; v=v->previous)
1048                         if (v == secondary || v == secondary->merged ||
1049                             v->merged == secondary ||
1050                             (v->merged && v->merged == secondary->merged)) {
1051                                 v->scope = OutScope;
1052                                 v->merged = primary;
1053                         }
1054         }
1055
1056 ###### free context vars
1057
1058         while (context.varlist) {
1059                 struct binding *b = context.varlist;
1060                 struct variable *v = b->var;
1061                 context.varlist = b->next;
1062                 free(b);
1063                 while (v) {
1064                         struct variable *t = v;
1065
1066                         v = t->previous;
1067                         free_value(t->val);
1068                         free(t);
1069                 }
1070         }
1071
1072 #### Manipulating Bindings
1073
1074 When a name is conditionally visible, a new declaration discards the
1075 old binding - the condition lapses.  Conversely a usage of the name
1076 affirms the visibility and extends it to the end of the containing
1077 block - i.e. the block that contains both the original declaration and
1078 the latest usage.  This is determined from `min_depth`.  When a
1079 conditionally visible variable gets affirmed like this, it is also
1080 merged with other conditionally visible variables with the same name.
1081
1082 When we parse a variable declaration we either signal an error if the
1083 name is currently bound, or create a new variable at the current nest
1084 depth if the name is unbound or bound to a conditionally scoped or
1085 pending-scope variable.  If the previous variable was conditionally
1086 scoped, it and its homonyms becomes out-of-scope.
1087
1088 When we parse a variable reference (including non-declarative
1089 assignment) we signal an error if the name is not bound or is bound to
1090 a pending-scope variable; update the scope if the name is bound to a
1091 conditionally scoped variable; or just proceed normally if the named
1092 variable is in scope.
1093
1094 When we exit a scope, any variables bound at this level are either
1095 marked out of scope or pending-scoped, depending on whether the
1096 scope was sequential or parallel.
1097
1098 When exiting a parallel scope we check if there are any variables that
1099 were previously pending and are still visible. If there are, then
1100 there weren't redeclared in the most recent scope, so they cannot be
1101 merged and must become out-of-scope.  If it is not the first of
1102 parallel scopes (based on `child_count`), we check that there was a
1103 previous binding that is still pending-scope.  If there isn't, the new
1104 variable must now be out-of-scope.
1105
1106 When exiting a sequential scope that immediately enclosed parallel
1107 scopes, we need to resolve any pending-scope variables.  If there was
1108 no `else` clause, and we cannot determine that the `switch` was exhaustive,
1109 we need to mark all pending-scope variable as out-of-scope.  Otherwise
1110 all pending-scope variables become conditionally scoped.
1111
1112 ###### ast
1113         enum closetype { CloseSequential, CloseParallel, CloseElse };
1114
1115 ###### ast functions
1116
1117         static struct variable *var_decl(struct parse_context *c, struct text s)
1118         {
1119                 struct binding *b = find_binding(c, s);
1120                 struct variable *v = b->var;
1121
1122                 switch (v ? v->scope : OutScope) {
1123                 case InScope:
1124                         /* Caller will report the error */
1125                         return NULL;
1126                 case CondScope:
1127                         for (;
1128                              v && v->scope == CondScope;
1129                              v = v->previous)
1130                                 v->scope = OutScope;
1131                         break;
1132                 default: break;
1133                 }
1134                 v = calloc(1, sizeof(*v));
1135                 v->previous = b->var;
1136                 b->var = v;
1137                 v->name = b;
1138                 v->min_depth = v->depth = c->scope_depth;
1139                 v->scope = InScope;
1140                 v->in_scope = c->in_scope;
1141                 c->in_scope = v;
1142                 v->val = val_prepare(NULL);
1143                 return v;
1144         }
1145
1146         static struct variable *var_ref(struct parse_context *c, struct text s)
1147         {
1148                 struct binding *b = find_binding(c, s);
1149                 struct variable *v = b->var;
1150                 struct variable *v2;
1151
1152                 switch (v ? v->scope : OutScope) {
1153                 case OutScope:
1154                 case PendingScope:
1155                         /* Signal an error - once that is possible */
1156                         return NULL;
1157                 case CondScope:
1158                         /* All CondScope variables of this name need to be merged
1159                          * and become InScope
1160                          */
1161                         v->depth = v->min_depth;
1162                         v->scope = InScope;
1163                         for (v2 = v->previous;
1164                              v2 && v2->scope == CondScope;
1165                              v2 = v2->previous)
1166                                 variable_merge(v, v2);
1167                         break;
1168                 case InScope:
1169                         break;
1170                 }
1171                 return v;
1172         }
1173
1174         static void var_block_close(struct parse_context *c, enum closetype ct)
1175         {
1176                 /* close of all variables that are in_scope */
1177                 struct variable *v, **vp, *v2;
1178
1179                 scope_pop(c);
1180                 for (vp = &c->in_scope;
1181                      v = *vp, v && v->depth > c->scope_depth && v->min_depth > c->scope_depth;
1182                      ) {
1183                         switch (ct) {
1184                         case CloseElse:
1185                         case CloseParallel: /* handle PendingScope */
1186                                 switch(v->scope) {
1187                                 case InScope:
1188                                 case CondScope:
1189                                         if (c->scope_stack->child_count == 1)
1190                                                 v->scope = PendingScope;
1191                                         else if (v->previous &&
1192                                                  v->previous->scope == PendingScope)
1193                                                 v->scope = PendingScope;
1194                                         else if (v->val.type == Tlabel)
1195                                                 v->scope = PendingScope;
1196                                         else if (v->name->var == v)
1197                                                 v->scope = OutScope;
1198                                         if (ct == CloseElse) {
1199                                                 /* All Pending variables with this name
1200                                                  * are now Conditional */
1201                                                 for (v2 = v;
1202                                                      v2 && v2->scope == PendingScope;
1203                                                      v2 = v2->previous)
1204                                                         v2->scope = CondScope;
1205                                         }
1206                                         break;
1207                                 case PendingScope:
1208                                         for (v2 = v;
1209                                              v2 && v2->scope == PendingScope;
1210                                              v2 = v2->previous)
1211                                                 if (v2->val.type != Tlabel)
1212                                                         v2->scope = OutScope;
1213                                         break;
1214                                 case OutScope: break;
1215                                 }
1216                                 break;
1217                         case CloseSequential:
1218                                 if (v->val.type == Tlabel)
1219                                         v->scope = PendingScope;
1220                                 switch (v->scope) {
1221                                 case InScope:
1222                                         v->scope = OutScope;
1223                                         break;
1224                                 case PendingScope:
1225                                         /* There was no 'else', so we can only become
1226                                          * conditional if we know the cases were exhaustive,
1227                                          * and that doesn't mean anything yet.
1228                                          * So only labels become conditional..
1229                                          */
1230                                         for (v2 = v;
1231                                              v2 && v2->scope == PendingScope;
1232                                              v2 = v2->previous)
1233                                                 if (v2->val.type == Tlabel) {
1234                                                         v2->scope = CondScope;
1235                                                         v2->min_depth = c->scope_depth;
1236                                                 } else
1237                                                         v2->scope = OutScope;
1238                                         break;
1239                                 case CondScope:
1240                                 case OutScope: break;
1241                                 }
1242                                 break;
1243                         }
1244                         if (v->scope == OutScope)
1245                                 *vp = v->in_scope;
1246                         else
1247                                 vp = &v->in_scope;
1248                 }
1249         }
1250
1251 ### Executables
1252
1253 Executables can be lots of different things.  In many cases an
1254 executable is just an operation combined with one or two other
1255 executables.  This allows for expressions and lists etc.  Other times
1256 an executable is something quite specific like a constant or variable
1257 name.  So we define a `struct exec` to be a general executable with a
1258 type, and a `struct binode` which is a subclass of `exec`, forms a
1259 node in a binary tree, and holds an operation. There will be other
1260 subclasses, and to access these we need to be able to `cast` the
1261 `exec` into the various other types.
1262
1263 ###### macros
1264         #define cast(structname, pointer) ({            \
1265                 const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \
1266                 if (__mptr && *__mptr != X##structname) abort();                \
1267                 (struct structname *)( (char *)__mptr);})
1268
1269         #define new(structname) ({                                              \
1270                 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1271                 __ptr->type = X##structname;                                            \
1272                 __ptr->line = -1; __ptr->column = -1;                                   \
1273                 __ptr;})
1274
1275         #define new_pos(structname, token) ({                                           \
1276                 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1277                 __ptr->type = X##structname;                                            \
1278                 __ptr->line = token.line; __ptr->column = token.col;                    \
1279                 __ptr;})
1280
1281 ###### ast
1282         enum exec_types {
1283                 Xbinode,
1284                 ## exec type
1285         };
1286         struct exec {
1287                 enum exec_types type;
1288                 int line, column;
1289         };
1290         struct binode {
1291                 struct exec;
1292                 enum Btype {
1293                         ## Binode types
1294                 } op;
1295                 struct exec *left, *right;
1296         };
1297
1298 ###### ast functions
1299
1300         static int __fput_loc(struct exec *loc, FILE *f)
1301         {
1302                 if (loc->line >= 0) {
1303                         fprintf(f, "%d:%d: ", loc->line, loc->column);
1304                         return 1;
1305                 }
1306                 if (loc->type == Xbinode)
1307                         return __fput_loc(cast(binode,loc)->left, f) ||
1308                                __fput_loc(cast(binode,loc)->right, f);
1309                 return 0;
1310         }
1311         static void fput_loc(struct exec *loc, FILE *f)
1312         {
1313                 if (!__fput_loc(loc, f))
1314                         fprintf(f, "??:??: ");
1315         }
1316
1317 Each different type of `exec` node needs a number of functions
1318 defined, a bit like methods.  We must be able to be able to free it,
1319 print it, analyse it and execute it.  Once we have specific `exec`
1320 types we will need to parse them too.  Let's take this a bit more
1321 slowly.
1322
1323 #### Freeing
1324
1325 The parser generator requires a `free_foo` function for each struct
1326 that stores attributes and they will be `exec`s and subtypes there-of.
1327 So we need `free_exec` which can handle all the subtypes, and we need
1328 `free_binode`.
1329
1330 ###### ast functions
1331
1332         static void free_binode(struct binode *b)
1333         {
1334                 if (!b)
1335                         return;
1336                 free_exec(b->left);
1337                 free_exec(b->right);
1338                 free(b);
1339         }
1340
1341 ###### core functions
1342         static void free_exec(struct exec *e)
1343         {
1344                 if (!e)
1345                         return;
1346                 switch(e->type) {
1347                         ## free exec cases
1348                 }
1349         }
1350
1351 ###### forward decls
1352
1353         static void free_exec(struct exec *e);
1354
1355 ###### free exec cases
1356         case Xbinode: free_binode(cast(binode, e)); break;
1357
1358 #### Printing
1359
1360 Printing an `exec` requires that we know the current indent level for
1361 printing line-oriented components.  As will become clear later, we
1362 also want to know what sort of bracketing to use.
1363
1364 ###### ast functions
1365
1366         static void do_indent(int i, char *str)
1367         {
1368                 while (i--)
1369                         printf("    ");
1370                 printf("%s", str);
1371         }
1372
1373 ###### core functions
1374         static void print_binode(struct binode *b, int indent, int bracket)
1375         {
1376                 struct binode *b2;
1377                 switch(b->op) {
1378                 ## print binode cases
1379                 }
1380         }
1381
1382         static void print_exec(struct exec *e, int indent, int bracket)
1383         {
1384                 if (!e)
1385                         return;
1386                 switch (e->type) {
1387                 case Xbinode:
1388                         print_binode(cast(binode, e), indent, bracket); break;
1389                 ## print exec cases
1390                 }
1391         }
1392
1393 ###### forward decls
1394
1395         static void print_exec(struct exec *e, int indent, int bracket);
1396
1397 #### Analysing
1398
1399 As discussed, analysis involves propagating type requirements around
1400 the program and looking for errors.
1401
1402 So `propagate_types` is passed an expected type (being a `struct type`
1403 pointer together with some `val_rules` flags) that the `exec` is
1404 expected to return, and returns the type that it does return, either
1405 of which can be `NULL` signifying "unknown".  An `ok` flag is passed
1406 by reference. It is set to `0` when an error is found, and `2` when
1407 any change is made.  If it remains unchanged at `1`, then no more
1408 propagation is needed.
1409
1410 ###### ast
1411
1412         enum val_rules {Rnolabel = 1<<0, Rboolok = 1<<1};
1413
1414 ###### format cases
1415         case 'r':
1416                 if (rules & Rnolabel)
1417                         fputs(" (labels not permitted)", stderr);
1418                 break;
1419
1420 ###### core functions
1421
1422         static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1423                                             struct type *type, int rules)
1424         {
1425                 struct type *t;
1426
1427                 if (!prog)
1428                         return Tnone;
1429
1430                 switch (prog->type) {
1431                 case Xbinode:
1432                 {
1433                         struct binode *b = cast(binode, prog);
1434                         switch (b->op) {
1435                         ## propagate binode cases
1436                         }
1437                         break;
1438                 }
1439                 ## propagate exec cases
1440                 }
1441                 return Tnone;
1442         }
1443
1444 #### Interpreting
1445
1446 Interpreting an `exec` doesn't require anything but the `exec`.  State
1447 is stored in variables and each variable will be directly linked from
1448 within the `exec` tree.  The exception to this is the whole `program`
1449 which needs to look at command line arguments.  The `program` will be
1450 interpreted separately.
1451
1452 Each `exec` can return a value, which may be `Tnone` but must be non-NULL;
1453
1454 ###### core functions
1455
1456         static struct value interp_exec(struct exec *e)
1457         {
1458                 struct value rv;
1459                 rv.type = Tnone;
1460                 if (!e)
1461                         return rv;
1462
1463                 switch(e->type) {
1464                 case Xbinode:
1465                 {
1466                         struct binode *b = cast(binode, e);
1467                         struct value left, right;
1468                         left.type = right.type = Tnone;
1469                         switch (b->op) {
1470                         ## interp binode cases
1471                         }
1472                         free_value(left); free_value(right);
1473                         break;
1474                 }
1475                 ## interp exec cases
1476                 }
1477                 return rv;
1478         }
1479
1480 ## Language elements
1481
1482 Each language element needs to be parsed, printed, analysed,
1483 interpreted, and freed.  There are several, so let's just start with
1484 the easy ones and work our way up.
1485
1486 ### Values
1487
1488 We have already met values as separate objects.  When manifest
1489 constants appear in the program text, that must result in an executable
1490 which has a constant value.  So the `val` structure embeds a value in
1491 an executable.
1492
1493 ###### exec type
1494         Xval,
1495
1496 ###### ast
1497         struct val {
1498                 struct exec;
1499                 struct value val;
1500         };
1501
1502 ###### Grammar
1503
1504         $*val
1505         Value ->  True ${
1506                         $0 = new_pos(val, $1);
1507                         $0->val.type = Tbool;
1508                         $0->val.bool = 1;
1509                         }$
1510                 | False ${
1511                         $0 = new_pos(val, $1);
1512                         $0->val.type = Tbool;
1513                         $0->val.bool = 0;
1514                         }$
1515                 | NUMBER ${
1516                         $0 = new_pos(val, $1);
1517                         $0->val.type = Tnum;
1518                         {
1519                         char tail[3];
1520                         if (number_parse($0->val.num, tail, $1.txt) == 0)
1521                                 mpq_init($0->val.num);
1522                                 if (tail[0])
1523                                         tok_err(config2context(config), "error: unsupported number suffix",
1524                                                 &$1);
1525                         }
1526                         }$
1527                 | STRING ${
1528                         $0 = new_pos(val, $1);
1529                         $0->val.type = Tstr;
1530                         {
1531                         char tail[3];
1532                         string_parse(&$1, '\\', &$0->val.str, tail);
1533                         if (tail[0])
1534                                 tok_err(config2context(config), "error: unsupported string suffix",
1535                                         &$1);
1536                         }
1537                         }$
1538                 | MULTI_STRING ${
1539                         $0 = new_pos(val, $1);
1540                         $0->val.type = Tstr;
1541                         {
1542                         char tail[3];
1543                         string_parse(&$1, '\\', &$0->val.str, tail);
1544                         if (tail[0])
1545                                 tok_err(config2context(config), "error: unsupported string suffix",
1546                                         &$1);
1547                         }
1548                         }$
1549
1550 ###### print exec cases
1551         case Xval:
1552         {
1553                 struct val *v = cast(val, e);
1554                 if (v->val.type == Tstr)
1555                         printf("\"");
1556                 print_value(v->val);
1557                 if (v->val.type == Tstr)
1558                         printf("\"");
1559                 break;
1560         }
1561
1562 ###### propagate exec cases
1563                 case Xval:
1564                 {
1565                         struct val *val = cast(val, prog);
1566                         if (!type_compat(type, val->val.type, rules)) {
1567                                 type_err(c, "error: expected %1%r found %2",
1568                                            prog, type, rules, val->val.type);
1569                                 *ok = 0;
1570                         }
1571                         return val->val.type;
1572                 }
1573
1574 ###### interp exec cases
1575         case Xval:
1576                 return dup_value(cast(val, e)->val);
1577
1578 ###### ast functions
1579         static void free_val(struct val *v)
1580         {
1581                 if (!v)
1582                         return;
1583                 free_value(v->val);
1584                 free(v);
1585         }
1586
1587 ###### free exec cases
1588         case Xval: free_val(cast(val, e)); break;
1589
1590 ###### ast functions
1591         // Move all nodes from 'b' to 'rv', reversing the order.
1592         // In 'b' 'left' is a list, and 'right' is the last node.
1593         // In 'rv', left' is the first node and 'right' is a list.
1594         static struct binode *reorder_bilist(struct binode *b)
1595         {
1596                 struct binode *rv = NULL;
1597
1598                 while (b) {
1599                         struct exec *t = b->right;
1600                         b->right = rv;
1601                         rv = b;
1602                         if (b->left)
1603                                 b = cast(binode, b->left);
1604                         else
1605                                 b = NULL;
1606                         rv->left = t;
1607                 }
1608                 return rv;
1609         }
1610
1611 ### Variables
1612
1613 Just as we used a `val` to wrap a value into an `exec`, we similarly
1614 need a `var` to wrap a `variable` into an exec.  While each `val`
1615 contained a copy of the value, each `var` hold a link to the variable
1616 because it really is the same variable no matter where it appears.
1617 When a variable is used, we need to remember to follow the `->merged`
1618 link to find the primary instance.
1619
1620 ###### exec type
1621         Xvar,
1622
1623 ###### ast
1624         struct var {
1625                 struct exec;
1626                 struct variable *var;
1627         };
1628
1629 ###### Grammar
1630
1631         $*var
1632         VariableDecl -> IDENTIFIER : ${ {
1633                 struct variable *v = var_decl(config2context(config), $1.txt);
1634                 $0 = new_pos(var, $1);
1635                 $0->var = v;
1636                 if (v)
1637                         v->where_decl = $0;
1638                 else {
1639                         v = var_ref(config2context(config), $1.txt);
1640                         $0->var = v;
1641                         type_err(config2context(config), "error: variable '%v' redeclared",
1642                                  $0, Tnone, 0, Tnone);
1643                         type_err(config2context(config), "info: this is where '%v' was first declared",
1644                                  v->where_decl, Tnone, 0, Tnone);
1645                 }
1646         } }$
1647             | IDENTIFIER :: ${ {
1648                 struct variable *v = var_decl(config2context(config), $1.txt);
1649                 $0 = new_pos(var, $1);
1650                 $0->var = v;
1651                 if (v) {
1652                         v->where_decl = $0;
1653                         v->constant = 1;
1654                 } else {
1655                         v = var_ref(config2context(config), $1.txt);
1656                         $0->var = v;
1657                         type_err(config2context(config), "error: variable '%v' redeclared",
1658                                  $0, Tnone, 0, Tnone);
1659                         type_err(config2context(config), "info: this is where '%v' was first declared",
1660                                  v->where_decl, Tnone, 0, Tnone);
1661                 }
1662         } }$
1663             | IDENTIFIER : Type ${ {
1664                 struct variable *v = var_decl(config2context(config), $1.txt);
1665                 $0 = new_pos(var, $1);
1666                 $0->var = v;
1667                 if (v) {
1668                         v->where_decl = $0;
1669                         v->where_set = $0;
1670                         v->val = val_prepare($<3);
1671                 } else {
1672                         v = var_ref(config2context(config), $1.txt);
1673                         $0->var = v;
1674                         type_err(config2context(config), "error: variable '%v' redeclared",
1675                                  $0, Tnone, 0, Tnone);
1676                         type_err(config2context(config), "info: this is where '%v' was first declared",
1677                                  v->where_decl, Tnone, 0, Tnone);
1678                 }
1679         } }$
1680             | IDENTIFIER :: Type ${ {
1681                 struct variable *v = var_decl(config2context(config), $1.txt);
1682                 $0 = new_pos(var, $1);
1683                 $0->var = v;
1684                 if (v) {
1685                         v->where_decl = $0;
1686                         v->where_set = $0;
1687                         v->val = val_prepare($<3);
1688                         v->constant = 1;
1689                 } else {
1690                         v = var_ref(config2context(config), $1.txt);
1691                         $0->var = v;
1692                         type_err(config2context(config), "error: variable '%v' redeclared",
1693                                  $0, Tnone, 0, Tnone);
1694                         type_err(config2context(config), "info: this is where '%v' was first declared",
1695                                  v->where_decl, Tnone, 0, Tnone);
1696                 }
1697         } }$
1698
1699         Variable -> IDENTIFIER ${ {
1700                 struct variable *v = var_ref(config2context(config), $1.txt);
1701                 $0 = new_pos(var, $1);
1702                 if (v == NULL) {
1703                         /* This might be a label - allocate a var just in case */
1704                         v = var_decl(config2context(config), $1.txt);
1705                         if (v) {
1706                                 v->val = val_prepare(Tlabel);
1707                                 v->val.label = &v->val;
1708                                 v->where_set = $0;
1709                         }
1710                 }
1711                 $0->var = v;
1712         } }$
1713
1714         $*type
1715         Type -> IDENTIFIER ${
1716                 $0 = find_type(config2context(config), $1.txt);
1717                 if (!$0) {
1718                         tok_err(config2context(config),
1719                                 "error: undefined type", &$1);
1720
1721                         $0 = Tnone;
1722                 }
1723         }$
1724
1725 ###### print exec cases
1726         case Xvar:
1727         {
1728                 struct var *v = cast(var, e);
1729                 if (v->var) {
1730                         struct binding *b = v->var->name;
1731                         printf("%.*s", b->name.len, b->name.txt);
1732                 }
1733                 break;
1734         }
1735
1736 ###### format cases
1737         case 'v':
1738                 if (loc->type == Xvar) {
1739                         struct var *v = cast(var, loc);
1740                         if (v->var) {
1741                                 struct binding *b = v->var->name;
1742                                 fprintf(stderr, "%.*s", b->name.len, b->name.txt);
1743                         } else
1744                                 fputs("???", stderr);
1745                 } else
1746                         fputs("NOTVAR", stderr);
1747                 break;
1748
1749 ###### propagate exec cases
1750
1751         case Xvar:
1752         {
1753                 struct var *var = cast(var, prog);
1754                 struct variable *v = var->var;
1755                 if (!v) {
1756                         type_err(c, "%d:BUG: no variable!!", prog, Tnone, 0, Tnone);
1757                         *ok = 0;
1758                         return Tnone;
1759                 }
1760                 if (v->merged)
1761                         v = v->merged;
1762                 if (v->val.type == NULL) {
1763                         if (type && *ok != 0) {
1764                                 v->val = val_prepare(type);
1765                                 v->where_set = prog;
1766                                 *ok = 2;
1767                         }
1768                         return type;
1769                 }
1770                 if (!type_compat(type, v->val.type, rules)) {
1771                         type_err(c, "error: expected %1%r but variable '%v' is %2", prog,
1772                                  type, rules, v->val.type);
1773                         type_err(c, "info: this is where '%v' was set to %1", v->where_set,
1774                                  v->val.type, rules, Tnone);
1775                         *ok = 0;
1776                 }
1777                 if (!type)
1778                         return v->val.type;
1779                 return type;
1780         }
1781
1782 ###### interp exec cases
1783         case Xvar:
1784         {
1785                 struct var *var = cast(var, e);
1786                 struct variable *v = var->var;
1787
1788                 if (v->merged)
1789                         v = v->merged;
1790                 return dup_value(v->val);
1791         }
1792
1793 ###### ast functions
1794
1795         static void free_var(struct var *v)
1796         {
1797                 free(v);
1798         }
1799
1800 ###### free exec cases
1801         case Xvar: free_var(cast(var, e)); break;
1802
1803 ### Expressions: Boolean
1804
1805 Our first user of the `binode` will be expressions, and particularly
1806 Boolean expressions.  As I haven't implemented precedence in the
1807 parser generator yet, we need different names for each precedence
1808 level used by expressions.  The outer most or lowest level precedence
1809 are Boolean `or` `and`, and `not` which form an `Expression` out of `BTerm`s
1810 and `BFact`s.
1811
1812 ###### Binode types
1813         And,
1814         Or,
1815         Not,
1816
1817 ###### Grammar
1818
1819         $*exec
1820         Expression -> Expression or BTerm ${ {
1821                         struct binode *b = new(binode);
1822                         b->op = Or;
1823                         b->left = $<1;
1824                         b->right = $<3;
1825                         $0 = b;
1826                 } }$
1827                 | BTerm ${ $0 = $<1; }$
1828
1829         BTerm -> BTerm and BFact ${ {
1830                         struct binode *b = new(binode);
1831                         b->op = And;
1832                         b->left = $<1;
1833                         b->right = $<3;
1834                         $0 = b;
1835                 } }$
1836                 | BFact ${ $0 = $<1; }$
1837
1838         BFact -> not BFact ${ {
1839                         struct binode *b = new(binode);
1840                         b->op = Not;
1841                         b->right = $<2;
1842                         $0 = b;
1843                 } }$
1844                 ## other BFact
1845
1846 ###### print binode cases
1847         case And:
1848                 print_exec(b->left, -1, 0);
1849                 printf(" and ");
1850                 print_exec(b->right, -1, 0);
1851                 break;
1852         case Or:
1853                 print_exec(b->left, -1, 0);
1854                 printf(" or ");
1855                 print_exec(b->right, -1, 0);
1856                 break;
1857         case Not:
1858                 printf("not ");
1859                 print_exec(b->right, -1, 0);
1860                 break;
1861
1862 ###### propagate binode cases
1863         case And:
1864         case Or:
1865         case Not:
1866                 /* both must be Tbool, result is Tbool */
1867                 propagate_types(b->left, c, ok, Tbool, 0);
1868                 propagate_types(b->right, c, ok, Tbool, 0);
1869                 if (type && type != Tbool) {
1870                         type_err(c, "error: %1 operation found where %2 expected", prog,
1871                                    Tbool, 0, type);
1872                         *ok = 0;
1873                 }
1874                 return Tbool;
1875
1876 ###### interp binode cases
1877         case And:
1878                 rv = interp_exec(b->left);
1879                 right = interp_exec(b->right);
1880                 rv.bool = rv.bool && right.bool;
1881                 break;
1882         case Or:
1883                 rv = interp_exec(b->left);
1884                 right = interp_exec(b->right);
1885                 rv.bool = rv.bool || right.bool;
1886                 break;
1887         case Not:
1888                 rv = interp_exec(b->right);
1889                 rv.bool = !rv.bool;
1890                 break;
1891
1892 ### Expressions: Comparison
1893
1894 Of slightly higher precedence that Boolean expressions are
1895 Comparisons.
1896 A comparison takes arguments of any type, but the two types must be
1897 the same.
1898
1899 To simplify the parsing we introduce an `eop` which can record an
1900 expression operator.
1901
1902 ###### ast
1903         struct eop {
1904                 enum Btype op;
1905         };
1906
1907 ###### ast functions
1908         static void free_eop(struct eop *e)
1909         {
1910                 if (e)
1911                         free(e);
1912         }
1913
1914 ###### Binode types
1915         Less,
1916         Gtr,
1917         LessEq,
1918         GtrEq,
1919         Eql,
1920         NEql,
1921
1922 ###### other BFact
1923         | Expr CMPop Expr ${ {
1924                         struct binode *b = new(binode);
1925                         b->op = $2.op;
1926                         b->left = $<1;
1927                         b->right = $<3;
1928                         $0 = b;
1929         } }$
1930         | Expr ${ $0 = $<1; }$
1931
1932 ###### Grammar
1933
1934         $eop
1935         CMPop ->   < ${ $0.op = Less; }$
1936                 |  > ${ $0.op = Gtr; }$
1937                 |  <= ${ $0.op = LessEq; }$
1938                 |  >= ${ $0.op = GtrEq; }$
1939                 |  == ${ $0.op = Eql; }$
1940                 |  != ${ $0.op = NEql; }$
1941
1942 ###### print binode cases
1943
1944         case Less:
1945         case LessEq:
1946         case Gtr:
1947         case GtrEq:
1948         case Eql:
1949         case NEql:
1950                 print_exec(b->left, -1, 0);
1951                 switch(b->op) {
1952                 case Less:   printf(" < "); break;
1953                 case LessEq: printf(" <= "); break;
1954                 case Gtr:    printf(" > "); break;
1955                 case GtrEq:  printf(" >= "); break;
1956                 case Eql:    printf(" == "); break;
1957                 case NEql:   printf(" != "); break;
1958                 default: abort();
1959                 }
1960                 print_exec(b->right, -1, 0);
1961                 break;
1962
1963 ###### propagate binode cases
1964         case Less:
1965         case LessEq:
1966         case Gtr:
1967         case GtrEq:
1968         case Eql:
1969         case NEql:
1970                 /* Both must match but not be labels, result is Tbool */
1971                 t = propagate_types(b->left, c, ok, NULL, Rnolabel);
1972                 if (t)
1973                         propagate_types(b->right, c, ok, t, 0);
1974                 else {
1975                         t = propagate_types(b->right, c, ok, NULL, Rnolabel);
1976                         if (t)
1977                                 t = propagate_types(b->left, c, ok, t, 0);
1978                 }
1979                 if (!type_compat(type, Tbool, 0)) {
1980                         type_err(c, "error: Comparison returns %1 but %2 expected", prog,
1981                                     Tbool, rules, type);
1982                         *ok = 0;
1983                 }
1984                 return Tbool;
1985
1986 ###### interp binode cases
1987         case Less:
1988         case LessEq:
1989         case Gtr:
1990         case GtrEq:
1991         case Eql:
1992         case NEql:
1993         {
1994                 int cmp;
1995                 left = interp_exec(b->left);
1996                 right = interp_exec(b->right);
1997                 cmp = value_cmp(left, right);
1998                 rv.type = Tbool;
1999                 switch (b->op) {
2000                 case Less:      rv.bool = cmp <  0; break;
2001                 case LessEq:    rv.bool = cmp <= 0; break;
2002                 case Gtr:       rv.bool = cmp >  0; break;
2003                 case GtrEq:     rv.bool = cmp >= 0; break;
2004                 case Eql:       rv.bool = cmp == 0; break;
2005                 case NEql:      rv.bool = cmp != 0; break;
2006                 default: rv.bool = 0; break;
2007                 }
2008                 break;
2009         }
2010
2011 ### Expressions: The rest
2012
2013 The remaining expressions with the highest precedence are arithmetic
2014 and string concatenation.  They are `Expr`, `Term`, and `Factor`.
2015 The `Factor` is where the `Value` and `Variable` that we already have
2016 are included.
2017
2018 `+` and `-` are both infix and prefix operations (where they are
2019 absolute value and negation).  These have different operator names.
2020
2021 We also have a 'Bracket' operator which records where parentheses were
2022 found.  This makes it easy to reproduce these when printing.  Once
2023 precedence is handled better I might be able to discard this.
2024
2025 ###### Binode types
2026         Plus, Minus,
2027         Times, Divide,
2028         Concat,
2029         Absolute, Negate,
2030         Bracket,
2031
2032 ###### Grammar
2033
2034         $*exec
2035         Expr -> Expr Eop Term ${ {
2036                         struct binode *b = new(binode);
2037                         b->op = $2.op;
2038                         b->left = $<1;
2039                         b->right = $<3;
2040                         $0 = b;
2041                 } }$
2042                 | Term ${ $0 = $<1; }$
2043
2044         Term -> Term Top Factor ${ {
2045                         struct binode *b = new(binode);
2046                         b->op = $2.op;
2047                         b->left = $<1;
2048                         b->right = $<3;
2049                         $0 = b;
2050                 } }$
2051                 | Factor ${ $0 = $<1; }$
2052
2053         Factor -> ( Expression ) ${ {
2054                         struct binode *b = new_pos(binode, $1);
2055                         b->op = Bracket;
2056                         b->right = $<2;
2057                         $0 = b;
2058                 } }$
2059                 | Uop Factor ${ {
2060                         struct binode *b = new(binode);
2061                         b->op = $1.op;
2062                         b->right = $<2;
2063                         $0 = b;
2064                 } }$
2065                 | Value ${ $0 = $<1; }$
2066                 | Variable ${ $0 = $<1; }$
2067
2068         $eop
2069         Eop ->    + ${ $0.op = Plus; }$
2070                 | - ${ $0.op = Minus; }$
2071
2072         Uop ->    + ${ $0.op = Absolute; }$
2073                 | - ${ $0.op = Negate; }$
2074
2075         Top ->    * ${ $0.op = Times; }$
2076                 | / ${ $0.op = Divide; }$
2077                 | ++ ${ $0.op = Concat; }$
2078
2079 ###### print binode cases
2080         case Plus:
2081         case Minus:
2082         case Times:
2083         case Divide:
2084         case Concat:
2085                 print_exec(b->left, indent, 0);
2086                 switch(b->op) {
2087                 case Plus:   printf(" + "); break;
2088                 case Minus:  printf(" - "); break;
2089                 case Times:  printf(" * "); break;
2090                 case Divide: printf(" / "); break;
2091                 case Concat: printf(" ++ "); break;
2092                 default: abort();
2093                 }
2094                 print_exec(b->right, indent, 0);
2095                 break;
2096         case Absolute:
2097                 printf("+");
2098                 print_exec(b->right, indent, 0);
2099                 break;
2100         case Negate:
2101                 printf("-");
2102                 print_exec(b->right, indent, 0);
2103                 break;
2104         case Bracket:
2105                 printf("(");
2106                 print_exec(b->right, indent, 0);
2107                 printf(")");
2108                 break;
2109
2110 ###### propagate binode cases
2111         case Plus:
2112         case Minus:
2113         case Times:
2114         case Divide:
2115                 /* both must be numbers, result is Tnum */
2116         case Absolute:
2117         case Negate:
2118                 /* as propagate_types ignores a NULL,
2119                  * unary ops fit here too */
2120                 propagate_types(b->left, c, ok, Tnum, 0);
2121                 propagate_types(b->right, c, ok, Tnum, 0);
2122                 if (!type_compat(type, Tnum, 0)) {
2123                         type_err(c, "error: Arithmetic returns %1 but %2 expected", prog,
2124                                    Tnum, rules, type);
2125                         *ok = 0;
2126                 }
2127                 return Tnum;
2128
2129         case Concat:
2130                 /* both must be Tstr, result is Tstr */
2131                 propagate_types(b->left, c, ok, Tstr, 0);
2132                 propagate_types(b->right, c, ok, Tstr, 0);
2133                 if (!type_compat(type, Tstr, 0)) {
2134                         type_err(c, "error: Concat returns %1 but %2 expected", prog,
2135                                    Tstr, rules, type);
2136                         *ok = 0;
2137                 }
2138                 return Tstr;
2139
2140         case Bracket:
2141                 return propagate_types(b->right, c, ok, type, 0);
2142
2143 ###### interp binode cases
2144
2145         case Plus:
2146                 rv = interp_exec(b->left);
2147                 right = interp_exec(b->right);
2148                 mpq_add(rv.num, rv.num, right.num);
2149                 break;
2150         case Minus:
2151                 rv = interp_exec(b->left);
2152                 right = interp_exec(b->right);
2153                 mpq_sub(rv.num, rv.num, right.num);
2154                 break;
2155         case Times:
2156                 rv = interp_exec(b->left);
2157                 right = interp_exec(b->right);
2158                 mpq_mul(rv.num, rv.num, right.num);
2159                 break;
2160         case Divide:
2161                 rv = interp_exec(b->left);
2162                 right = interp_exec(b->right);
2163                 mpq_div(rv.num, rv.num, right.num);
2164                 break;
2165         case Negate:
2166                 rv = interp_exec(b->right);
2167                 mpq_neg(rv.num, rv.num);
2168                 break;
2169         case Absolute:
2170                 rv = interp_exec(b->right);
2171                 mpq_abs(rv.num, rv.num);
2172                 break;
2173         case Bracket:
2174                 rv = interp_exec(b->right);
2175                 break;
2176         case Concat:
2177                 left = interp_exec(b->left);
2178                 right = interp_exec(b->right);
2179                 rv.type = Tstr;
2180                 rv.str = text_join(left.str, right.str);
2181                 break;
2182
2183
2184 ###### value functions
2185
2186         static struct text text_join(struct text a, struct text b)
2187         {
2188                 struct text rv;
2189                 rv.len = a.len + b.len;
2190                 rv.txt = malloc(rv.len);
2191                 memcpy(rv.txt, a.txt, a.len);
2192                 memcpy(rv.txt+a.len, b.txt, b.len);
2193                 return rv;
2194         }
2195
2196
2197 ### Blocks, Statements, and Statement lists.
2198
2199 Now that we have expressions out of the way we need to turn to
2200 statements.  There are simple statements and more complex statements.
2201 Simple statements do not contain newlines, complex statements do.
2202
2203 Statements often come in sequences and we have corresponding simple
2204 statement lists and complex statement lists.
2205 The former comprise only simple statements separated by semicolons.
2206 The later comprise complex statements and simple statement lists.  They are
2207 separated by newlines.  Thus the semicolon is only used to separate
2208 simple statements on the one line.  This may be overly restrictive,
2209 but I'm not sure I ever want a complex statement to share a line with
2210 anything else.
2211
2212 Note that a simple statement list can still use multiple lines if
2213 subsequent lines are indented, so
2214
2215 ###### Example: wrapped simple statement list
2216
2217         a = b; c = d;
2218            e = f; print g
2219
2220 is a single simple statement list.  This might allow room for
2221 confusion, so I'm not set on it yet.
2222
2223 A simple statement list needs no extra syntax.  A complex statement
2224 list has two syntactic forms.  It can be enclosed in braces (much like
2225 C blocks), or it can be introduced by a colon and continue until an
2226 unindented newline (much like Python blocks).  With this extra syntax
2227 it is referred to as a block.
2228
2229 Note that a block does not have to include any newlines if it only
2230 contains simple statements.  So both of:
2231
2232         if condition: a=b; d=f
2233
2234         if condition { a=b; print f }
2235
2236 are valid.
2237
2238 In either case the list is constructed from a `binode` list with
2239 `Block` as the operator.  When parsing the list it is most convenient
2240 to append to the end, so a list is a list and a statement.  When using
2241 the list it is more convenient to consider a list to be a statement
2242 and a list.  So we need a function to re-order a list.
2243 `reorder_bilist` serves this purpose.
2244
2245 The only stand-alone statement we introduce at this stage is `pass`
2246 which does nothing and is represented as a `NULL` pointer in a `Block`
2247 list.  Other stand-alone statements will follow once the infrastructure
2248 is in-place.
2249
2250 ###### Binode types
2251         Block,
2252
2253 ###### Grammar
2254
2255         $void
2256         OptNL -> Newlines
2257                 |
2258
2259         Newlines -> NEWLINE
2260                 | Newlines NEWLINE
2261
2262         $*binode
2263         Open -> {
2264                 | NEWLINE {
2265         Close -> }
2266                 | NEWLINE }
2267         Block -> Open Statementlist Close ${ $0 = $<2; }$
2268                 | Open Newlines Statementlist Close ${ $0 = $<3; }$
2269                 | Open SimpleStatements } ${ $0 = reorder_bilist($<2); }$
2270                 | Open Newlines SimpleStatements } ${ $0 = reorder_bilist($<3); }$
2271                 | : Statementlist ${ $0 = $<2; }$
2272                 | : SimpleStatements ${ $0 = reorder_bilist($<2); }$
2273
2274         Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<1); }$
2275
2276         ComplexStatements -> ComplexStatements ComplexStatement ${
2277                 $0 = new(binode);
2278                 $0->op = Block;
2279                 $0->left = $<1;
2280                 $0->right = $<2;
2281                 }$
2282                 | ComplexStatements NEWLINE ${ $0 = $<1; }$
2283                 | ComplexStatement ${
2284                 $0 = new(binode);
2285                 $0->op = Block;
2286                 $0->left = NULL;
2287                 $0->right = $<1;
2288                 }$
2289
2290         $*exec
2291         ComplexStatement -> SimpleStatements NEWLINE ${
2292                         $0 = reorder_bilist($<1);
2293                         }$
2294                 ## ComplexStatement Grammar
2295
2296         $*binode
2297         SimpleStatements -> SimpleStatements ; SimpleStatement ${
2298                         $0 = new(binode);
2299                         $0->op = Block;
2300                         $0->left = $<1;
2301                         $0->right = $<3;
2302                         }$
2303                 | SimpleStatement ${
2304                         $0 = new(binode);
2305                         $0->op = Block;
2306                         $0->left = NULL;
2307                         $0->right = $<1;
2308                         }$
2309                 | SimpleStatements ; ${ $0 = $<1; }$
2310
2311         SimpleStatement -> pass ${ $0 = NULL; }$
2312                 ## SimpleStatement Grammar
2313
2314 ###### print binode cases
2315         case Block:
2316                 if (indent < 0) {
2317                         // simple statement
2318                         if (b->left == NULL)
2319                                 printf("pass");
2320                         else
2321                                 print_exec(b->left, indent, 0);
2322                         if (b->right) {
2323                                 printf("; ");
2324                                 print_exec(b->right, indent, 0);
2325                         }
2326                 } else {
2327                         // block, one per line
2328                         if (b->left == NULL)
2329                                 do_indent(indent, "pass\n");
2330                         else
2331                                 print_exec(b->left, indent, bracket);
2332                         if (b->right)
2333                                 print_exec(b->right, indent, bracket);
2334                 }
2335                 break;
2336
2337 ###### propagate binode cases
2338         case Block:
2339         {
2340                 /* If any statement returns something other than Tnone
2341                  * or Tbool then all such must return same type.
2342                  * As each statement may be Tnone or something else,
2343                  * we must always pass NULL (unknown) down, otherwise an incorrect
2344                  * error might occur.  We never return Tnone unless it is
2345                  * passed in.
2346                  */
2347                 struct binode *e;
2348
2349                 for (e = b; e; e = cast(binode, e->right)) {
2350                         t = propagate_types(e->left, c, ok, NULL, rules);
2351                         if ((rules & Rboolok) && t == Tbool)
2352                                 t = NULL;
2353                         if (t && t != Tnone && t != Tbool) {
2354                                 if (!type)
2355                                         type = t;
2356                                 else if (t != type) {
2357                                         type_err(c, "error: expected %1%r, found %2",
2358                                                  e->left, type, rules, t);
2359                                         *ok = 0;
2360                                 }
2361                         }
2362                 }
2363                 return type;
2364         }
2365
2366 ###### interp binode cases
2367         case Block:
2368                 while (rv.type == Tnone &&
2369                        b) {
2370                         if (b->left)
2371                                 rv = interp_exec(b->left);
2372                         b = cast(binode, b->right);
2373                 }
2374                 break;
2375
2376 ### The Print statement
2377
2378 `print` is a simple statement that takes a comma-separated list of
2379 expressions and prints the values separated by spaces and terminated
2380 by a newline.  No control of formatting is possible.
2381
2382 `print` faces the same list-ordering issue as blocks, and uses the
2383 same solution.
2384
2385 ###### Binode types
2386         Print,
2387
2388 ###### SimpleStatement Grammar
2389
2390         | print ExpressionList ${
2391                 $0 = reorder_bilist($<2);
2392         }$
2393         | print ExpressionList , ${
2394                 $0 = new(binode);
2395                 $0->op = Print;
2396                 $0->right = NULL;
2397                 $0->left = $<2;
2398                 $0 = reorder_bilist($0);
2399         }$
2400         | print ${
2401                 $0 = new(binode);
2402                 $0->op = Print;
2403                 $0->right = NULL;
2404         }$
2405
2406 ###### Grammar
2407
2408         $*binode
2409         ExpressionList -> ExpressionList , Expression ${
2410                 $0 = new(binode);
2411                 $0->op = Print;
2412                 $0->left = $<1;
2413                 $0->right = $<3;
2414                 }$
2415                 | Expression ${
2416                         $0 = new(binode);
2417                         $0->op = Print;
2418                         $0->left = NULL;
2419                         $0->right = $<1;
2420                 }$
2421
2422 ###### print binode cases
2423
2424         case Print:
2425                 do_indent(indent, "print");
2426                 while (b) {
2427                         if (b->left) {
2428                                 printf(" ");
2429                                 print_exec(b->left, -1, 0);
2430                                 if (b->right)
2431                                         printf(",");
2432                         }
2433                         b = cast(binode, b->right);
2434                 }
2435                 if (indent >= 0)
2436                         printf("\n");
2437                 break;
2438
2439 ###### propagate binode cases
2440
2441         case Print:
2442                 /* don't care but all must be consistent */
2443                 propagate_types(b->left, c, ok, NULL, Rnolabel);
2444                 propagate_types(b->right, c, ok, NULL, Rnolabel);
2445                 break;
2446
2447 ###### interp binode cases
2448
2449         case Print:
2450         {
2451                 char sep = 0;
2452                 int eol = 1;
2453                 for ( ; b; b = cast(binode, b->right))
2454                         if (b->left) {
2455                                 if (sep)
2456                                         putchar(sep);
2457                                 left = interp_exec(b->left);
2458                                 print_value(left);
2459                                 free_value(left);
2460                                 if (b->right)
2461                                         sep = ' ';
2462                         } else if (sep)
2463                                 eol = 0;
2464                 left.type = Tnone;
2465                 if (eol)
2466                         printf("\n");
2467                 break;
2468         }
2469
2470 ###### Assignment statement
2471
2472 An assignment will assign a value to a variable, providing it hasn't
2473 be declared as a constant.  The analysis phase ensures that the type
2474 will be correct so the interpreter just needs to perform the
2475 calculation.  There is a form of assignment which declares a new
2476 variable as well as assigning a value.  If a name is assigned before
2477 it is declared, and error will be raised as the name is created as
2478 `Tlabel` and it is illegal to assign to such names.
2479
2480 ###### Binode types
2481         Assign,
2482         Declare,
2483
2484 ###### SimpleStatement Grammar
2485         | Variable = Expression ${ {
2486                         struct var *v = cast(var, $1);
2487
2488                         $0 = new(binode);
2489                         $0->op = Assign;
2490                         $0->left = $<1;
2491                         $0->right = $<3;
2492                         if (v->var && v->var->constant) {
2493                                 type_err(config2context(config), "Cannot assign to a constant: %v",
2494                                          $0->left, NULL, 0, NULL);
2495                                 type_err(config2context(config), "name was defined as a constant here",
2496                                          v->var->where_decl, NULL, 0, NULL);
2497                         }
2498                 } }$
2499         | VariableDecl = Expression ${
2500                         $0 = new(binode);
2501                         $0->op = Declare;
2502                         $0->left = $<1;
2503                         $0->right =$<3;
2504                 }$
2505
2506         | VariableDecl ${
2507                         if ($1->var->where_set == NULL) {
2508                                 type_err(config2context(config), "Variable declared with no type or value: %v",
2509                                          $1, NULL, 0, NULL);
2510                         } else {
2511                                 $0 = new(binode);
2512                                 $0->op = Declare;
2513                                 $0->left = $<1;
2514                                 $0->right = NULL;
2515                         }
2516                 }$
2517
2518 ###### print binode cases
2519
2520         case Assign:
2521                 do_indent(indent, "");
2522                 print_exec(b->left, indent, 0);
2523                 printf(" = ");
2524                 print_exec(b->right, indent, 0);
2525                 if (indent >= 0)
2526                         printf("\n");
2527                 break;
2528
2529         case Declare:
2530                 {
2531                 struct variable *v = cast(var, b->left)->var;
2532                 do_indent(indent, "");
2533                 print_exec(b->left, indent, 0);
2534                 if (cast(var, b->left)->var->constant) {
2535                         if (v->where_decl == v->where_set)
2536                                 printf("::%.*s ", v->val.type->name.len,
2537                                        v->val.type->name.txt);
2538                         else
2539                                 printf(" ::");
2540                 } else {
2541                         if (v->where_decl == v->where_set)
2542                                 printf(":%.*s ", v->val.type->name.len,
2543                                        v->val.type->name.txt);
2544                         else
2545                                 printf(" :");
2546                 }
2547                 if (b->right) {
2548                         printf("= ");
2549                         print_exec(b->right, indent, 0);
2550                 }
2551                 if (indent >= 0)
2552                         printf("\n");
2553                 }
2554                 break;
2555
2556 ###### propagate binode cases
2557
2558         case Assign:
2559         case Declare:
2560                 /* Both must match and not be labels,
2561                  * Type must support 'dup',
2562                  * result is Tnone */
2563                 t = propagate_types(b->left, c, ok, NULL, Rnolabel);
2564                 if (!b->right)
2565                         return Tnone;
2566
2567                 if (t) {
2568                         if (propagate_types(b->right, c, ok, t, 0) != t)
2569                                 if (b->left->type == Xvar)
2570                                         type_err(c, "info: variable '%v' was set as %1 here.",
2571                                                  cast(var, b->left)->var->where_set, t, rules, Tnone);
2572                 } else {
2573                         t = propagate_types(b->right, c, ok, NULL, Rnolabel);
2574                         if (t)
2575                                 propagate_types(b->left, c, ok, t, 0);
2576                 }
2577                 if (t && t->dup == NULL) {
2578                         type_err(c, "error: cannot assign value of type %1", b, t, 0, NULL);
2579                         *ok = 0;
2580                 }
2581                 return Tnone;
2582
2583                 break;
2584
2585 ###### interp binode cases
2586
2587         case Assign:
2588         {
2589                 struct variable *v = cast(var, b->left)->var;
2590                 if (v->merged)
2591                         v = v->merged;
2592                 right = interp_exec(b->right);
2593                 free_value(v->val);
2594                 v->val = right;
2595                 right.type = NULL;
2596                 break;
2597         }
2598
2599         case Declare:
2600         {
2601                 struct variable *v = cast(var, b->left)->var;
2602                 if (v->merged)
2603                         v = v->merged;
2604                 if (b->right)
2605                         right = interp_exec(b->right);
2606                 else
2607                         right = val_init(v->val.type);
2608                 free_value(v->val);
2609                 v->val = right;
2610                 right.type = NULL;
2611                 break;
2612         }
2613
2614 ### The `use` statement
2615
2616 The `use` statement is the last "simple" statement.  It is needed when
2617 the condition in a conditional statement is a block.  `use` works much
2618 like `return` in C, but only completes the `condition`, not the whole
2619 function.
2620
2621 ###### Binode types
2622         Use,
2623
2624 ###### SimpleStatement Grammar
2625         | use Expression ${
2626                 $0 = new_pos(binode, $1);
2627                 $0->op = Use;
2628                 $0->right = $<2;
2629         }$
2630
2631 ###### print binode cases
2632
2633         case Use:
2634                 do_indent(indent, "use ");
2635                 print_exec(b->right, -1, 0);
2636                 if (indent >= 0)
2637                         printf("\n");
2638                 break;
2639
2640 ###### propagate binode cases
2641
2642         case Use:
2643                 /* result matches value */
2644                 return propagate_types(b->right, c, ok, type, 0);
2645
2646 ###### interp binode cases
2647
2648         case Use:
2649                 rv = interp_exec(b->right);
2650                 break;
2651
2652 ### The Conditional Statement
2653
2654 This is the biggy and currently the only complex statement.  This
2655 subsumes `if`, `while`, `do/while`, `switch`, and some parts of `for`.
2656 It is comprised of a number of parts, all of which are optional though
2657 set combinations apply.  Each part is (usually) a key word (`then` is
2658 sometimes optional) followed by either an expression or a code block,
2659 except the `casepart` which is a "key word and an expression" followed
2660 by a code block.  The code-block option is valid for all parts and,
2661 where an expression is also allowed, the code block can use the `use`
2662 statement to report a value.  If the code block does not report a value
2663 the effect is similar to reporting `True`.
2664
2665 The `else` and `case` parts, as well as `then` when combined with
2666 `if`, can contain a `use` statement which will apply to some
2667 containing conditional statement. `for` parts, `do` parts and `then`
2668 parts used with `for` can never contain a `use`, except in some
2669 subordinate conditional statement.
2670
2671 If there is a `forpart`, it is executed first, only once.
2672 If there is a `dopart`, then it is executed repeatedly providing
2673 always that the `condpart` or `cond`, if present, does not return a non-True
2674 value.  `condpart` can fail to return any value if it simply executes
2675 to completion.  This is treated the same as returning `True`.
2676
2677 If there is a `thenpart` it will be executed whenever the `condpart`
2678 or `cond` returns True (or does not return any value), but this will happen
2679 *after* `dopart` (when present).
2680
2681 If `elsepart` is present it will be executed at most once when the
2682 condition returns `False` or some value that isn't `True` and isn't
2683 matched by any `casepart`.  If there are any `casepart`s, they will be
2684 executed when the condition returns a matching value.
2685
2686 The particular sorts of values allowed in case parts has not yet been
2687 determined in the language design, so nothing is prohibited.
2688
2689 The various blocks in this complex statement potentially provide scope
2690 for variables as described earlier.  Each such block must include the
2691 "OpenScope" nonterminal before parsing the block, and must call
2692 `var_block_close()` when closing the block.
2693
2694 The code following "`if`", "`switch`" and "`for`" does not get its own
2695 scope, but is in a scope covering the whole statement, so names
2696 declared there cannot be redeclared elsewhere.  Similarly the
2697 condition following "`while`" is in a scope the covers the body
2698 ("`do`" part) of the loop, and which does not allow conditional scope
2699 extension.  Code following "`then`" (both looping and non-looping),
2700 "`else`" and "`case`" each get their own local scope.
2701
2702 The type requirements on the code block in a `whilepart` are quite
2703 unusal.  It is allowed to return a value of some identifiable type, in
2704 which case the loop aborts and an appropriate `casepart` is run, or it
2705 can return a Boolean, in which case the loop either continues to the
2706 `dopart` (on `True`) or aborts and runs the `elsepart` (on `False`).
2707 This is different both from the `ifpart` code block which is expected to
2708 return a Boolean, or the `switchpart` code block which is expected to
2709 return the same type as the casepart values.  The correct analysis of
2710 the type of the `whilepart` code block is the reason for the
2711 `Rboolok` flag which is passed to `propagate_types()`.
2712
2713 The `cond_statement` cannot fit into a `binode` so a new `exec` is
2714 defined.
2715
2716 ###### exec type
2717         Xcond_statement,
2718
2719 ###### ast
2720         struct casepart {
2721                 struct exec *value;
2722                 struct exec *action;
2723                 struct casepart *next;
2724         };
2725         struct cond_statement {
2726                 struct exec;
2727                 struct exec *forpart, *condpart, *dopart, *thenpart, *elsepart;
2728                 struct casepart *casepart;
2729         };
2730
2731 ###### ast functions
2732
2733         static void free_casepart(struct casepart *cp)
2734         {
2735                 while (cp) {
2736                         struct casepart *t;
2737                         free_exec(cp->value);
2738                         free_exec(cp->action);
2739                         t = cp->next;
2740                         free(cp);
2741                         cp = t;
2742                 }
2743         }
2744
2745         static void free_cond_statement(struct cond_statement *s)
2746         {
2747                 if (!s)
2748                         return;
2749                 free_exec(s->forpart);
2750                 free_exec(s->condpart);
2751                 free_exec(s->dopart);
2752                 free_exec(s->thenpart);
2753                 free_exec(s->elsepart);
2754                 free_casepart(s->casepart);
2755                 free(s);
2756         }
2757
2758 ###### free exec cases
2759         case Xcond_statement: free_cond_statement(cast(cond_statement, e)); break;
2760
2761 ###### ComplexStatement Grammar
2762         | CondStatement ${ $0 = $<1; }$
2763
2764 ###### Grammar
2765
2766         $*cond_statement
2767         // both ForThen and Whilepart open scopes, and CondSuffix only
2768         // closes one - so in the first branch here we have another to close.
2769         CondStatement -> ForThen WhilePart CondSuffix ${
2770                         $0 = $<3;
2771                         $0->forpart = $1.forpart; $1.forpart = NULL;
2772                         $0->thenpart = $1.thenpart; $1.thenpart = NULL;
2773                         $0->condpart = $2.condpart; $2.condpart = NULL;
2774                         $0->dopart = $2.dopart; $2.dopart = NULL;
2775                         var_block_close(config2context(config), CloseSequential);
2776                         }$
2777                 | WhilePart CondSuffix ${
2778                         $0 = $<2;
2779                         $0->condpart = $1.condpart; $1.condpart = NULL;
2780                         $0->dopart = $1.dopart; $1.dopart = NULL;
2781                         }$
2782                 | SwitchPart CondSuffix ${
2783                         $0 = $<2;
2784                         $0->condpart = $<1;
2785                         }$
2786                 | IfPart IfSuffix ${
2787                         $0 = $<2;
2788                         $0->condpart = $1.condpart; $1.condpart = NULL;
2789                         $0->thenpart = $1.thenpart; $1.thenpart = NULL;
2790                         // This is where we close an "if" statement
2791                         var_block_close(config2context(config), CloseSequential);
2792                         }$
2793
2794         CondSuffix -> IfSuffix ${
2795                         $0 = $<1;
2796                         // This is where we close scope of the whole
2797                         // "for" or "while" statement
2798                         var_block_close(config2context(config), CloseSequential);
2799                 }$
2800                 | CasePart CondSuffix ${
2801                         $0 = $<2;
2802                         $1->next = $0->casepart;
2803                         $0->casepart = $<1;
2804                 }$
2805
2806         $*casepart
2807         CasePart -> Newlines case Expression OpenScope Block ${
2808                         $0 = calloc(1,sizeof(struct casepart));
2809                         $0->value = $<3;
2810                         $0->action = $<5;
2811                         var_block_close(config2context(config), CloseParallel);
2812                 }$
2813                 | case Expression OpenScope Block ${
2814                         $0 = calloc(1,sizeof(struct casepart));
2815                         $0->value = $<2;
2816                         $0->action = $<4;
2817                         var_block_close(config2context(config), CloseParallel);
2818                 }$
2819
2820         $*cond_statement
2821         IfSuffix -> Newlines ${ $0 = new(cond_statement); }$
2822                 | Newlines else OpenScope Block ${
2823                         $0 = new(cond_statement);
2824                         $0->elsepart = $<4;
2825                         var_block_close(config2context(config), CloseElse);
2826                 }$
2827                 | else OpenScope Block ${
2828                         $0 = new(cond_statement);
2829                         $0->elsepart = $<3;
2830                         var_block_close(config2context(config), CloseElse);
2831                 }$
2832                 | Newlines else OpenScope CondStatement ${
2833                         $0 = new(cond_statement);
2834                         $0->elsepart = $<4;
2835                         var_block_close(config2context(config), CloseElse);
2836                 }$
2837                 | else OpenScope CondStatement ${
2838                         $0 = new(cond_statement);
2839                         $0->elsepart = $<3;
2840                         var_block_close(config2context(config), CloseElse);
2841                 }$
2842
2843
2844         $*exec
2845         // These scopes are closed in CondSuffix
2846         ForPart -> for OpenScope SimpleStatements ${
2847                         $0 = reorder_bilist($<3);
2848                 }$
2849                 |  for OpenScope Block ${
2850                         $0 = $<3;
2851                 }$
2852
2853         ThenPart -> then OpenScope SimpleStatements ${
2854                         $0 = reorder_bilist($<3);
2855                         var_block_close(config2context(config), CloseSequential);
2856                 }$
2857                 |  then OpenScope Block ${
2858                         $0 = $<3;
2859                         var_block_close(config2context(config), CloseSequential);
2860                 }$
2861
2862         ThenPartNL -> ThenPart OptNL ${
2863                         $0 = $<1;
2864                 }$
2865
2866         // This scope is closed in CondSuffix
2867         WhileHead -> while OpenScope Block ${
2868                 $0 = $<3;
2869                 }$
2870
2871         $cond_statement
2872         ForThen -> ForPart OptNL ThenPartNL ${
2873                         $0.forpart = $<1;
2874                         $0.thenpart = $<3;
2875                 }$
2876                 | ForPart OptNL ${
2877                         $0.forpart = $<1;
2878                 }$
2879
2880         // This scope is closed in CondSuffix
2881         WhilePart -> while OpenScope Expression Block ${
2882                         $0.type = Xcond_statement;
2883                         $0.condpart = $<3;
2884                         $0.dopart = $<4;
2885                 }$
2886                 | WhileHead OptNL do Block ${
2887                         $0.type = Xcond_statement;
2888                         $0.condpart = $<1;
2889                         $0.dopart = $<4;
2890                 }$
2891
2892         IfPart -> if OpenScope Expression OpenScope Block ${
2893                         $0.type = Xcond_statement;
2894                         $0.condpart = $<3;
2895                         $0.thenpart = $<5;
2896                         var_block_close(config2context(config), CloseParallel);
2897                 }$
2898                 | if OpenScope Block OptNL then OpenScope Block ${
2899                         $0.type = Xcond_statement;
2900                         $0.condpart = $<3;
2901                         $0.thenpart = $<7;
2902                         var_block_close(config2context(config), CloseParallel);
2903                 }$
2904
2905         $*exec
2906         // This scope is closed in CondSuffix
2907         SwitchPart -> switch OpenScope Expression ${
2908                         $0 = $<3;
2909                 }$
2910                 | switch OpenScope Block ${
2911                         $0 = $<3;
2912                 }$
2913
2914 ###### print exec cases
2915
2916         case Xcond_statement:
2917         {
2918                 struct cond_statement *cs = cast(cond_statement, e);
2919                 struct casepart *cp;
2920                 if (cs->forpart) {
2921                         do_indent(indent, "for");
2922                         if (bracket) printf(" {\n"); else printf(":\n");
2923                         print_exec(cs->forpart, indent+1, bracket);
2924                         if (cs->thenpart) {
2925                                 if (bracket)
2926                                         do_indent(indent, "} then {\n");
2927                                 else
2928                                         do_indent(indent, "then:\n");
2929                                 print_exec(cs->thenpart, indent+1, bracket);
2930                         }
2931                         if (bracket) do_indent(indent, "}\n");
2932                 }
2933                 if (cs->dopart) {
2934                         // a loop
2935                         if (cs->condpart && cs->condpart->type == Xbinode &&
2936                             cast(binode, cs->condpart)->op == Block) {
2937                                 if (bracket)
2938                                         do_indent(indent, "while {\n");
2939                                 else
2940                                         do_indent(indent, "while:\n");
2941                                 print_exec(cs->condpart, indent+1, bracket);
2942                                 if (bracket)
2943                                         do_indent(indent, "} do {\n");
2944                                 else
2945                                         do_indent(indent, "do:\n");
2946                                 print_exec(cs->dopart, indent+1, bracket);
2947                                 if (bracket)
2948                                         do_indent(indent, "}\n");
2949                         } else {
2950                                 do_indent(indent, "while ");
2951                                 print_exec(cs->condpart, 0, bracket);
2952                                 if (bracket)
2953                                         printf(" {\n");
2954                                 else
2955                                         printf(":\n");
2956                                 print_exec(cs->dopart, indent+1, bracket);
2957                                 if (bracket)
2958                                         do_indent(indent, "}\n");
2959                         }
2960                 } else {
2961                         // a condition
2962                         if (cs->casepart)
2963                                 do_indent(indent, "switch");
2964                         else
2965                                 do_indent(indent, "if");
2966                         if (cs->condpart && cs->condpart->type == Xbinode &&
2967                             cast(binode, cs->condpart)->op == Block) {
2968                                 if (bracket)
2969                                         printf(" {\n");
2970                                 else
2971                                         printf(":\n");
2972                                 print_exec(cs->condpart, indent+1, bracket);
2973                                 if (bracket)
2974                                         do_indent(indent, "}\n");
2975                                 if (cs->thenpart) {
2976                                         do_indent(indent, "then:\n");
2977                                         print_exec(cs->thenpart, indent+1, bracket);
2978                                 }
2979                         } else {
2980                                 printf(" ");
2981                                 print_exec(cs->condpart, 0, bracket);
2982                                 if (cs->thenpart) {
2983                                         if (bracket)
2984                                                 printf(" {\n");
2985                                         else
2986                                                 printf(":\n");
2987                                         print_exec(cs->thenpart, indent+1, bracket);
2988                                         if (bracket)
2989                                                 do_indent(indent, "}\n");
2990                                 } else
2991                                         printf("\n");
2992                         }
2993                 }
2994                 for (cp = cs->casepart; cp; cp = cp->next) {
2995                         do_indent(indent, "case ");
2996                         print_exec(cp->value, -1, 0);
2997                         if (bracket)
2998                                 printf(" {\n");
2999                         else
3000                                 printf(":\n");
3001                         print_exec(cp->action, indent+1, bracket);
3002                         if (bracket)
3003                                 do_indent(indent, "}\n");
3004                 }
3005                 if (cs->elsepart) {
3006                         do_indent(indent, "else");
3007                         if (bracket)
3008                                 printf(" {\n");
3009                         else
3010                                 printf(":\n");
3011                         print_exec(cs->elsepart, indent+1, bracket);
3012                         if (bracket)
3013                                 do_indent(indent, "}\n");
3014                 }
3015                 break;
3016         }
3017
3018 ###### propagate exec cases
3019         case Xcond_statement:
3020         {
3021                 // forpart and dopart must return Tnone
3022                 // thenpart must return Tnone if there is a dopart,
3023                 // otherwise it is like elsepart.
3024                 // condpart must:
3025                 //    be bool if there is no casepart
3026                 //    match casepart->values if there is a switchpart
3027                 //    either be bool or match casepart->value if there
3028                 //             is a whilepart
3029                 // elsepart and casepart->action must match the return type
3030                 //   expected of this statement.
3031                 struct cond_statement *cs = cast(cond_statement, prog);
3032                 struct casepart *cp;
3033
3034                 t = propagate_types(cs->forpart, c, ok, Tnone, 0);
3035                 if (!type_compat(Tnone, t, 0))
3036                         *ok = 0;
3037                 t = propagate_types(cs->dopart, c, ok, Tnone, 0);
3038                 if (!type_compat(Tnone, t, 0))
3039                         *ok = 0;
3040                 if (cs->dopart) {
3041                         t = propagate_types(cs->thenpart, c, ok, Tnone, 0);
3042                         if (!type_compat(Tnone, t, 0))
3043                                 *ok = 0;
3044                 }
3045                 if (cs->casepart == NULL)
3046                         propagate_types(cs->condpart, c, ok, Tbool, 0);
3047                 else {
3048                         /* Condpart must match case values, with bool permitted */
3049                         t = NULL;
3050                         for (cp = cs->casepart;
3051                              cp && !t; cp = cp->next)
3052                                 t = propagate_types(cp->value, c, ok, NULL, 0);
3053                         if (!t && cs->condpart)
3054                                 t = propagate_types(cs->condpart, c, ok, NULL, Rboolok);
3055                         // Now we have a type (I hope) push it down
3056                         if (t) {
3057                                 for (cp = cs->casepart; cp; cp = cp->next)
3058                                         propagate_types(cp->value, c, ok, t, 0);
3059                                 propagate_types(cs->condpart, c, ok, t, Rboolok);
3060                         }
3061                 }
3062                 // (if)then, else, and case parts must return expected type.
3063                 if (!cs->dopart && !type)
3064                         type = propagate_types(cs->thenpart, c, ok, NULL, rules);
3065                 if (!type)
3066                         type = propagate_types(cs->elsepart, c, ok, NULL, rules);
3067                 for (cp = cs->casepart;
3068                      cp && !type;
3069                      cp = cp->next)
3070                         type = propagate_types(cp->action, c, ok, NULL, rules);
3071                 if (type) {
3072                         if (!cs->dopart)
3073                                 propagate_types(cs->thenpart, c, ok, type, rules);
3074                         propagate_types(cs->elsepart, c, ok, type, rules);
3075                         for (cp = cs->casepart; cp ; cp = cp->next)
3076                                 propagate_types(cp->action, c, ok, type, rules);
3077                         return type;
3078                 } else
3079                         return NULL;
3080         }
3081
3082 ###### interp exec cases
3083         case Xcond_statement:
3084         {
3085                 struct value v, cnd;
3086                 struct casepart *cp;
3087                 struct cond_statement *c = cast(cond_statement, e);
3088
3089                 if (c->forpart)
3090                         interp_exec(c->forpart);
3091                 do {
3092                         if (c->condpart)
3093                                 cnd = interp_exec(c->condpart);
3094                         else
3095                                 cnd.type = Tnone;
3096                         if (!(cnd.type == Tnone ||
3097                               (cnd.type == Tbool && cnd.bool != 0)))
3098                                 break;
3099                         // cnd is Tnone or Tbool, doesn't need to be freed
3100                         if (c->dopart)
3101                                 interp_exec(c->dopart);
3102
3103                         if (c->thenpart) {
3104                                 v = interp_exec(c->thenpart);
3105                                 if (v.type != Tnone || !c->dopart)
3106                                         return v;
3107                                 free_value(v);
3108                         }
3109                 } while (c->dopart);
3110
3111                 for (cp = c->casepart; cp; cp = cp->next) {
3112                         v = interp_exec(cp->value);
3113                         if (value_cmp(v, cnd) == 0) {
3114                                 free_value(v);
3115                                 free_value(cnd);
3116                                 return interp_exec(cp->action);
3117                         }
3118                         free_value(v);
3119                 }
3120                 free_value(cnd);
3121                 if (c->elsepart)
3122                         return interp_exec(c->elsepart);
3123                 v.type = Tnone;
3124                 return v;
3125         }
3126
3127 ### Finally the whole program.
3128
3129 Somewhat reminiscent of Pascal a (current) Ocean program starts with
3130 the keyword "program" and a list of variable names which are assigned
3131 values from command line arguments.  Following this is a `block` which
3132 is the code to execute.
3133
3134 As this is the top level, several things are handled a bit
3135 differently.
3136 The whole program is not interpreted by `interp_exec` as that isn't
3137 passed the argument list which the program requires.  Similarly type
3138 analysis is a bit more interesting at this level.
3139
3140 ###### Binode types
3141         Program,
3142
3143 ###### Parser: grammar
3144
3145         $*binode
3146         Program -> program OpenScope Varlist Block OptNL ${
3147                 $0 = new(binode);
3148                 $0->op = Program;
3149                 $0->left = reorder_bilist($<3);
3150                 $0->right = $<4;
3151                 var_block_close(config2context(config), CloseSequential);
3152                 if (config2context(config)->scope_stack) abort();
3153                 }$
3154                 | ERROR ${
3155                         tok_err(config2context(config),
3156                                 "error: unhandled parse error", &$1);
3157                 }$
3158
3159         Varlist -> Varlist ArgDecl ${
3160                         $0 = new(binode);
3161                         $0->op = Program;
3162                         $0->left = $<1;
3163                         $0->right = $<2;
3164                 }$
3165                 | ${ $0 = NULL; }$
3166
3167         $*var
3168         ArgDecl -> IDENTIFIER ${ {
3169                 struct variable *v = var_decl(config2context(config), $1.txt);
3170                 $0 = new(var);
3171                 $0->var = v;
3172         } }$
3173
3174         ## Grammar
3175
3176 ###### print binode cases
3177         case Program:
3178                 do_indent(indent, "program");
3179                 for (b2 = cast(binode, b->left); b2; b2 = cast(binode, b2->right)) {
3180                         printf(" ");
3181                         print_exec(b2->left, 0, 0);
3182                 }
3183                 if (bracket)
3184                         printf(" {\n");
3185                 else
3186                         printf(":\n");
3187                 print_exec(b->right, indent+1, bracket);
3188                 if (bracket)
3189                         do_indent(indent, "}\n");
3190                 break;
3191
3192 ###### propagate binode cases
3193         case Program: abort();
3194
3195 ###### core functions
3196
3197         static int analyse_prog(struct exec *prog, struct parse_context *c)
3198         {
3199                 struct binode *b = cast(binode, prog);
3200                 int ok = 1;
3201
3202                 if (!b)
3203                         return 0;
3204                 do {
3205                         ok = 1;
3206                         propagate_types(b->right, c, &ok, Tnone, 0);
3207                 } while (ok == 2);
3208                 if (!ok)
3209                         return 0;
3210
3211                 for (b = cast(binode, b->left); b; b = cast(binode, b->right)) {
3212                         struct var *v = cast(var, b->left);
3213                         if (!v->var->val.type) {
3214                                 v->var->where_set = b;
3215                                 v->var->val = val_prepare(Tstr);
3216                         }
3217                 }
3218                 b = cast(binode, prog);
3219                 do {
3220                         ok = 1;
3221                         propagate_types(b->right, c, &ok, Tnone, 0);
3222                 } while (ok == 2);
3223                 if (!ok)
3224                         return 0;
3225
3226                 /* Make sure everything is still consistent */
3227                 propagate_types(b->right, c, &ok, Tnone, 0);
3228                 return !!ok;
3229         }
3230
3231         static void interp_prog(struct exec *prog, char **argv)
3232         {
3233                 struct binode *p = cast(binode, prog);
3234                 struct binode *al;
3235                 struct value v;
3236
3237                 if (!prog)
3238                         return;
3239                 al = cast(binode, p->left);
3240                 while (al) {
3241                         struct var *v = cast(var, al->left);
3242                         struct value *vl = &v->var->val;
3243
3244                         if (argv[0] == NULL) {
3245                                 printf("Not enough args\n");
3246                                 exit(1);
3247                         }
3248                         al = cast(binode, al->right);
3249                         free_value(*vl);
3250                         *vl = parse_value(vl->type, argv[0]);
3251                         if (vl->type == NULL)
3252                                 exit(1);
3253                         argv++;
3254                 }
3255                 v = interp_exec(p->right);
3256                 free_value(v);
3257         }
3258
3259 ###### interp binode cases
3260         case Program: abort();
3261
3262 ## And now to test it out.
3263
3264 Having a language requires having a "hello world" program. I'll
3265 provide a little more than that: a program that prints "Hello world"
3266 finds the GCD of two numbers, prints the first few elements of
3267 Fibonacci, and performs a binary search for a number.
3268
3269 ###### File: oceani.mk
3270         tests :: sayhello
3271         sayhello : oceani
3272                 @echo "===== TEST ====="
3273                 ./oceani --section "test: hello" oceani.mdc 55 33
3274
3275 ###### test: hello
3276
3277         program A B:
3278                 print "Hello World, what lovely oceans you have!"
3279                 /* When a variable is defined in both branches of an 'if',
3280                  * and used afterwards, the variables are merged.
3281                  */
3282                 if A > B:
3283                         bigger := "yes"
3284                 else:
3285                         bigger := "no"
3286                 print "Is", A, "bigger than", B,"? ", bigger
3287                 /* If a variable is not used after the 'if', no
3288                  * merge happens, so types can be different
3289                  */
3290                 if A * 2 > B:
3291                         double:string = "yes"
3292                         print A, "is more than twice", B, "?", double
3293                 else:
3294                         double := A*2
3295                         print "double", A, "is only", double
3296
3297                 a : number
3298                 a = A;
3299                 b:number = B
3300                 if a > 0 and b > 0:
3301                         while a != b:
3302                                 if a < b:
3303                                         b = b - a
3304                                 else:
3305                                         a = a - b
3306                         print "GCD of", A, "and", B,"is", a
3307                 else if a <= 0:
3308                         print a, "is not positive, cannot calculate GCD"
3309                 else:
3310                         print b, "is not positive, cannot calculate GCD"
3311
3312                 for:
3313                         togo := 10
3314                         f1 := 1; f2 := 1
3315                         print "Fibonacci:", f1,f2,
3316                 then togo = togo - 1
3317                 while togo > 0:
3318                         f3 := f1 + f2
3319                         print "", f3,
3320                         f1 = f2
3321                         f2 = f3
3322                 print ""
3323
3324                 /* Binary search... */
3325                 for:
3326                         lo:= 0; hi := 100
3327                         target := 77
3328                 while:
3329                         mid := (lo + hi) / 2
3330                         if mid == target:
3331                                 use Found
3332                         if mid < target:
3333                                 lo = mid
3334                         else:
3335                                 hi = mid
3336                         if hi - lo < 1:
3337                                 use GiveUp
3338                         use True
3339                 do: pass
3340                 case Found:
3341                         print "Yay, I found", target
3342                 case GiveUp:
3343                         print "Closest I found was", mid