ocean-lang.org Git - ocean/blob - csrc/oceani.mdc

   1 # Ocean Interpreter - Stoney Creek version
   2
   3 Ocean is intended to be a compiled language, so this interpreter is
   4 not targeted at being the final product.  It is, rather, an intermediate
   5 stage and fills that role in two distinct ways.
   6
   7 Firstly, it exists as a platform to experiment with the early language
   8 design.  An interpreter is easy to write and easy to get working, so
   9 the barrier for entry is lower if I aim to start with an interpreter.
  10
  11 Secondly, the plan for the Ocean compiler is to write it in the
  12 [Ocean language](http://ocean-lang.org).  To achieve this we naturally
  13 need some sort of boot-strap process and this interpreter - written in
  14 portable C - will fill that role.  It will be used to bootstrap the
  15 Ocean compiler.
  16
  17 Two features that are not needed to fill either of these roles are
  18 performance and completeness.  The interpreter only needs to be fast
  19 enough to run small test programs and occasionally to run the compiler
  20 on itself.  It only needs to be complete enough to test aspects of the
  21 design which are developed before the compiler is working, and to run
  22 the compiler on itself.  Any features not used by the compiler when
  23 compiling itself are superfluous.  They may be included anyway, but
  24 they may not.
  25
  26 Nonetheless, the interpreter should end up being reasonably complete,
  27 and any performance bottlenecks which appear and are easily fixed, will
  28 be.
  29
  30 ## Current version
  31
  32 This second version of the interpreter exists to test out the
  33 structured statement providing conditions and iteration, and simple
  34 variable scoping.  Clearly we need some minimal other functionality so
  35 that values can be tested and instructions iterated over.  All that
  36 functionality is clearly not normative at this stage (not that
  37 anything is **really** normative yet) and will change, so early test
  38 code will certainly break in later versions.
  39
  40 The under-test parts of the language are:
  41
  42  - conditional/looping structured statements
  43  - the `use` statement which is needed for that
  44  - Variable binding using ":=" and "::=", and assignment using "=".
  45
  46 Elements which are present to make a usable language are:
  47
  48  - "blocks" of multiple statements.
  49  - `pass`: a statement which does nothing.
  50  - expressions: `+`, `-`, `*`, `/` can apply to numbers and `++` can
  51    catenate strings.  `and`, `or`, `not` manipulate Booleans, and
  52    normal comparison operators can work on all three types.
  53  - `print`: will print the values in a list of expressions.
  54  - `program`: is given a list of identifiers to initialize from
  55    arguments.
  56
  57 ## Naming
  58
  59 Versions of the interpreter which obviously do not support a complete
  60 language will be named after creeks and streams.  This one is Stoney
  61 Creek.
  62
  63 Once we have something reasonably resembling a complete language, the
  64 names of rivers will be used.
  65 Early versions of the compiler will be named after seas.  Major
  66 releases of the compiler will be named after oceans.  Hopefully I will
  67 be finished once I get to the Pacific Ocean release.
  68
  69 ## Outline
  70
  71 As well as parsing and executing a program, the interpreter can print
  72 out the program from the parsed internal structure.  This is useful
  73 for validating the parsing.
  74 So the main requirements of the interpreter are:
  75
  76 - Parse the program, possibly with tracing,
  77 - Analyse the parsed program to ensure consistency,
  78 - Print the program,
  79 - Execute the program.
  80
  81 This is all performed by a single C program extracted with
  82 `parsergen`.
  83
  84 There will be two formats for printing the program: a default and one
  85 that uses bracketing.  So a `--bracket` command line option is needed
  86 for that.  Normally the first code section found is used, however an
  87 alternate section can be requested so that a file (such as this one)
  88 can contain multiple programs This is effected with the `--section`
  89 option.
  90
  91 This code must be compiled with `-fplan9-extensions` so that anonymous
  92 structures can be used.
  93
  94 ###### File: oceani.mk
  95
  96         myCFLAGS := -Wall -g -fplan9-extensions
  97         CFLAGS := $(filter-out $(myCFLAGS),$(CFLAGS)) $(myCFLAGS)
  98         myLDLIBS:= libparser.o libscanner.o libmdcode.o -licuuc
  99         LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
 100         ## libs
 101         all :: $(LDLIBS) oceani
 102         oceani.c oceani.h : oceani.mdc parsergen
 103                 ./parsergen -o oceani --LALR --tag Parser oceani.mdc
 104         oceani.mk: oceani.mdc md2c
 105                 ./md2c oceani.mdc
 106
 107         oceani: oceani.o $(LDLIBS)
 108                 $(CC) $(CFLAGS) -o oceani oceani.o $(LDLIBS)
 109
 110 ###### Parser: header
 111         ## macros
 112         ## ast
 113         struct parse_context {
 114                 struct token_config config;
 115                 char *file_name;
 116                 int parse_error;
 117                 ## parse context
 118         };
 119
 120 ###### macros
 121
 122         #define container_of(ptr, type, member) ({                      \
 123                 const typeof( ((type *)0)->member ) *__mptr = (ptr);    \
 124                 (type *)( (char *)__mptr - offsetof(type,member) );})
 125
 126         #define config2context(_conf) container_of(_conf, struct parse_context, \
 127                 config)
 128
 129 ###### Parser: code
 130
 131         #include <unistd.h>
 132         #include <stdlib.h>
 133         #include <fcntl.h>
 134         #include <errno.h>
 135         #include <sys/mman.h>
 136         #include <string.h>
 137         #include <stdio.h>
 138         #include <locale.h>
 139         #include <malloc.h>
 140         #include "mdcode.h"
 141         #include "scanner.h"
 142         #include "parser.h"
 143
 144         ## includes
 145
 146         #include "oceani.h"
 147
 148         ## forward decls
 149         ## value functions
 150         ## ast functions
 151         ## core functions
 152
 153         #include <getopt.h>
 154         static char Usage[] = "Usage: oceani --trace --print --noexec --brackets"
 155                               "--section=SectionName prog.ocn\n";
 156         static const struct option long_options[] = {
 157                 {"trace",     0, NULL, 't'},
 158                 {"print",     0, NULL, 'p'},
 159                 {"noexec",    0, NULL, 'n'},
 160                 {"brackets",  0, NULL, 'b'},
 161                 {"section",   1, NULL, 's'},
 162                 {NULL,        0, NULL, 0},
 163         };
 164         const char *options = "tpnbs";
 165         int main(int argc, char *argv[])
 166         {
 167                 int fd;
 168                 int len;
 169                 char *file;
 170                 struct section *s;
 171                 char *section = NULL;
 172                 struct parse_context context = {
 173                         .config = {
 174                                 .ignored = (1 << TK_line_comment)
 175                                          | (1 << TK_block_comment),
 176                                 .number_chars = ".,_+-",
 177                                 .word_start = "_",
 178                                 .word_cont = "_",
 179                         },
 180                 };
 181                 int doprint=0, dotrace=0, doexec=1, brackets=0;
 182                 struct exec **prog;
 183                 int opt;
 184                 while ((opt = getopt_long(argc, argv, options, long_options, NULL))
 185                        != -1) {
 186                         switch(opt) {
 187                         case 't': dotrace=1; break;
 188                         case 'p': doprint=1; break;
 189                         case 'n': doexec=0; break;
 190                         case 'b': brackets=1; break;
 191                         case 's': section = optarg; break;
 192                         default: fprintf(stderr, Usage);
 193                                 exit(1);
 194                         }
 195                 }
 196                 if (optind >= argc) {
 197                         fprintf(stderr, "oceani: no input file given\n");
 198                         exit(1);
 199                 }
 200                 fd = open(argv[optind], O_RDONLY);
 201                 if (fd < 0) {
 202                         fprintf(stderr, "oceani: cannot open %s\n", argv[optind]);
 203                         exit(1);
 204                 }
 205                 context.file_name = argv[optind];
 206                 len = lseek(fd, 0, 2);
 207                 file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
 208                 s = code_extract(file, file+len, NULL);
 209                 if (!s) {
 210                         fprintf(stderr, "oceani: could not find any code in %s\n",
 211                                 argv[optind]);
 212                         exit(1);
 213                 }
 214
 215                 ## context initialization
 216
 217                 if (section) {
 218                         struct section *ss;
 219                         for (ss = s; ss; ss = ss->next) {
 220                                 struct text sec = ss->section;
 221                                 if (sec.len == strlen(section) &&
 222                                     strncmp(sec.txt, section, sec.len) == 0)
 223                                         break;
 224                         }
 225                         if (ss)
 226                                 prog = parse_oceani(ss->code, &context.config,
 227                                                     dotrace ? stderr : NULL);
 228                         else {
 229                                 fprintf(stderr, "oceani: cannot find section %s\n",
 230                                         section);
 231                                 exit(1);
 232                         }
 233                 } else
 234                         prog = parse_oceani(s->code, &context.config,
 235                                     dotrace ? stderr : NULL);
 236                 if (!prog) {
 237                         fprintf(stderr, "oceani: fatal parser error.\n");
 238                         context.parse_error = 1;
 239                 }
 240                 if (prog && doprint)
 241                         print_exec(*prog, 0, brackets);
 242                 if (prog && doexec && !context.parse_error) {
 243                         if (!analyse_prog(*prog, &context)) {
 244                                 fprintf(stderr, "oceani: type error in program - not running.\n");
 245                                 exit(1);
 246                         }
 247                         interp_prog(*prog, argv+optind+1);
 248                 }
 249                 if (prog) {
 250                         free_exec(*prog);
 251                         free(prog);
 252                 }
 253                 while (s) {
 254                         struct section *t = s->next;
 255                         code_free(s->code);
 256                         free(s);
 257                         s = t;
 258                 }
 259                 ## free context vars
 260                 ## free context types
 261                 exit(context.parse_error ? 1 : 0);
 262         }
 263
 264 ### Analysis
 265
 266 The four requirements of parse, analyse, print, interpret apply to
 267 each language element individually so that is how most of the code
 268 will be structured.
 269
 270 Three of the four are fairly self explanatory.  The one that requires
 271 a little explanation is the analysis step.
 272
 273 The current language design does not require the types of variables to
 274 be declared, but they must still have a single type.  Different
 275 operations impose different requirements on the variables, for example
 276 addition requires both arguments to be numeric, and assignment
 277 requires the variable on the left to have the same type as the
 278 expression on the right.
 279
 280 Analysis involves propagating these type requirements around and
 281 consequently setting the type of each variable.  If any requirements
 282 are violated (e.g. a string is compared with a number) or if a
 283 variable needs to have two different types, then an error is raised
 284 and the program will not run.
 285
 286 If the same variable is declared in both branchs of an 'if/else', or
 287 in all cases of a 'switch' then the multiple instances may be merged
 288 into just one variable if the variable is references after the
 289 conditional statement.  When this happens, the types must naturally be
 290 consistent across all the branches.  When the variable is not used
 291 outside the if, the variables in the different branches are distinct
 292 and can be of different types.
 293
 294 Determining the types of all variables early is important for
 295 processing command line arguments.  These can be assigned to any type
 296 of variable, but we must first know the correct type so any required
 297 conversion can happen.  If a variable is associated with a command
 298 line argument but no type can be interpreted (e.g. the variable is
 299 only ever used in a `print` statement), then the type is set to
 300 'string'.
 301
 302 Undeclared names may only appear in "use" statements and "case" expressions.
 303 These names are given a type of "label" and a unique value.
 304 This allows them to fill the role of a name in an enumerated type, which
 305 is useful for testing the `switch` statement.
 306
 307 As we will see, the condition part of a `while` statement can return
 308 either a Boolean or some other type.  This requires that the expect
 309 type that gets passed around comprises a type (`enum vtype`) and a
 310 flag to indicate that `Vbool` is also permitted.
 311
 312 As there are, as yet, no distinct types that are compatible, there
 313 isn't much subtlety in the analysis.  When we have distinct number
 314 types, this will become more interesting.
 315
 316 #### Error reporting
 317
 318 When analysis discovers an inconsistency it needs to report an error;
 319 just refusing to run the code ensures that the error doesn't cascade,
 320 but by itself it isn't very useful.  A clear understand of the sort of
 321 error message that are useful will help guide the process of analysis.
 322
 323 At a simplistic level, the only sort of error that type analysis can
 324 report is that the type of some construct doesn't match a contextual
 325 requirement.  For example, in `4 + "hello"` the addition provides a
 326 contextual requirement for numbers, but `"hello"` is not a number.  In
 327 this particular example no further information is needed as the types
 328 are obvious from local information.  When a variable is involved that
 329 isn't the case.  It may be helpful to explain why the variable has a
 330 particular type, by indicating the location where the type was set,
 331 whether by declaration or usage.
 332
 333 Using a recursive-descent analysis we can easily detect a problem at
 334 multiple locations. In "`hello:= "there"; 4 + hello`" the addition
 335 will detect that one argument is not a number and the usage of `hello`
 336 will detect that a number was wanted, but not provided.  In this
 337 (early) version of the language, we will generate error reports at
 338 multiple locations, so the use of `hello` will report an error and
 339 explain were the value was set, and the addition will report an error
 340 and say why numbers are needed.  To be able to report locations for
 341 errors, each language element will need to record a file location
 342 (line and column) and each variable will need to record the language
 343 element where its type was set.  For now we will assume that each line
 344 of an error message indicates one location in the file, and up to 2
 345 types.  So we provide a `printf`-like function which takes a format, a
 346 language (a `struct exec` which has not yet been introduced), and 2
 347 types. "`%1`" reports the first type, "`%2`" reports the second.  We
 348 will need a function to print the location, once we know how that is
 349 stored.  As will be explained later, there are sometimes extra rules for
 350 type matching and they might affect error messages, we need to pass those
 351 in too.
 352
 353 As well as type errors, we sometimes need to report problems with
 354 tokens, which might be unexpected or might name a type that has not
 355 been defined.  For these we have `tok_err()` which reports an error
 356 with a given token.  Each of the error functions sets the flag in the
 357 context so indicate that parsing failed.
 358
 359 ###### forward decls
 360
 361         static void fput_loc(struct exec *loc, FILE *f);
 362
 363 ###### core functions
 364
 365         static void type_err(struct parse_context *c,
 366                              char *fmt, struct exec *loc,
 367                              struct type *t1, int rules, struct type *t2)
 368         {
 369                 fprintf(stderr, "%s:", c->file_name);
 370                 fput_loc(loc, stderr);
 371                 for (; *fmt ; fmt++) {
 372                         if (*fmt != '%') {
 373                                 fputc(*fmt, stderr);
 374                                 continue;
 375                         }
 376                         fmt++;
 377                         switch (*fmt) {
 378                         case '%': fputc(*fmt, stderr); break;
 379                         default: fputc('?', stderr); break;
 380                         case '1':
 381                                 if (t1)
 382                                         fprintf(stderr, "%.*s", t1->name.len, t1->name.txt);
 383                                 else
 384                                         fputs("*unknown*", stderr);
 385                                 break;
 386                         case '2':
 387                                 if (t2)
 388                                         fprintf(stderr, "%.*s", t2->name.len, t2->name.txt);
 389                                 else
 390                                         fputs("*unknown*", stderr);
 391                                 break;
 392                                 break;
 393                         ## format cases
 394                         }
 395                 }
 396                 fputs("\n", stderr);
 397                 c->parse_error = 1;
 398         }
 399
 400         static void tok_err(struct parse_context *c, char *fmt, struct token *t)
 401         {
 402                 fprintf(stderr, "%s:%d:%d: %s: %.*s\n", c->file_name, t->line, t->col, fmt,
 403                         t->txt.len, t->txt.txt);
 404                 c->parse_error = 1;
 405         }
 406
 407 ## Data Structures
 408
 409 One last introductory step before detailing the language elements and
 410 providing their four requirements is to establish the data structures
 411 to store these elements.
 412
 413 There are two key objects that we need to work with: executable
 414 elements which comprise the program, and values which the program
 415 works with.  Between these are the variables in their various scopes
 416 which hold the values, and types which classify the values stored and
 417 manipulatd by executables.
 418
 419 ### Types
 420
 421 Values come in a wide range of types, with more likely to be added.
 422 Each type needs to be able to parse and print its own values (for
 423 convenience at least) as well as to compare two values, at least for
 424 equality and possibly for order.  For now, values might need to be
 425 duplicated and freed, though eventually such manipulations will be
 426 better integrated into the language.
 427
 428 Rather than requiring every numeric type to support all numeric
 429 operations (add, multiple, etc), we allow types to be able to present
 430 as one of a few standard types: integer, float, and fraction.  The
 431 existance of these conversion functions enable types to determine if
 432 they are compatible with other types.
 433
 434 Named type are stored in a simple linked list.  Objects of each type are "values"
 435 which are often passed around by value.
 436
 437 ###### ast
 438
 439         struct value {
 440                 struct type *type;
 441                 union {
 442                         ## value union fields
 443                 };
 444         };
 445
 446         struct type {
 447                 struct text name;
 448                 struct type *next;
 449                 struct value (*init)(struct type *type);
 450                 struct value (*prepare)(struct type *type);
 451                 struct value (*parse)(struct type *type, char *str);
 452                 void (*print)(struct value val);
 453                 int (*cmp_order)(struct value v1, struct value v2);
 454                 int (*cmp_eq)(struct value v1, struct value v2);
 455                 struct value (*dup)(struct value val);
 456                 void (*free)(struct value val);
 457                 struct type *(*compat)(struct type *this, struct type *other);
 458                 long long (*to_int)(struct value *v);
 459                 double (*to_float)(struct value *v);
 460                 int (*to_mpq)(mpq_t *q, struct value *v);
 461                 union {
 462                         ## type union fields
 463                 };
 464         };
 465
 466 ###### parse context
 467
 468         struct type *typelist;
 469
 470 ###### ast functions
 471
 472         static struct type *find_type(struct parse_context *c, struct text s)
 473         {
 474                 struct type *l = c->typelist;
 475
 476                 while (l &&
 477                        text_cmp(l->name, s) != 0)
 478                                 l = l->next;
 479                 return l;
 480         }
 481
 482         static struct type *add_type(struct parse_context *c, struct text s,
 483                                      struct type *proto)
 484         {
 485                 struct type *n;
 486
 487                 n = calloc(1, sizeof(*n));
 488                 *n = *proto;
 489                 n->name = s;
 490                 n->next = c->typelist;
 491                 c->typelist = n;
 492                 return n;
 493         }
 494
 495         static void free_type(struct type *t)
 496         {
 497                 /* The type is always a reference to something in the
 498                  * context, so we don't need to free anything.
 499                  */
 500         }
 501
 502         static void free_value(struct value v)
 503         {
 504                 if (v.type)
 505                         v.type->free(v);
 506         }
 507
 508         static struct value val_prepare(struct type *type)
 509         {
 510                 struct value rv;
 511
 512                 if (type)
 513                         return type->prepare(type);
 514                 rv.type = type;
 515                 return rv;
 516         }
 517
 518         static struct value val_init(struct type *type)
 519         {
 520                 struct value rv;
 521
 522                 if (type)
 523                         return type->init(type);
 524                 rv.type = type;
 525                 return rv;
 526         }
 527
 528         static struct value dup_value(struct value v)
 529         {
 530                 if (v.type)
 531                         return v.type->dup(v);
 532                 return v;
 533         }
 534
 535         static int value_cmp(struct value left, struct value right)
 536         {
 537                 if (left.type && left.type->cmp_order)
 538                         return left.type->cmp_order(left, right);
 539                 if (left.type && left.type->cmp_eq)
 540                         return left.type->cmp_eq(left, right);
 541                 return -1;
 542         }
 543
 544         static void print_value(struct value v)
 545         {
 546                 if (v.type && v.type->print)
 547                         v.type->print(v);
 548                 else
 549                         printf("*Unknown*");
 550         }
 551
 552         static struct value parse_value(struct type *type, char *arg)
 553         {
 554                 struct value rv;
 555
 556                 if (type && type->parse)
 557                         return type->parse(type, arg);
 558                 rv.type = NULL;
 559                 return rv;
 560         }
 561
 562 ###### free context types
 563
 564         while (context.typelist) {
 565                 struct type *t = context.typelist;
 566
 567                 context.typelist = t->next;
 568                 free(t);
 569         }
 570
 571 #### Base Types
 572
 573 Values of the base types can be numbers, which we represent as
 574 multi-precision fractions, strings, Booleans and labels.  When
 575 analysing the program we also need to allow for places where no value
 576 is meaningful (type `Tnone`) and where we don't know what type to
 577 expect yet (type is `NULL`).
 578
 579 Values are never shared, they are always copied when used, and freed
 580 when no longer needed.
 581
 582 When propagating type information around the program, we need to
 583 determine if two types are compatible, where type `NULL` is compatible
 584 with anything.  There are two special cases with type compatibility,
 585 both related to the Conditional Statement which will be described
 586 later.  In some cases a Boolean can be accepted as well as some other
 587 primary type, and in others any type is acceptable except a label (`Vlabel`).
 588 A separate function encode these cases will simplify some code later.
 589
 590 When assigning command line arguments to variables, we need to be able
 591 to parse each type from a string.
 592
 593 ###### includes
 594         #include <gmp.h>
 595         #include "string.h"
 596         #include "number.h"
 597
 598 ###### libs
 599         myLDLIBS := libnumber.o libstring.o -lgmp
 600         LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
 601
 602 ###### type union fields
 603         enum vtype {Vnone, Vstr, Vnum, Vbool, Vlabel} vtype;
 604
 605 ###### value union fields
 606         struct text str;
 607         mpq_t num;
 608         int bool;
 609         void *label;
 610
 611 ###### ast functions
 612         static void _free_value(struct value v)
 613         {
 614                 switch (v.type->vtype) {
 615                 case Vnone: break;
 616                 case Vstr: free(v.str.txt); break;
 617                 case Vnum: mpq_clear(v.num); break;
 618                 case Vlabel:
 619                 case Vbool: break;
 620                 }
 621         }
 622
 623         static int vtype_compat(struct type *require, struct type *have, int rules)
 624         {
 625                 if ((rules & Rboolok) && have == Tbool)
 626                         return 1;
 627                 if ((rules & Rnolabel) && have == Tlabel)
 628                         return 0;
 629                 if (!require || !have)
 630                         return 1;
 631
 632                 return require == have;
 633         }
 634
 635 ###### value functions
 636
 637         static struct value _val_prepare(struct type *type)
 638         {
 639                 struct value rv;
 640
 641                 rv.type = type;
 642                 switch(type->vtype) {
 643                 case Vnone:
 644                         break;
 645                 case Vnum:
 646                         memset(&rv.num, 0, sizeof(rv.num));
 647                         break;
 648                 case Vstr:
 649                         rv.str.txt = NULL;
 650                         rv.str.len = 0;
 651                         break;
 652                 case Vbool:
 653                         rv.bool = 0;
 654                         break;
 655                 case Vlabel:
 656                         rv.label = NULL;
 657                         break;
 658                 }
 659                 return rv;
 660         }
 661
 662         static struct value _val_init(struct type *type)
 663         {
 664                 struct value rv;
 665
 666                 rv.type = type;
 667                 switch(type->vtype) {
 668                 case Vnone:
 669                         break;
 670                 case Vnum:
 671                         mpq_init(rv.num); break;
 672                 case Vstr:
 673                         rv.str.txt = malloc(1);
 674                         rv.str.len = 0;
 675                         break;
 676                 case Vbool:
 677                         rv.bool = 0;
 678                         break;
 679                 case Vlabel:
 680                         rv.label = NULL;
 681                         break;
 682                 }
 683                 return rv;
 684         }
 685
 686         static struct value _dup_value(struct value v)
 687         {
 688                 struct value rv;
 689                 rv.type = v.type;
 690                 switch (rv.type->vtype) {
 691                 case Vnone:
 692                         break;
 693                 case Vlabel:
 694                         rv.label = v.label;
 695                         break;
 696                 case Vbool:
 697                         rv.bool = v.bool;
 698                         break;
 699                 case Vnum:
 700                         mpq_init(rv.num);
 701                         mpq_set(rv.num, v.num);
 702                         break;
 703                 case Vstr:
 704                         rv.str.len = v.str.len;
 705                         rv.str.txt = malloc(rv.str.len);
 706                         memcpy(rv.str.txt, v.str.txt, v.str.len);
 707                         break;
 708                 }
 709                 return rv;
 710         }
 711
 712         static int _value_cmp(struct value left, struct value right)
 713         {
 714                 int cmp;
 715                 if (left.type != right.type)
 716                         return left.type - right.type;
 717                 switch (left.type->vtype) {
 718                 case Vlabel: cmp = left.label == right.label ? 0 : 1; break;
 719                 case Vnum: cmp = mpq_cmp(left.num, right.num); break;
 720                 case Vstr: cmp = text_cmp(left.str, right.str); break;
 721                 case Vbool: cmp = left.bool - right.bool; break;
 722                 case Vnone: cmp = 0;
 723                 }
 724                 return cmp;
 725         }
 726
 727         static void _print_value(struct value v)
 728         {
 729                 switch (v.type->vtype) {
 730                 case Vnone:
 731                         printf("*no-value*"); break;
 732                 case Vlabel:
 733                         printf("*label-%p*", v.label); break;
 734                 case Vstr:
 735                         printf("%.*s", v.str.len, v.str.txt); break;
 736                 case Vbool:
 737                         printf("%s", v.bool ? "True":"False"); break;
 738                 case Vnum:
 739                         {
 740                         mpf_t fl;
 741                         mpf_init2(fl, 20);
 742                         mpf_set_q(fl, v.num);
 743                         gmp_printf("%Fg", fl);
 744                         mpf_clear(fl);
 745                         break;
 746                         }
 747                 }
 748         }
 749
 750         static struct value _parse_value(struct type *type, char *arg)
 751         {
 752                 struct value val;
 753                 struct text tx;
 754                 int neg = 0;
 755                 char tail[3] = "";
 756
 757                 val.type = type;
 758                 switch(type->vtype) {
 759                 case Vlabel:
 760                 case Vnone:
 761                         val.type = NULL;
 762                         break;
 763                 case Vstr:
 764                         val.str.len = strlen(arg);
 765                         val.str.txt = malloc(val.str.len);
 766                         memcpy(val.str.txt, arg, val.str.len);
 767                         break;
 768                 case Vnum:
 769                         if (*arg == '-') {
 770                                 neg = 1;
 771                                 arg++;
 772                         }
 773                         tx.txt = arg; tx.len = strlen(tx.txt);
 774                         if (number_parse(val.num, tail, tx) == 0)
 775                                 mpq_init(val.num);
 776                         else if (neg)
 777                                 mpq_neg(val.num, val.num);
 778                         if (tail[0]) {
 779                                 printf("Unsupported suffix: %s\n", arg);
 780                                 val.type = NULL;
 781                         }
 782                         break;
 783                 case Vbool:
 784                         if (strcasecmp(arg, "true") == 0 ||
 785                             strcmp(arg, "1") == 0)
 786                                 val.bool = 1;
 787                         else if (strcasecmp(arg, "false") == 0 ||
 788                                  strcmp(arg, "0") == 0)
 789                                 val.bool = 0;
 790                         else {
 791                                 printf("Bad bool: %s\n", arg);
 792                                 val.type = NULL;
 793                         }
 794                         break;
 795                 }
 796                 return val;
 797         }
 798
 799         static void _free_value(struct value v);
 800
 801         static struct type base_prototype = {
 802                 .init = _val_init,
 803                 .prepare = _val_prepare,
 804                 .parse = _parse_value,
 805                 .print = _print_value,
 806                 .cmp_order = _value_cmp,
 807                 .cmp_eq = _value_cmp,
 808                 .dup = _dup_value,
 809                 .free = _free_value,
 810         };
 811
 812         static struct type *Tbool, *Tstr, *Tnum, *Tnone, *Tlabel;
 813
 814 ###### ast functions
 815         static struct type *add_base_type(struct parse_context *c, char *n, enum vtype vt)
 816         {
 817                 struct text txt = { n, strlen(n) };
 818                 struct type *t;
 819
 820                 t = add_type(c, txt, &base_prototype);
 821                 t->vtype = vt;
 822                 return t;
 823         }
 824
 825 ###### context initialization
 826
 827         Tbool  = add_base_type(&context, "Boolean", Vbool);
 828         Tstr   = add_base_type(&context, "string", Vstr);
 829         Tnum   = add_base_type(&context, "number", Vnum);
 830         Tnone  = add_base_type(&context, "none", Vnone);
 831         Tlabel = add_base_type(&context, "label", Vlabel);
 832
 833 ### Variables
 834
 835 Variables are scoped named values.  We store the names in a linked
 836 list of "bindings" sorted lexically, and use sequential search and
 837 insertion sort.
 838
 839 ###### ast
 840
 841         struct binding {
 842                 struct text name;
 843                 struct binding *next;   // in lexical order
 844                 ## binding fields
 845         };
 846
 847 This linked list is stored in the parse context so that "reduce"
 848 functions can find or add variables, and so the analysis phase can
 849 ensure that every variable gets a type.
 850
 851 ###### parse context
 852
 853         struct binding *varlist;  // In lexical order
 854
 855 ###### ast functions
 856
 857         static struct binding *find_binding(struct parse_context *c, struct text s)
 858         {
 859                 struct binding **l = &c->varlist;
 860                 struct binding *n;
 861                 int cmp = 1;
 862
 863                 while (*l &&
 864                         (cmp = text_cmp((*l)->name, s)) < 0)
 865                                 l = & (*l)->next;
 866                 if (cmp == 0)
 867                         return *l;
 868                 n = calloc(1, sizeof(*n));
 869                 n->name = s;
 870                 n->next = *l;
 871                 *l = n;
 872                 return n;
 873         }
 874
 875 Each name can be linked to multiple variables defined in different
 876 scopes.  Each scope starts where the name is declared and continues
 877 until the end of the containing code block.  Scopes of a given name
 878 cannot nest, so a declaration while a name is in-scope is an error.
 879
 880 ###### binding fields
 881         struct variable *var;
 882
 883 ###### ast
 884         struct variable {
 885                 struct variable *previous;
 886                 struct value val;
 887                 struct binding *name;
 888                 struct exec *where_decl;// where name was declared
 889                 struct exec *where_set; // where type was set
 890                 ## variable fields
 891         };
 892
 893 While the naming seems strange, we include local constants in the
 894 definition of variables.  A name declared `var := value` can
 895 subsequently be changed, but a name declared `var ::= value` cannot -
 896 it is constant
 897
 898 ###### variable fields
 899         int constant;
 900
 901 Scopes in parallel branches can be partially merged.  More
 902 specifically, if a given name is declared in both branches of an
 903 if/else then its scope is a candidate for merging.  Similarly if
 904 every branch of an exhaustive switch (e.g. has an "else" clause)
 905 declares a given name, then the scopes from the branches are
 906 candidates for merging.
 907
 908 Note that names declared inside a loop (which is only parallel to
 909 itself) are never visible after the loop.  Similarly names defined in
 910 scopes which are not parallel, such as those started by `for` and
 911 `switch`, are never visible after the scope.  Only variables defined in
 912 both `then` and `else` (including the implicit then after an `if`, and
 913 excluding `then` used with `for`) and in all `case`s and `else` of a
 914 `switch` or `while` can be visible beyond the `if`/`switch`/`while`.
 915
 916 Labels, which are a bit like variables, follow different rules.
 917 Labels are not explicitly declared, but if an undeclared name appears
 918 in a context where a label is legal, that effectively declares the
 919 name as a label.  The declaration remains in force (or in scope) at
 920 least to the end of the immediately containing block and conditionally
 921 in any larger containing block which does not declare the name in some
 922 other way.  Importantly, the conditional scope extension happens even
 923 if the label is only used in one parallel branch of a conditional --
 924 when used in one branch it is treated as having been declared in all
 925 branches.
 926
 927 Merge candidates are tentatively visible beyond the end of the
 928 branching statement which creates them.  If the name is used, the
 929 merge is affirmed and they become a single variable visible at the
 930 outer layer.  If not - if it is redeclared first - the merge lapses.
 931
 932 To track scopes we have an extra stack, implemented as a linked list,
 933 which roughly parallels the parse stack and which is used exclusively
 934 for scoping.  When a new scope is opened, a new frame is pushed and
 935 the child-count of the parent frame is incremented.  This child-count
 936 is used to distinguish between the first of a set of parallel scopes,
 937 in which declared variables must not be in scope, and subsequent
 938 branches, whether they must already be conditionally scoped.
 939
 940 To push a new frame *before* any code in the frame is parsed, we need a
 941 grammar reduction.  This is most easily achieved with a grammar
 942 element which derives the empty string, and creates the new scope when
 943 it is recognized.  This can be placed, for example, between a keyword
 944 like "if" and the code following it.
 945
 946 ###### ast
 947         struct scope {
 948                 struct scope *parent;
 949                 int child_count;
 950         };
 951
 952 ###### parse context
 953         int scope_depth;
 954         struct scope *scope_stack;
 955
 956 ###### ast functions
 957         static void scope_pop(struct parse_context *c)
 958         {
 959                 struct scope *s = c->scope_stack;
 960
 961                 c->scope_stack = s->parent;
 962                 free(s);
 963                 c->scope_depth -= 1;
 964         }
 965
 966         static void scope_push(struct parse_context *c)
 967         {
 968                 struct scope *s = calloc(1, sizeof(*s));
 969                 if (c->scope_stack)
 970                         c->scope_stack->child_count += 1;
 971                 s->parent = c->scope_stack;
 972                 c->scope_stack = s;
 973                 c->scope_depth += 1;
 974         }
 975
 976 ###### Grammar
 977
 978         $void
 979         OpenScope -> ${ scope_push(config2context(config)); }$
 980
 981
 982 Each variable records a scope depth and is in one of four states:
 983
 984 - "in scope".  This is the case between the declaration of the
 985   variable and the end of the containing block, and also between
 986   the usage with affirms a merge and the end of that block.
 987
 988   The scope depth is not greater than the current parse context scope
 989   nest depth.  When the block of that depth closes, the state will
 990   change.  To achieve this, all "in scope" variables are linked
 991   together as a stack in nesting order.
 992
 993 - "pending".  The "in scope" block has closed, but other parallel
 994   scopes are still being processed.  So far, every parallel block at
 995   the same level that has closed has declared the name.
 996
 997   The scope depth is the depth of the last parallel block that
 998   enclosed the declaration, and that has closed.
 999
1000 - "conditionally in scope".  The "in scope" block and all parallel
1001   scopes have closed, and no further mention of the name has been
1002   seen.  This state includes a secondary nest depth which records the
1003   outermost scope seen since the variable became conditionally in
1004   scope.  If a use of the name is found, the variable becomes "in
1005   scope" and that secondary depth becomes the recorded scope depth.
1006   If the name is declared as a new variable, the old variable becomes
1007   "out of scope" and the recorded scope depth stays unchanged.
1008
1009 - "out of scope".  The variable is neither in scope nor conditionally
1010   in scope.  It is permanently out of scope now and can be removed from
1011   the "in scope" stack.
1012
1013
1014 ###### variable fields
1015         int depth, min_depth;
1016         enum { OutScope, PendingScope, CondScope, InScope } scope;
1017         struct variable *in_scope;
1018
1019 ###### parse context
1020
1021         struct variable *in_scope;
1022
1023 All variables with the same name are linked together using the
1024 'previous' link.  Those variable that have
1025 been affirmatively merged all have a 'merged' pointer that points to
1026 one primary variable - the most recently declared instance. When
1027 merging variables, we need to also adjust the 'merged' pointer on any
1028 other variables that had previously been merged with the one that will
1029 no longer be primary.
1030
1031 ###### variable fields
1032         struct variable *merged;
1033
1034 ###### ast functions
1035
1036         static void variable_merge(struct variable *primary, struct variable *secondary)
1037         {
1038                 struct variable *v;
1039
1040                 if (primary->merged)
1041                         // shouldn't happen
1042                         primary = primary->merged;
1043
1044                 for (v = primary->previous; v; v=v->previous)
1045                         if (v == secondary || v == secondary->merged ||
1046                             v->merged == secondary ||
1047                             (v->merged && v->merged == secondary->merged)) {
1048                                 v->scope = OutScope;
1049                                 v->merged = primary;
1050                         }
1051         }
1052
1053 ###### free context vars
1054
1055         while (context.varlist) {
1056                 struct binding *b = context.varlist;
1057                 struct variable *v = b->var;
1058                 context.varlist = b->next;
1059                 free(b);
1060                 while (v) {
1061                         struct variable *t = v;
1062
1063                         v = t->previous;
1064                         free_value(t->val);
1065                         free(t);
1066                 }
1067         }
1068
1069 #### Manipulating Bindings
1070
1071 When a name is conditionally visible, a new declaration discards the
1072 old binding - the condition lapses.  Conversely a usage of the name
1073 affirms the visibility and extends it to the end of the containing
1074 block - i.e. the block that contains both the original declaration and
1075 the latest usage.  This is determined from `min_depth`.  When a
1076 conditionally visible variable gets affirmed like this, it is also
1077 merged with other conditionally visible variables with the same name.
1078
1079 When we parse a variable declaration we either signal an error if the
1080 name is currently bound, or create a new variable at the current nest
1081 depth if the name is unbound or bound to a conditionally scoped or
1082 pending-scope variable.  If the previous variable was conditionally
1083 scoped, it and its homonyms becomes out-of-scope.
1084
1085 When we parse a variable reference (including non-declarative
1086 assignment) we signal an error if the name is not bound or is bound to
1087 a pending-scope variable; update the scope if the name is bound to a
1088 conditionally scoped variable; or just proceed normally if the named
1089 variable is in scope.
1090
1091 When we exit a scope, any variables bound at this level are either
1092 marked out of scope or pending-scoped, depending on whether the
1093 scope was sequential or parallel.
1094
1095 When exiting a parallel scope we check if there are any variables that
1096 were previously pending and are still visible. If there are, then
1097 there weren't redeclared in the most recent scope, so they cannot be
1098 merged and must become out-of-scope.  If it is not the first of
1099 parallel scopes (based on `child_count`), we check that there was a
1100 previous binding that is still pending-scope.  If there isn't, the new
1101 variable must now be out-of-scope.
1102
1103 When exiting a sequential scope that immediately enclosed parallel
1104 scopes, we need to resolve any pending-scope variables.  If there was
1105 no `else` clause, and we cannot determine that the `switch` was exhaustive,
1106 we need to mark all pending-scope variable as out-of-scope.  Otherwise
1107 all pending-scope variables become conditionally scoped.
1108
1109 ###### ast
1110         enum closetype { CloseSequential, CloseParallel, CloseElse };
1111
1112 ###### ast functions
1113
1114         static struct variable *var_decl(struct parse_context *c, struct text s)
1115         {
1116                 struct binding *b = find_binding(c, s);
1117                 struct variable *v = b->var;
1118
1119                 switch (v ? v->scope : OutScope) {
1120                 case InScope:
1121                         /* Caller will report the error */
1122                         return NULL;
1123                 case CondScope:
1124                         for (;
1125                              v && v->scope == CondScope;
1126                              v = v->previous)
1127                                 v->scope = OutScope;
1128                         break;
1129                 default: break;
1130                 }
1131                 v = calloc(1, sizeof(*v));
1132                 v->previous = b->var;
1133                 b->var = v;
1134                 v->name = b;
1135                 v->min_depth = v->depth = c->scope_depth;
1136                 v->scope = InScope;
1137                 v->in_scope = c->in_scope;
1138                 c->in_scope = v;
1139                 v->val = val_prepare(NULL);
1140                 return v;
1141         }
1142
1143         static struct variable *var_ref(struct parse_context *c, struct text s)
1144         {
1145                 struct binding *b = find_binding(c, s);
1146                 struct variable *v = b->var;
1147                 struct variable *v2;
1148
1149                 switch (v ? v->scope : OutScope) {
1150                 case OutScope:
1151                 case PendingScope:
1152                         /* Signal an error - once that is possible */
1153                         return NULL;
1154                 case CondScope:
1155                         /* All CondScope variables of this name need to be merged
1156                          * and become InScope
1157                          */
1158                         v->depth = v->min_depth;
1159                         v->scope = InScope;
1160                         for (v2 = v->previous;
1161                              v2 && v2->scope == CondScope;
1162                              v2 = v2->previous)
1163                                 variable_merge(v, v2);
1164                         break;
1165                 case InScope:
1166                         break;
1167                 }
1168                 return v;
1169         }
1170
1171         static void var_block_close(struct parse_context *c, enum closetype ct)
1172         {
1173                 /* close of all variables that are in_scope */
1174                 struct variable *v, **vp, *v2;
1175
1176                 scope_pop(c);
1177                 for (vp = &c->in_scope;
1178                      v = *vp, v && v->depth > c->scope_depth && v->min_depth > c->scope_depth;
1179                      ) {
1180                         switch (ct) {
1181                         case CloseElse:
1182                         case CloseParallel: /* handle PendingScope */
1183                                 switch(v->scope) {
1184                                 case InScope:
1185                                 case CondScope:
1186                                         if (c->scope_stack->child_count == 1)
1187                                                 v->scope = PendingScope;
1188                                         else if (v->previous &&
1189                                                  v->previous->scope == PendingScope)
1190                                                 v->scope = PendingScope;
1191                                         else if (v->val.type == Tlabel)
1192                                                 v->scope = PendingScope;
1193                                         else if (v->name->var == v)
1194                                                 v->scope = OutScope;
1195                                         if (ct == CloseElse) {
1196                                                 /* All Pending variables with this name
1197                                                  * are now Conditional */
1198                                                 for (v2 = v;
1199                                                      v2 && v2->scope == PendingScope;
1200                                                      v2 = v2->previous)
1201                                                         v2->scope = CondScope;
1202                                         }
1203                                         break;
1204                                 case PendingScope:
1205                                         for (v2 = v;
1206                                              v2 && v2->scope == PendingScope;
1207                                              v2 = v2->previous)
1208                                                 if (v2->val.type != Tlabel)
1209                                                         v2->scope = OutScope;
1210                                         break;
1211                                 case OutScope: break;
1212                                 }
1213                                 break;
1214                         case CloseSequential:
1215                                 if (v->val.type == Tlabel)
1216                                         v->scope = PendingScope;
1217                                 switch (v->scope) {
1218                                 case InScope:
1219                                         v->scope = OutScope;
1220                                         break;
1221                                 case PendingScope:
1222                                         /* There was no 'else', so we can only become
1223                                          * conditional if we know the cases were exhaustive,
1224                                          * and that doesn't mean anything yet.
1225                                          * So only labels become conditional..
1226                                          */
1227                                         for (v2 = v;
1228                                              v2 && v2->scope == PendingScope;
1229                                              v2 = v2->previous)
1230                                                 if (v2->val.type == Tlabel) {
1231                                                         v2->scope = CondScope;
1232                                                         v2->min_depth = c->scope_depth;
1233                                                 } else
1234                                                         v2->scope = OutScope;
1235                                         break;
1236                                 case CondScope:
1237                                 case OutScope: break;
1238                                 }
1239                                 break;
1240                         }
1241                         if (v->scope == OutScope)
1242                                 *vp = v->in_scope;
1243                         else
1244                                 vp = &v->in_scope;
1245                 }
1246         }
1247
1248 ### Executables
1249
1250 Executables can be lots of different things.  In many cases an
1251 executable is just an operation combined with one or two other
1252 executables.  This allows for expressions and lists etc.  Other times
1253 an executable is something quite specific like a constant or variable
1254 name.  So we define a `struct exec` to be a general executable with a
1255 type, and a `struct binode` which is a subclass of `exec`, forms a
1256 node in a binary tree, and holds an operation. There will be other
1257 subclasses, and to access these we need to be able to `cast` the
1258 `exec` into the various other types.
1259
1260 ###### macros
1261         #define cast(structname, pointer) ({            \
1262                 const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \
1263                 if (__mptr && *__mptr != X##structname) abort();                \
1264                 (struct structname *)( (char *)__mptr);})
1265
1266         #define new(structname) ({                                              \
1267                 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1268                 __ptr->type = X##structname;                                            \
1269                 __ptr->line = -1; __ptr->column = -1;                                   \
1270                 __ptr;})
1271
1272         #define new_pos(structname, token) ({                                           \
1273                 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1274                 __ptr->type = X##structname;                                            \
1275                 __ptr->line = token.line; __ptr->column = token.col;                    \
1276                 __ptr;})
1277
1278 ###### ast
1279         enum exec_types {
1280                 Xbinode,
1281                 ## exec type
1282         };
1283         struct exec {
1284                 enum exec_types type;
1285                 int line, column;
1286         };
1287         struct binode {
1288                 struct exec;
1289                 enum Btype {
1290                         ## Binode types
1291                 } op;
1292                 struct exec *left, *right;
1293         };
1294
1295 ###### ast functions
1296
1297         static int __fput_loc(struct exec *loc, FILE *f)
1298         {
1299                 if (loc->line >= 0) {
1300                         fprintf(f, "%d:%d: ", loc->line, loc->column);
1301                         return 1;
1302                 }
1303                 if (loc->type == Xbinode)
1304                         return __fput_loc(cast(binode,loc)->left, f) ||
1305                                __fput_loc(cast(binode,loc)->right, f);
1306                 return 0;
1307         }
1308         static void fput_loc(struct exec *loc, FILE *f)
1309         {
1310                 if (!__fput_loc(loc, f))
1311                         fprintf(f, "??:??: ");
1312         }
1313
1314 Each different type of `exec` node needs a number of functions
1315 defined, a bit like methods.  We must be able to be able to free it,
1316 print it, analyse it and execute it.  Once we have specific `exec`
1317 types we will need to parse them too.  Let's take this a bit more
1318 slowly.
1319
1320 #### Freeing
1321
1322 The parser generator requires a `free_foo` function for each struct
1323 that stores attributes and they will be `exec`s and subtypes there-of.
1324 So we need `free_exec` which can handle all the subtypes, and we need
1325 `free_binode`.
1326
1327 ###### ast functions
1328
1329         static void free_binode(struct binode *b)
1330         {
1331                 if (!b)
1332                         return;
1333                 free_exec(b->left);
1334                 free_exec(b->right);
1335                 free(b);
1336         }
1337
1338 ###### core functions
1339         static void free_exec(struct exec *e)
1340         {
1341                 if (!e)
1342                         return;
1343                 switch(e->type) {
1344                         ## free exec cases
1345                 }
1346         }
1347
1348 ###### forward decls
1349
1350         static void free_exec(struct exec *e);
1351
1352 ###### free exec cases
1353         case Xbinode: free_binode(cast(binode, e)); break;
1354
1355 #### Printing
1356
1357 Printing an `exec` requires that we know the current indent level for
1358 printing line-oriented components.  As will become clear later, we
1359 also want to know what sort of bracketing to use.
1360
1361 ###### ast functions
1362
1363         static void do_indent(int i, char *str)
1364         {
1365                 while (i--)
1366                         printf("    ");
1367                 printf("%s", str);
1368         }
1369
1370 ###### core functions
1371         static void print_binode(struct binode *b, int indent, int bracket)
1372         {
1373                 struct binode *b2;
1374                 switch(b->op) {
1375                 ## print binode cases
1376                 }
1377         }
1378
1379         static void print_exec(struct exec *e, int indent, int bracket)
1380         {
1381                 if (!e)
1382                         return;
1383                 switch (e->type) {
1384                 case Xbinode:
1385                         print_binode(cast(binode, e), indent, bracket); break;
1386                 ## print exec cases
1387                 }
1388         }
1389
1390 ###### forward decls
1391
1392         static void print_exec(struct exec *e, int indent, int bracket);
1393
1394 #### Analysing
1395
1396 As discussed, analysis involves propagating type requirements around
1397 the program and looking for errors.
1398
1399 So `propagate_types` is passed an expected type (being a `struct type`
1400 pointer together with some `val_rules` flags) that the `exec` is
1401 expected to return, and returns the type that it does return, either
1402 of which can be `NULL` signifying "unknown".  An `ok` flag is passed
1403 by reference. It is set to `0` when an error is found, and `2` when
1404 any change is made.  If it remains unchanged at `1`, then no more
1405 propagation is needed.
1406
1407 ###### ast
1408
1409         enum val_rules {Rnolabel = 1<<0, Rboolok = 1<<1};
1410
1411 ###### format cases
1412         case 'r':
1413                 if (rules & Rnolabel)
1414                         fputs(" (labels not permitted)", stderr);
1415                 break;
1416
1417 ###### core functions
1418
1419         static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1420                                             struct type *type, int rules)
1421         {
1422                 struct type *t;
1423
1424                 if (!prog)
1425                         return Tnone;
1426
1427                 switch (prog->type) {
1428                 case Xbinode:
1429                 {
1430                         struct binode *b = cast(binode, prog);
1431                         switch (b->op) {
1432                         ## propagate binode cases
1433                         }
1434                         break;
1435                 }
1436                 ## propagate exec cases
1437                 }
1438                 return Tnone;
1439         }
1440
1441 #### Interpreting
1442
1443 Interpreting an `exec` doesn't require anything but the `exec`.  State
1444 is stored in variables and each variable will be directly linked from
1445 within the `exec` tree.  The exception to this is the whole `program`
1446 which needs to look at command line arguments.  The `program` will be
1447 interpreted separately.
1448
1449 Each `exec` can return a value, which may be `Tnone` but must be non-NULL;
1450
1451 ###### core functions
1452
1453         static struct value interp_exec(struct exec *e)
1454         {
1455                 struct value rv;
1456                 rv.type = Tnone;
1457                 if (!e)
1458                         return rv;
1459
1460                 switch(e->type) {
1461                 case Xbinode:
1462                 {
1463                         struct binode *b = cast(binode, e);
1464                         struct value left, right;
1465                         left.type = right.type = Tnone;
1466                         switch (b->op) {
1467                         ## interp binode cases
1468                         }
1469                         free_value(left); free_value(right);
1470                         break;
1471                 }
1472                 ## interp exec cases
1473                 }
1474                 return rv;
1475         }
1476
1477 ## Language elements
1478
1479 Each language element needs to be parsed, printed, analysed,
1480 interpreted, and freed.  There are several, so let's just start with
1481 the easy ones and work our way up.
1482
1483 ### Values
1484
1485 We have already met values as separate objects.  When manifest
1486 constants appear in the program text, that must result in an executable
1487 which has a constant value.  So the `val` structure embeds a value in
1488 an executable.
1489
1490 ###### exec type
1491         Xval,
1492
1493 ###### ast
1494         struct val {
1495                 struct exec;
1496                 struct value val;
1497         };
1498
1499 ###### Grammar
1500
1501         $*val
1502         Value ->  True ${
1503                         $0 = new_pos(val, $1);
1504                         $0->val.type = Tbool;
1505                         $0->val.bool = 1;
1506                         }$
1507                 | False ${
1508                         $0 = new_pos(val, $1);
1509                         $0->val.type = Tbool;
1510                         $0->val.bool = 0;
1511                         }$
1512                 | NUMBER ${
1513                         $0 = new_pos(val, $1);
1514                         $0->val.type = Tnum;
1515                         {
1516                         char tail[3];
1517                         if (number_parse($0->val.num, tail, $1.txt) == 0)
1518                                 mpq_init($0->val.num);
1519                                 if (tail[0])
1520                                         tok_err(config2context(config), "error: unsupported number suffix",
1521                                                 &$1);
1522                         }
1523                         }$
1524                 | STRING ${
1525                         $0 = new_pos(val, $1);
1526                         $0->val.type = Tstr;
1527                         {
1528                         char tail[3];
1529                         string_parse(&$1, '\\', &$0->val.str, tail);
1530                         if (tail[0])
1531                                 tok_err(config2context(config), "error: unsupported string suffix",
1532                                         &$1);
1533                         }
1534                         }$
1535                 | MULTI_STRING ${
1536                         $0 = new_pos(val, $1);
1537                         $0->val.type = Tstr;
1538                         {
1539                         char tail[3];
1540                         string_parse(&$1, '\\', &$0->val.str, tail);
1541                         if (tail[0])
1542                                 tok_err(config2context(config), "error: unsupported string suffix",
1543                                         &$1);
1544                         }
1545                         }$
1546
1547 ###### print exec cases
1548         case Xval:
1549         {
1550                 struct val *v = cast(val, e);
1551                 if (v->val.type == Tstr)
1552                         printf("\"");
1553                 print_value(v->val);
1554                 if (v->val.type == Tstr)
1555                         printf("\"");
1556                 break;
1557         }
1558
1559 ###### propagate exec cases
1560                 case Xval:
1561                 {
1562                         struct val *val = cast(val, prog);
1563                         if (!vtype_compat(type, val->val.type, rules)) {
1564                                 type_err(c, "error: expected %1%r found %2",
1565                                            prog, type, rules, val->val.type);
1566                                 *ok = 0;
1567                         }
1568                         return val->val.type;
1569                 }
1570
1571 ###### interp exec cases
1572         case Xval:
1573                 return dup_value(cast(val, e)->val);
1574
1575 ###### ast functions
1576         static void free_val(struct val *v)
1577         {
1578                 if (!v)
1579                         return;
1580                 free_value(v->val);
1581                 free(v);
1582         }
1583
1584 ###### free exec cases
1585         case Xval: free_val(cast(val, e)); break;
1586
1587 ###### ast functions
1588         // Move all nodes from 'b' to 'rv', reversing the order.
1589         // In 'b' 'left' is a list, and 'right' is the last node.
1590         // In 'rv', left' is the first node and 'right' is a list.
1591         static struct binode *reorder_bilist(struct binode *b)
1592         {
1593                 struct binode *rv = NULL;
1594
1595                 while (b) {
1596                         struct exec *t = b->right;
1597                         b->right = rv;
1598                         rv = b;
1599                         if (b->left)
1600                                 b = cast(binode, b->left);
1601                         else
1602                                 b = NULL;
1603                         rv->left = t;
1604                 }
1605                 return rv;
1606         }
1607
1608 ### Variables
1609
1610 Just as we used a `val` to wrap a value into an `exec`, we similarly
1611 need a `var` to wrap a `variable` into an exec.  While each `val`
1612 contained a copy of the value, each `var` hold a link to the variable
1613 because it really is the same variable no matter where it appears.
1614 When a variable is used, we need to remember to follow the `->merged`
1615 link to find the primary instance.
1616
1617 ###### exec type
1618         Xvar,
1619
1620 ###### ast
1621         struct var {
1622                 struct exec;
1623                 struct variable *var;
1624         };
1625
1626 ###### Grammar
1627
1628         $*var
1629         VariableDecl -> IDENTIFIER : ${ {
1630                 struct variable *v = var_decl(config2context(config), $1.txt);
1631                 $0 = new_pos(var, $1);
1632                 $0->var = v;
1633                 if (v)
1634                         v->where_decl = $0;
1635                 else {
1636                         v = var_ref(config2context(config), $1.txt);
1637                         $0->var = v;
1638                         type_err(config2context(config), "error: variable '%v' redeclared",
1639                                  $0, Tnone, 0, Tnone);
1640                         type_err(config2context(config), "info: this is where '%v' was first declared",
1641                                  v->where_decl, Tnone, 0, Tnone);
1642                 }
1643         } }$
1644             | IDENTIFIER :: ${ {
1645                 struct variable *v = var_decl(config2context(config), $1.txt);
1646                 $0 = new_pos(var, $1);
1647                 $0->var = v;
1648                 if (v) {
1649                         v->where_decl = $0;
1650                         v->constant = 1;
1651                 } else {
1652                         v = var_ref(config2context(config), $1.txt);
1653                         $0->var = v;
1654                         type_err(config2context(config), "error: variable '%v' redeclared",
1655                                  $0, Tnone, 0, Tnone);
1656                         type_err(config2context(config), "info: this is where '%v' was first declared",
1657                                  v->where_decl, Tnone, 0, Tnone);
1658                 }
1659         } }$
1660             | IDENTIFIER : Type ${ {
1661                 struct variable *v = var_decl(config2context(config), $1.txt);
1662                 $0 = new_pos(var, $1);
1663                 $0->var = v;
1664                 if (v) {
1665                         v->where_decl = $0;
1666                         v->where_set = $0;
1667                         v->val = val_prepare($<3);
1668                 } else {
1669                         v = var_ref(config2context(config), $1.txt);
1670                         $0->var = v;
1671                         type_err(config2context(config), "error: variable '%v' redeclared",
1672                                  $0, Tnone, 0, Tnone);
1673                         type_err(config2context(config), "info: this is where '%v' was first declared",
1674                                  v->where_decl, Tnone, 0, Tnone);
1675                 }
1676         } }$
1677             | IDENTIFIER :: Type ${ {
1678                 struct variable *v = var_decl(config2context(config), $1.txt);
1679                 $0 = new_pos(var, $1);
1680                 $0->var = v;
1681                 if (v) {
1682                         v->where_decl = $0;
1683                         v->where_set = $0;
1684                         v->val = val_prepare($<3);
1685                         v->constant = 1;
1686                 } else {
1687                         v = var_ref(config2context(config), $1.txt);
1688                         $0->var = v;
1689                         type_err(config2context(config), "error: variable '%v' redeclared",
1690                                  $0, Tnone, 0, Tnone);
1691                         type_err(config2context(config), "info: this is where '%v' was first declared",
1692                                  v->where_decl, Tnone, 0, Tnone);
1693                 }
1694         } }$
1695
1696         Variable -> IDENTIFIER ${ {
1697                 struct variable *v = var_ref(config2context(config), $1.txt);
1698                 $0 = new_pos(var, $1);
1699                 if (v == NULL) {
1700                         /* This might be a label - allocate a var just in case */
1701                         v = var_decl(config2context(config), $1.txt);
1702                         if (v) {
1703                                 v->val = val_prepare(Tlabel);
1704                                 v->val.label = &v->val;
1705                                 v->where_set = $0;
1706                         }
1707                 }
1708                 $0->var = v;
1709         } }$
1710
1711         $*type
1712         Type -> IDENTIFIER ${
1713                 $0 = find_type(config2context(config), $1.txt);
1714                 if (!$0) {
1715                         tok_err(config2context(config),
1716                                 "error: undefined type", &$1);
1717
1718                         $0 = Tnone;
1719                 }
1720         }$
1721
1722 ###### print exec cases
1723         case Xvar:
1724         {
1725                 struct var *v = cast(var, e);
1726                 if (v->var) {
1727                         struct binding *b = v->var->name;
1728                         printf("%.*s", b->name.len, b->name.txt);
1729                 }
1730                 break;
1731         }
1732
1733 ###### format cases
1734         case 'v':
1735                 if (loc->type == Xvar) {
1736                         struct var *v = cast(var, loc);
1737                         if (v->var) {
1738                                 struct binding *b = v->var->name;
1739                                 fprintf(stderr, "%.*s", b->name.len, b->name.txt);
1740                         } else
1741                                 fputs("???", stderr);
1742                 } else
1743                         fputs("NOTVAR", stderr);
1744                 break;
1745
1746 ###### propagate exec cases
1747
1748         case Xvar:
1749         {
1750                 struct var *var = cast(var, prog);
1751                 struct variable *v = var->var;
1752                 if (!v) {
1753                         type_err(c, "%d:BUG: no variable!!", prog, Tnone, 0, Tnone);
1754                         *ok = 0;
1755                         return Tnone;
1756                 }
1757                 if (v->merged)
1758                         v = v->merged;
1759                 if (v->val.type == NULL) {
1760                         if (type && *ok != 0) {
1761                                 v->val = val_prepare(type);
1762                                 v->where_set = prog;
1763                                 *ok = 2;
1764                         }
1765                         return type;
1766                 }
1767                 if (!vtype_compat(type, v->val.type, rules)) {
1768                         type_err(c, "error: expected %1%r but variable '%v' is %2", prog,
1769                                  type, rules, v->val.type);
1770                         type_err(c, "info: this is where '%v' was set to %1", v->where_set,
1771                                  v->val.type, rules, Tnone);
1772                         *ok = 0;
1773                 }
1774                 if (!type)
1775                         return v->val.type;
1776                 return type;
1777         }
1778
1779 ###### interp exec cases
1780         case Xvar:
1781         {
1782                 struct var *var = cast(var, e);
1783                 struct variable *v = var->var;
1784
1785                 if (v->merged)
1786                         v = v->merged;
1787                 return dup_value(v->val);
1788         }
1789
1790 ###### ast functions
1791
1792         static void free_var(struct var *v)
1793         {
1794                 free(v);
1795         }
1796
1797 ###### free exec cases
1798         case Xvar: free_var(cast(var, e)); break;
1799
1800 ### Expressions: Boolean
1801
1802 Our first user of the `binode` will be expressions, and particularly
1803 Boolean expressions.  As I haven't implemented precedence in the
1804 parser generator yet, we need different names for each precedence
1805 level used by expressions.  The outer most or lowest level precedence
1806 are Boolean `or` `and`, and `not` which form an `Expression` out of `BTerm`s
1807 and `BFact`s.
1808
1809 ###### Binode types
1810         And,
1811         Or,
1812         Not,
1813
1814 ###### Grammar
1815
1816         $*exec
1817         Expression -> Expression or BTerm ${ {
1818                         struct binode *b = new(binode);
1819                         b->op = Or;
1820                         b->left = $<1;
1821                         b->right = $<3;
1822                         $0 = b;
1823                 } }$
1824                 | BTerm ${ $0 = $<1; }$
1825
1826         BTerm -> BTerm and BFact ${ {
1827                         struct binode *b = new(binode);
1828                         b->op = And;
1829                         b->left = $<1;
1830                         b->right = $<3;
1831                         $0 = b;
1832                 } }$
1833                 | BFact ${ $0 = $<1; }$
1834
1835         BFact -> not BFact ${ {
1836                         struct binode *b = new(binode);
1837                         b->op = Not;
1838                         b->right = $<2;
1839                         $0 = b;
1840                 } }$
1841                 ## other BFact
1842
1843 ###### print binode cases
1844         case And:
1845                 print_exec(b->left, -1, 0);
1846                 printf(" and ");
1847                 print_exec(b->right, -1, 0);
1848                 break;
1849         case Or:
1850                 print_exec(b->left, -1, 0);
1851                 printf(" or ");
1852                 print_exec(b->right, -1, 0);
1853                 break;
1854         case Not:
1855                 printf("not ");
1856                 print_exec(b->right, -1, 0);
1857                 break;
1858
1859 ###### propagate binode cases
1860         case And:
1861         case Or:
1862         case Not:
1863                 /* both must be Tbool, result is Tbool */
1864                 propagate_types(b->left, c, ok, Tbool, 0);
1865                 propagate_types(b->right, c, ok, Tbool, 0);
1866                 if (type && type != Tbool) {
1867                         type_err(c, "error: %1 operation found where %2 expected", prog,
1868                                    Tbool, 0, type);
1869                         *ok = 0;
1870                 }
1871                 return Tbool;
1872
1873 ###### interp binode cases
1874         case And:
1875                 rv = interp_exec(b->left);
1876                 right = interp_exec(b->right);
1877                 rv.bool = rv.bool && right.bool;
1878                 break;
1879         case Or:
1880                 rv = interp_exec(b->left);
1881                 right = interp_exec(b->right);
1882                 rv.bool = rv.bool || right.bool;
1883                 break;
1884         case Not:
1885                 rv = interp_exec(b->right);
1886                 rv.bool = !rv.bool;
1887                 break;
1888
1889 ### Expressions: Comparison
1890
1891 Of slightly higher precedence that Boolean expressions are
1892 Comparisons.
1893 A comparison takes arguments of any type, but the two types must be
1894 the same.
1895
1896 To simplify the parsing we introduce an `eop` which can record an
1897 expression operator.
1898
1899 ###### ast
1900         struct eop {
1901                 enum Btype op;
1902         };
1903
1904 ###### ast functions
1905         static void free_eop(struct eop *e)
1906         {
1907                 if (e)
1908                         free(e);
1909         }
1910
1911 ###### Binode types
1912         Less,
1913         Gtr,
1914         LessEq,
1915         GtrEq,
1916         Eql,
1917         NEql,
1918
1919 ###### other BFact
1920         | Expr CMPop Expr ${ {
1921                         struct binode *b = new(binode);
1922                         b->op = $2.op;
1923                         b->left = $<1;
1924                         b->right = $<3;
1925                         $0 = b;
1926         } }$
1927         | Expr ${ $0 = $<1; }$
1928
1929 ###### Grammar
1930
1931         $eop
1932         CMPop ->   < ${ $0.op = Less; }$
1933                 |  > ${ $0.op = Gtr; }$
1934                 |  <= ${ $0.op = LessEq; }$
1935                 |  >= ${ $0.op = GtrEq; }$
1936                 |  == ${ $0.op = Eql; }$
1937                 |  != ${ $0.op = NEql; }$
1938
1939 ###### print binode cases
1940
1941         case Less:
1942         case LessEq:
1943         case Gtr:
1944         case GtrEq:
1945         case Eql:
1946         case NEql:
1947                 print_exec(b->left, -1, 0);
1948                 switch(b->op) {
1949                 case Less:   printf(" < "); break;
1950                 case LessEq: printf(" <= "); break;
1951                 case Gtr:    printf(" > "); break;
1952                 case GtrEq:  printf(" >= "); break;
1953                 case Eql:    printf(" == "); break;
1954                 case NEql:   printf(" != "); break;
1955                 default: abort();
1956                 }
1957                 print_exec(b->right, -1, 0);
1958                 break;
1959
1960 ###### propagate binode cases
1961         case Less:
1962         case LessEq:
1963         case Gtr:
1964         case GtrEq:
1965         case Eql:
1966         case NEql:
1967                 /* Both must match but not be labels, result is Tbool */
1968                 t = propagate_types(b->left, c, ok, NULL, Rnolabel);
1969                 if (t)
1970                         propagate_types(b->right, c, ok, t, 0);
1971                 else {
1972                         t = propagate_types(b->right, c, ok, NULL, Rnolabel);
1973                         if (t)
1974                                 t = propagate_types(b->left, c, ok, t, 0);
1975                 }
1976                 if (!vtype_compat(type, Tbool, 0)) {
1977                         type_err(c, "error: Comparison returns %1 but %2 expected", prog,
1978                                     Tbool, rules, type);
1979                         *ok = 0;
1980                 }
1981                 return Tbool;
1982
1983 ###### interp binode cases
1984         case Less:
1985         case LessEq:
1986         case Gtr:
1987         case GtrEq:
1988         case Eql:
1989         case NEql:
1990         {
1991                 int cmp;
1992                 left = interp_exec(b->left);
1993                 right = interp_exec(b->right);
1994                 cmp = value_cmp(left, right);
1995                 rv.type = Tbool;
1996                 switch (b->op) {
1997                 case Less:      rv.bool = cmp <  0; break;
1998                 case LessEq:    rv.bool = cmp <= 0; break;
1999                 case Gtr:       rv.bool = cmp >  0; break;
2000                 case GtrEq:     rv.bool = cmp >= 0; break;
2001                 case Eql:       rv.bool = cmp == 0; break;
2002                 case NEql:      rv.bool = cmp != 0; break;
2003                 default: rv.bool = 0; break;
2004                 }
2005                 break;
2006         }
2007
2008 ### Expressions: The rest
2009
2010 The remaining expressions with the highest precedence are arithmetic
2011 and string concatenation.  They are `Expr`, `Term`, and `Factor`.
2012 The `Factor` is where the `Value` and `Variable` that we already have
2013 are included.
2014
2015 `+` and `-` are both infix and prefix operations (where they are
2016 absolute value and negation).  These have different operator names.
2017
2018 We also have a 'Bracket' operator which records where parentheses were
2019 found.  This makes it easy to reproduce these when printing.  Once
2020 precedence is handled better I might be able to discard this.
2021
2022 ###### Binode types
2023         Plus, Minus,
2024         Times, Divide,
2025         Concat,
2026         Absolute, Negate,
2027         Bracket,
2028
2029 ###### Grammar
2030
2031         $*exec
2032         Expr -> Expr Eop Term ${ {
2033                         struct binode *b = new(binode);
2034                         b->op = $2.op;
2035                         b->left = $<1;
2036                         b->right = $<3;
2037                         $0 = b;
2038                 } }$
2039                 | Term ${ $0 = $<1; }$
2040
2041         Term -> Term Top Factor ${ {
2042                         struct binode *b = new(binode);
2043                         b->op = $2.op;
2044                         b->left = $<1;
2045                         b->right = $<3;
2046                         $0 = b;
2047                 } }$
2048                 | Factor ${ $0 = $<1; }$
2049
2050         Factor -> ( Expression ) ${ {
2051                         struct binode *b = new_pos(binode, $1);
2052                         b->op = Bracket;
2053                         b->right = $<2;
2054                         $0 = b;
2055                 } }$
2056                 | Uop Factor ${ {
2057                         struct binode *b = new(binode);
2058                         b->op = $1.op;
2059                         b->right = $<2;
2060                         $0 = b;
2061                 } }$
2062                 | Value ${ $0 = $<1; }$
2063                 | Variable ${ $0 = $<1; }$
2064
2065         $eop
2066         Eop ->    + ${ $0.op = Plus; }$
2067                 | - ${ $0.op = Minus; }$
2068
2069         Uop ->    + ${ $0.op = Absolute; }$
2070                 | - ${ $0.op = Negate; }$
2071
2072         Top ->    * ${ $0.op = Times; }$
2073                 | / ${ $0.op = Divide; }$
2074                 | ++ ${ $0.op = Concat; }$
2075
2076 ###### print binode cases
2077         case Plus:
2078         case Minus:
2079         case Times:
2080         case Divide:
2081         case Concat:
2082                 print_exec(b->left, indent, 0);
2083                 switch(b->op) {
2084                 case Plus:   printf(" + "); break;
2085                 case Minus:  printf(" - "); break;
2086                 case Times:  printf(" * "); break;
2087                 case Divide: printf(" / "); break;
2088                 case Concat: printf(" ++ "); break;
2089                 default: abort();
2090                 }
2091                 print_exec(b->right, indent, 0);
2092                 break;
2093         case Absolute:
2094                 printf("+");
2095                 print_exec(b->right, indent, 0);
2096                 break;
2097         case Negate:
2098                 printf("-");
2099                 print_exec(b->right, indent, 0);
2100                 break;
2101         case Bracket:
2102                 printf("(");
2103                 print_exec(b->right, indent, 0);
2104                 printf(")");
2105                 break;
2106
2107 ###### propagate binode cases
2108         case Plus:
2109         case Minus:
2110         case Times:
2111         case Divide:
2112                 /* both must be numbers, result is Tnum */
2113         case Absolute:
2114         case Negate:
2115                 /* as propagate_types ignores a NULL,
2116                  * unary ops fit here too */
2117                 propagate_types(b->left, c, ok, Tnum, 0);
2118                 propagate_types(b->right, c, ok, Tnum, 0);
2119                 if (!vtype_compat(type, Tnum, 0)) {
2120                         type_err(c, "error: Arithmetic returns %1 but %2 expected", prog,
2121                                    Tnum, rules, type);
2122                         *ok = 0;
2123                 }
2124                 return Tnum;
2125
2126         case Concat:
2127                 /* both must be Tstr, result is Tstr */
2128                 propagate_types(b->left, c, ok, Tstr, 0);
2129                 propagate_types(b->right, c, ok, Tstr, 0);
2130                 if (!vtype_compat(type, Tstr, 0)) {
2131                         type_err(c, "error: Concat returns %1 but %2 expected", prog,
2132                                    Tstr, rules, type);
2133                         *ok = 0;
2134                 }
2135                 return Tstr;
2136
2137         case Bracket:
2138                 return propagate_types(b->right, c, ok, type, 0);
2139
2140 ###### interp binode cases
2141
2142         case Plus:
2143                 rv = interp_exec(b->left);
2144                 right = interp_exec(b->right);
2145                 mpq_add(rv.num, rv.num, right.num);
2146                 break;
2147         case Minus:
2148                 rv = interp_exec(b->left);
2149                 right = interp_exec(b->right);
2150                 mpq_sub(rv.num, rv.num, right.num);
2151                 break;
2152         case Times:
2153                 rv = interp_exec(b->left);
2154                 right = interp_exec(b->right);
2155                 mpq_mul(rv.num, rv.num, right.num);
2156                 break;
2157         case Divide:
2158                 rv = interp_exec(b->left);
2159                 right = interp_exec(b->right);
2160                 mpq_div(rv.num, rv.num, right.num);
2161                 break;
2162         case Negate:
2163                 rv = interp_exec(b->right);
2164                 mpq_neg(rv.num, rv.num);
2165                 break;
2166         case Absolute:
2167                 rv = interp_exec(b->right);
2168                 mpq_abs(rv.num, rv.num);
2169                 break;
2170         case Bracket:
2171                 rv = interp_exec(b->right);
2172                 break;
2173         case Concat:
2174                 left = interp_exec(b->left);
2175                 right = interp_exec(b->right);
2176                 rv.type = Tstr;
2177                 rv.str = text_join(left.str, right.str);
2178                 break;
2179
2180
2181 ###### value functions
2182
2183         static struct text text_join(struct text a, struct text b)
2184         {
2185                 struct text rv;
2186                 rv.len = a.len + b.len;
2187                 rv.txt = malloc(rv.len);
2188                 memcpy(rv.txt, a.txt, a.len);
2189                 memcpy(rv.txt+a.len, b.txt, b.len);
2190                 return rv;
2191         }
2192
2193
2194 ### Blocks, Statements, and Statement lists.
2195
2196 Now that we have expressions out of the way we need to turn to
2197 statements.  There are simple statements and more complex statements.
2198 Simple statements do not contain newlines, complex statements do.
2199
2200 Statements often come in sequences and we have corresponding simple
2201 statement lists and complex statement lists.
2202 The former comprise only simple statements separated by semicolons.
2203 The later comprise complex statements and simple statement lists.  They are
2204 separated by newlines.  Thus the semicolon is only used to separate
2205 simple statements on the one line.  This may be overly restrictive,
2206 but I'm not sure I ever want a complex statement to share a line with
2207 anything else.
2208
2209 Note that a simple statement list can still use multiple lines if
2210 subsequent lines are indented, so
2211
2212 ###### Example: wrapped simple statement list
2213
2214         a = b; c = d;
2215            e = f; print g
2216
2217 is a single simple statement list.  This might allow room for
2218 confusion, so I'm not set on it yet.
2219
2220 A simple statement list needs no extra syntax.  A complex statement
2221 list has two syntactic forms.  It can be enclosed in braces (much like
2222 C blocks), or it can be introduced by a colon and continue until an
2223 unindented newline (much like Python blocks).  With this extra syntax
2224 it is referred to as a block.
2225
2226 Note that a block does not have to include any newlines if it only
2227 contains simple statements.  So both of:
2228
2229         if condition: a=b; d=f
2230
2231         if condition { a=b; print f }
2232
2233 are valid.
2234
2235 In either case the list is constructed from a `binode` list with
2236 `Block` as the operator.  When parsing the list it is most convenient
2237 to append to the end, so a list is a list and a statement.  When using
2238 the list it is more convenient to consider a list to be a statement
2239 and a list.  So we need a function to re-order a list.
2240 `reorder_bilist` serves this purpose.
2241
2242 The only stand-alone statement we introduce at this stage is `pass`
2243 which does nothing and is represented as a `NULL` pointer in a `Block`
2244 list.  Other stand-alone statements will follow once the infrastructure
2245 is in-place.
2246
2247 ###### Binode types
2248         Block,
2249
2250 ###### Grammar
2251
2252         $void
2253         OptNL -> Newlines
2254                 |
2255
2256         Newlines -> NEWLINE
2257                 | Newlines NEWLINE
2258
2259         $*binode
2260         Open -> {
2261                 | NEWLINE {
2262         Close -> }
2263                 | NEWLINE }
2264         Block -> Open Statementlist Close ${ $0 = $<2; }$
2265                 | Open Newlines Statementlist Close ${ $0 = $<3; }$
2266                 | Open SimpleStatements } ${ $0 = reorder_bilist($<2); }$
2267                 | Open Newlines SimpleStatements } ${ $0 = reorder_bilist($<3); }$
2268                 | : Statementlist ${ $0 = $<2; }$
2269                 | : SimpleStatements ${ $0 = reorder_bilist($<2); }$
2270
2271         Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<1); }$
2272
2273         ComplexStatements -> ComplexStatements ComplexStatement ${
2274                 $0 = new(binode);
2275                 $0->op = Block;
2276                 $0->left = $<1;
2277                 $0->right = $<2;
2278                 }$
2279                 | ComplexStatements NEWLINE ${ $0 = $<1; }$
2280                 | ComplexStatement ${
2281                 $0 = new(binode);
2282                 $0->op = Block;
2283                 $0->left = NULL;
2284                 $0->right = $<1;
2285                 }$
2286
2287         $*exec
2288         ComplexStatement -> SimpleStatements NEWLINE ${
2289                         $0 = reorder_bilist($<1);
2290                         }$
2291                 ## ComplexStatement Grammar
2292
2293         $*binode
2294         SimpleStatements -> SimpleStatements ; SimpleStatement ${
2295                         $0 = new(binode);
2296                         $0->op = Block;
2297                         $0->left = $<1;
2298                         $0->right = $<3;
2299                         }$
2300                 | SimpleStatement ${
2301                         $0 = new(binode);
2302                         $0->op = Block;
2303                         $0->left = NULL;
2304                         $0->right = $<1;
2305                         }$
2306                 | SimpleStatements ; ${ $0 = $<1; }$
2307
2308         SimpleStatement -> pass ${ $0 = NULL; }$
2309                 ## SimpleStatement Grammar
2310
2311 ###### print binode cases
2312         case Block:
2313                 if (indent < 0) {
2314                         // simple statement
2315                         if (b->left == NULL)
2316                                 printf("pass");
2317                         else
2318                                 print_exec(b->left, indent, 0);
2319                         if (b->right) {
2320                                 printf("; ");
2321                                 print_exec(b->right, indent, 0);
2322                         }
2323                 } else {
2324                         // block, one per line
2325                         if (b->left == NULL)
2326                                 do_indent(indent, "pass\n");
2327                         else
2328                                 print_exec(b->left, indent, bracket);
2329                         if (b->right)
2330                                 print_exec(b->right, indent, bracket);
2331                 }
2332                 break;
2333
2334 ###### propagate binode cases
2335         case Block:
2336         {
2337                 /* If any statement returns something other than Tnone
2338                  * or Tbool then all such must return same type.
2339                  * As each statement may be Tnone or something else,
2340                  * we must always pass NULL (unknown) down, otherwise an incorrect
2341                  * error might occur.  We never return Tnone unless it is
2342                  * passed in.
2343                  */
2344                 struct binode *e;
2345
2346                 for (e = b; e; e = cast(binode, e->right)) {
2347                         t = propagate_types(e->left, c, ok, NULL, rules);
2348                         if ((rules & Rboolok) && t == Tbool)
2349                                 t = NULL;
2350                         if (t && t != Tnone && t != Tbool) {
2351                                 if (!type)
2352                                         type = t;
2353                                 else if (t != type) {
2354                                         type_err(c, "error: expected %1%r, found %2",
2355                                                  e->left, type, rules, t);
2356                                         *ok = 0;
2357                                 }
2358                         }
2359                 }
2360                 return type;
2361         }
2362
2363 ###### interp binode cases
2364         case Block:
2365                 while (rv.type == Tnone &&
2366                        b) {
2367                         if (b->left)
2368                                 rv = interp_exec(b->left);
2369                         b = cast(binode, b->right);
2370                 }
2371                 break;
2372
2373 ### The Print statement
2374
2375 `print` is a simple statement that takes a comma-separated list of
2376 expressions and prints the values separated by spaces and terminated
2377 by a newline.  No control of formatting is possible.
2378
2379 `print` faces the same list-ordering issue as blocks, and uses the
2380 same solution.
2381
2382 ###### Binode types
2383         Print,
2384
2385 ###### SimpleStatement Grammar
2386
2387         | print ExpressionList ${
2388                 $0 = reorder_bilist($<2);
2389         }$
2390         | print ExpressionList , ${
2391                 $0 = new(binode);
2392                 $0->op = Print;
2393                 $0->right = NULL;
2394                 $0->left = $<2;
2395                 $0 = reorder_bilist($0);
2396         }$
2397         | print ${
2398                 $0 = new(binode);
2399                 $0->op = Print;
2400                 $0->right = NULL;
2401         }$
2402
2403 ###### Grammar
2404
2405         $*binode
2406         ExpressionList -> ExpressionList , Expression ${
2407                 $0 = new(binode);
2408                 $0->op = Print;
2409                 $0->left = $<1;
2410                 $0->right = $<3;
2411                 }$
2412                 | Expression ${
2413                         $0 = new(binode);
2414                         $0->op = Print;
2415                         $0->left = NULL;
2416                         $0->right = $<1;
2417                 }$
2418
2419 ###### print binode cases
2420
2421         case Print:
2422                 do_indent(indent, "print");
2423                 while (b) {
2424                         if (b->left) {
2425                                 printf(" ");
2426                                 print_exec(b->left, -1, 0);
2427                                 if (b->right)
2428                                         printf(",");
2429                         }
2430                         b = cast(binode, b->right);
2431                 }
2432                 if (indent >= 0)
2433                         printf("\n");
2434                 break;
2435
2436 ###### propagate binode cases
2437
2438         case Print:
2439                 /* don't care but all must be consistent */
2440                 propagate_types(b->left, c, ok, NULL, Rnolabel);
2441                 propagate_types(b->right, c, ok, NULL, Rnolabel);
2442                 break;
2443
2444 ###### interp binode cases
2445
2446         case Print:
2447         {
2448                 char sep = 0;
2449                 int eol = 1;
2450                 for ( ; b; b = cast(binode, b->right))
2451                         if (b->left) {
2452                                 if (sep)
2453                                         putchar(sep);
2454                                 left = interp_exec(b->left);
2455                                 print_value(left);
2456                                 free_value(left);
2457                                 if (b->right)
2458                                         sep = ' ';
2459                         } else if (sep)
2460                                 eol = 0;
2461                 left.type = Tnone;
2462                 if (eol)
2463                         printf("\n");
2464                 break;
2465         }
2466
2467 ###### Assignment statement
2468
2469 An assignment will assign a value to a variable, providing it hasn't
2470 be declared as a constant.  The analysis phase ensures that the type
2471 will be correct so the interpreter just needs to perform the
2472 calculation.  There is a form of assignment which declares a new
2473 variable as well as assigning a value.  If a name is assigned before
2474 it is declared, and error will be raised as the name is created as
2475 `Tlabel` and it is illegal to assign to such names.
2476
2477 ###### Binode types
2478         Assign,
2479         Declare,
2480
2481 ###### SimpleStatement Grammar
2482         | Variable = Expression ${ {
2483                         struct var *v = cast(var, $1);
2484
2485                         $0 = new(binode);
2486                         $0->op = Assign;
2487                         $0->left = $<1;
2488                         $0->right = $<3;
2489                         if (v->var && v->var->constant) {
2490                                 type_err(config2context(config), "Cannot assign to a constant: %v",
2491                                          $0->left, NULL, 0, NULL);
2492                                 type_err(config2context(config), "name was defined as a constant here",
2493                                          v->var->where_decl, NULL, 0, NULL);
2494                         }
2495                 } }$
2496         | VariableDecl = Expression ${
2497                         $0 = new(binode);
2498                         $0->op = Declare;
2499                         $0->left = $<1;
2500                         $0->right =$<3;
2501                 }$
2502
2503         | VariableDecl ${
2504                         if ($1->var->where_set == NULL) {
2505                                 type_err(config2context(config), "Variable declared with no type or value: %v",
2506                                          $1, NULL, 0, NULL);
2507                         } else {
2508                                 $0 = new(binode);
2509                                 $0->op = Declare;
2510                                 $0->left = $<1;
2511                                 $0->right = NULL;
2512                         }
2513                 }$
2514
2515 ###### print binode cases
2516
2517         case Assign:
2518                 do_indent(indent, "");
2519                 print_exec(b->left, indent, 0);
2520                 printf(" = ");
2521                 print_exec(b->right, indent, 0);
2522                 if (indent >= 0)
2523                         printf("\n");
2524                 break;
2525
2526         case Declare:
2527                 {
2528                 struct variable *v = cast(var, b->left)->var;
2529                 do_indent(indent, "");
2530                 print_exec(b->left, indent, 0);
2531                 if (cast(var, b->left)->var->constant) {
2532                         if (v->where_decl == v->where_set)
2533                                 printf("::%.*s ", v->val.type->name.len,
2534                                        v->val.type->name.txt);
2535                         else
2536                                 printf(" ::");
2537                 } else {
2538                         if (v->where_decl == v->where_set)
2539                                 printf(":%.*s ", v->val.type->name.len,
2540                                        v->val.type->name.txt);
2541                         else
2542                                 printf(" :");
2543                 }
2544                 if (b->right) {
2545                         printf("= ");
2546                         print_exec(b->right, indent, 0);
2547                 }
2548                 if (indent >= 0)
2549                         printf("\n");
2550                 }
2551                 break;
2552
2553 ###### propagate binode cases
2554
2555         case Assign:
2556         case Declare:
2557                 /* Both must match and not be labels,
2558                  * Type must support 'dup',
2559                  * result is Tnone */
2560                 t = propagate_types(b->left, c, ok, NULL, Rnolabel);
2561                 if (!b->right)
2562                         return Tnone;
2563
2564                 if (t) {
2565                         if (propagate_types(b->right, c, ok, t, 0) != t)
2566                                 if (b->left->type == Xvar)
2567                                         type_err(c, "info: variable '%v' was set as %1 here.",
2568                                                  cast(var, b->left)->var->where_set, t, rules, Tnone);
2569                 } else {
2570                         t = propagate_types(b->right, c, ok, NULL, Rnolabel);
2571                         if (t)
2572                                 propagate_types(b->left, c, ok, t, 0);
2573                 }
2574                 if (t && t->dup == NULL) {
2575                         type_err(c, "error: cannot assign value of type %1", b, t, 0, NULL);
2576                         *ok = 0;
2577                 }
2578                 return Tnone;
2579
2580                 break;
2581
2582 ###### interp binode cases
2583
2584         case Assign:
2585         {
2586                 struct variable *v = cast(var, b->left)->var;
2587                 if (v->merged)
2588                         v = v->merged;
2589                 right = interp_exec(b->right);
2590                 free_value(v->val);
2591                 v->val = right;
2592                 right.type = NULL;
2593                 break;
2594         }
2595
2596         case Declare:
2597         {
2598                 struct variable *v = cast(var, b->left)->var;
2599                 if (v->merged)
2600                         v = v->merged;
2601                 if (b->right)
2602                         right = interp_exec(b->right);
2603                 else
2604                         right = val_init(v->val.type);
2605                 free_value(v->val);
2606                 v->val = right;
2607                 right.type = NULL;
2608                 break;
2609         }
2610
2611 ### The `use` statement
2612
2613 The `use` statement is the last "simple" statement.  It is needed when
2614 the condition in a conditional statement is a block.  `use` works much
2615 like `return` in C, but only completes the `condition`, not the whole
2616 function.
2617
2618 ###### Binode types
2619         Use,
2620
2621 ###### SimpleStatement Grammar
2622         | use Expression ${
2623                 $0 = new_pos(binode, $1);
2624                 $0->op = Use;
2625                 $0->right = $<2;
2626         }$
2627
2628 ###### print binode cases
2629
2630         case Use:
2631                 do_indent(indent, "use ");
2632                 print_exec(b->right, -1, 0);
2633                 if (indent >= 0)
2634                         printf("\n");
2635                 break;
2636
2637 ###### propagate binode cases
2638
2639         case Use:
2640                 /* result matches value */
2641                 return propagate_types(b->right, c, ok, type, 0);
2642
2643 ###### interp binode cases
2644
2645         case Use:
2646                 rv = interp_exec(b->right);
2647                 break;
2648
2649 ### The Conditional Statement
2650
2651 This is the biggy and currently the only complex statement.  This
2652 subsumes `if`, `while`, `do/while`, `switch`, and some parts of `for`.
2653 It is comprised of a number of parts, all of which are optional though
2654 set combinations apply.  Each part is (usually) a key word (`then` is
2655 sometimes optional) followed by either an expression or a code block,
2656 except the `casepart` which is a "key word and an expression" followed
2657 by a code block.  The code-block option is valid for all parts and,
2658 where an expression is also allowed, the code block can use the `use`
2659 statement to report a value.  If the code block does not report a value
2660 the effect is similar to reporting `True`.
2661
2662 The `else` and `case` parts, as well as `then` when combined with
2663 `if`, can contain a `use` statement which will apply to some
2664 containing conditional statement. `for` parts, `do` parts and `then`
2665 parts used with `for` can never contain a `use`, except in some
2666 subordinate conditional statement.
2667
2668 If there is a `forpart`, it is executed first, only once.
2669 If there is a `dopart`, then it is executed repeatedly providing
2670 always that the `condpart` or `cond`, if present, does not return a non-True
2671 value.  `condpart` can fail to return any value if it simply executes
2672 to completion.  This is treated the same as returning `True`.
2673
2674 If there is a `thenpart` it will be executed whenever the `condpart`
2675 or `cond` returns True (or does not return any value), but this will happen
2676 *after* `dopart` (when present).
2677
2678 If `elsepart` is present it will be executed at most once when the
2679 condition returns `False` or some value that isn't `True` and isn't
2680 matched by any `casepart`.  If there are any `casepart`s, they will be
2681 executed when the condition returns a matching value.
2682
2683 The particular sorts of values allowed in case parts has not yet been
2684 determined in the language design, so nothing is prohibited.
2685
2686 The various blocks in this complex statement potentially provide scope
2687 for variables as described earlier.  Each such block must include the
2688 "OpenScope" nonterminal before parsing the block, and must call
2689 `var_block_close()` when closing the block.
2690
2691 The code following "`if`", "`switch`" and "`for`" does not get its own
2692 scope, but is in a scope covering the whole statement, so names
2693 declared there cannot be redeclared elsewhere.  Similarly the
2694 condition following "`while`" is in a scope the covers the body
2695 ("`do`" part) of the loop, and which does not allow conditional scope
2696 extension.  Code following "`then`" (both looping and non-looping),
2697 "`else`" and "`case`" each get their own local scope.
2698
2699 The type requirements on the code block in a `whilepart` are quite
2700 unusal.  It is allowed to return a value of some identifiable type, in
2701 which case the loop aborts and an appropriate `casepart` is run, or it
2702 can return a Boolean, in which case the loop either continues to the
2703 `dopart` (on `True`) or aborts and runs the `elsepart` (on `False`).
2704 This is different both from the `ifpart` code block which is expected to
2705 return a Boolean, or the `switchpart` code block which is expected to
2706 return the same type as the casepart values.  The correct analysis of
2707 the type of the `whilepart` code block is the reason for the
2708 `Rboolok` flag which is passed to `propagate_types()`.
2709
2710 The `cond_statement` cannot fit into a `binode` so a new `exec` is
2711 defined.
2712
2713 ###### exec type
2714         Xcond_statement,
2715
2716 ###### ast
2717         struct casepart {
2718                 struct exec *value;
2719                 struct exec *action;
2720                 struct casepart *next;
2721         };
2722         struct cond_statement {
2723                 struct exec;
2724                 struct exec *forpart, *condpart, *dopart, *thenpart, *elsepart;
2725                 struct casepart *casepart;
2726         };
2727
2728 ###### ast functions
2729
2730         static void free_casepart(struct casepart *cp)
2731         {
2732                 while (cp) {
2733                         struct casepart *t;
2734                         free_exec(cp->value);
2735                         free_exec(cp->action);
2736                         t = cp->next;
2737                         free(cp);
2738                         cp = t;
2739                 }
2740         }
2741
2742         static void free_cond_statement(struct cond_statement *s)
2743         {
2744                 if (!s)
2745                         return;
2746                 free_exec(s->forpart);
2747                 free_exec(s->condpart);
2748                 free_exec(s->dopart);
2749                 free_exec(s->thenpart);
2750                 free_exec(s->elsepart);
2751                 free_casepart(s->casepart);
2752                 free(s);
2753         }
2754
2755 ###### free exec cases
2756         case Xcond_statement: free_cond_statement(cast(cond_statement, e)); break;
2757
2758 ###### ComplexStatement Grammar
2759         | CondStatement ${ $0 = $<1; }$
2760
2761 ###### Grammar
2762
2763         $*cond_statement
2764         // both ForThen and Whilepart open scopes, and CondSuffix only
2765         // closes one - so in the first branch here we have another to close.
2766         CondStatement -> ForThen WhilePart CondSuffix ${
2767                         $0 = $<3;
2768                         $0->forpart = $1.forpart; $1.forpart = NULL;
2769                         $0->thenpart = $1.thenpart; $1.thenpart = NULL;
2770                         $0->condpart = $2.condpart; $2.condpart = NULL;
2771                         $0->dopart = $2.dopart; $2.dopart = NULL;
2772                         var_block_close(config2context(config), CloseSequential);
2773                         }$
2774                 | WhilePart CondSuffix ${
2775                         $0 = $<2;
2776                         $0->condpart = $1.condpart; $1.condpart = NULL;
2777                         $0->dopart = $1.dopart; $1.dopart = NULL;
2778                         }$
2779                 | SwitchPart CondSuffix ${
2780                         $0 = $<2;
2781                         $0->condpart = $<1;
2782                         }$
2783                 | IfPart IfSuffix ${
2784                         $0 = $<2;
2785                         $0->condpart = $1.condpart; $1.condpart = NULL;
2786                         $0->thenpart = $1.thenpart; $1.thenpart = NULL;
2787                         // This is where we close an "if" statement
2788                         var_block_close(config2context(config), CloseSequential);
2789                         }$
2790
2791         CondSuffix -> IfSuffix ${
2792                         $0 = $<1;
2793                         // This is where we close scope of the whole
2794                         // "for" or "while" statement
2795                         var_block_close(config2context(config), CloseSequential);
2796                 }$
2797                 | CasePart CondSuffix ${
2798                         $0 = $<2;
2799                         $1->next = $0->casepart;
2800                         $0->casepart = $<1;
2801                 }$
2802
2803         $*casepart
2804         CasePart -> Newlines case Expression OpenScope Block ${
2805                         $0 = calloc(1,sizeof(struct casepart));
2806                         $0->value = $<3;
2807                         $0->action = $<5;
2808                         var_block_close(config2context(config), CloseParallel);
2809                 }$
2810                 | case Expression OpenScope Block ${
2811                         $0 = calloc(1,sizeof(struct casepart));
2812                         $0->value = $<2;
2813                         $0->action = $<4;
2814                         var_block_close(config2context(config), CloseParallel);
2815                 }$
2816
2817         $*cond_statement
2818         IfSuffix -> Newlines ${ $0 = new(cond_statement); }$
2819                 | Newlines else OpenScope Block ${
2820                         $0 = new(cond_statement);
2821                         $0->elsepart = $<4;
2822                         var_block_close(config2context(config), CloseElse);
2823                 }$
2824                 | else OpenScope Block ${
2825                         $0 = new(cond_statement);
2826                         $0->elsepart = $<3;
2827                         var_block_close(config2context(config), CloseElse);
2828                 }$
2829                 | Newlines else OpenScope CondStatement ${
2830                         $0 = new(cond_statement);
2831                         $0->elsepart = $<4;
2832                         var_block_close(config2context(config), CloseElse);
2833                 }$
2834                 | else OpenScope CondStatement ${
2835                         $0 = new(cond_statement);
2836                         $0->elsepart = $<3;
2837                         var_block_close(config2context(config), CloseElse);
2838                 }$
2839
2840
2841         $*exec
2842         // These scopes are closed in CondSuffix
2843         ForPart -> for OpenScope SimpleStatements ${
2844                         $0 = reorder_bilist($<3);
2845                 }$
2846                 |  for OpenScope Block ${
2847                         $0 = $<3;
2848                 }$
2849
2850         ThenPart -> then OpenScope SimpleStatements ${
2851                         $0 = reorder_bilist($<3);
2852                         var_block_close(config2context(config), CloseSequential);
2853                 }$
2854                 |  then OpenScope Block ${
2855                         $0 = $<3;
2856                         var_block_close(config2context(config), CloseSequential);
2857                 }$
2858
2859         ThenPartNL -> ThenPart OptNL ${
2860                         $0 = $<1;
2861                 }$
2862
2863         // This scope is closed in CondSuffix
2864         WhileHead -> while OpenScope Block ${
2865                 $0 = $<3;
2866                 }$
2867
2868         $cond_statement
2869         ForThen -> ForPart OptNL ThenPartNL ${
2870                         $0.forpart = $<1;
2871                         $0.thenpart = $<3;
2872                 }$
2873                 | ForPart OptNL ${
2874                         $0.forpart = $<1;
2875                 }$
2876
2877         // This scope is closed in CondSuffix
2878         WhilePart -> while OpenScope Expression Block ${
2879                         $0.type = Xcond_statement;
2880                         $0.condpart = $<3;
2881                         $0.dopart = $<4;
2882                 }$
2883                 | WhileHead OptNL do Block ${
2884                         $0.type = Xcond_statement;
2885                         $0.condpart = $<1;
2886                         $0.dopart = $<4;
2887                 }$
2888
2889         IfPart -> if OpenScope Expression OpenScope Block ${
2890                         $0.type = Xcond_statement;
2891                         $0.condpart = $<3;
2892                         $0.thenpart = $<5;
2893                         var_block_close(config2context(config), CloseParallel);
2894                 }$
2895                 | if OpenScope Block OptNL then OpenScope Block ${
2896                         $0.type = Xcond_statement;
2897                         $0.condpart = $<3;
2898                         $0.thenpart = $<7;
2899                         var_block_close(config2context(config), CloseParallel);
2900                 }$
2901
2902         $*exec
2903         // This scope is closed in CondSuffix
2904         SwitchPart -> switch OpenScope Expression ${
2905                         $0 = $<3;
2906                 }$
2907                 | switch OpenScope Block ${
2908                         $0 = $<3;
2909                 }$
2910
2911 ###### print exec cases
2912
2913         case Xcond_statement:
2914         {
2915                 struct cond_statement *cs = cast(cond_statement, e);
2916                 struct casepart *cp;
2917                 if (cs->forpart) {
2918                         do_indent(indent, "for");
2919                         if (bracket) printf(" {\n"); else printf(":\n");
2920                         print_exec(cs->forpart, indent+1, bracket);
2921                         if (cs->thenpart) {
2922                                 if (bracket)
2923                                         do_indent(indent, "} then {\n");
2924                                 else
2925                                         do_indent(indent, "then:\n");
2926                                 print_exec(cs->thenpart, indent+1, bracket);
2927                         }
2928                         if (bracket) do_indent(indent, "}\n");
2929                 }
2930                 if (cs->dopart) {
2931                         // a loop
2932                         if (cs->condpart && cs->condpart->type == Xbinode &&
2933                             cast(binode, cs->condpart)->op == Block) {
2934                                 if (bracket)
2935                                         do_indent(indent, "while {\n");
2936                                 else
2937                                         do_indent(indent, "while:\n");
2938                                 print_exec(cs->condpart, indent+1, bracket);
2939                                 if (bracket)
2940                                         do_indent(indent, "} do {\n");
2941                                 else
2942                                         do_indent(indent, "do:\n");
2943                                 print_exec(cs->dopart, indent+1, bracket);
2944                                 if (bracket)
2945                                         do_indent(indent, "}\n");
2946                         } else {
2947                                 do_indent(indent, "while ");
2948                                 print_exec(cs->condpart, 0, bracket);
2949                                 if (bracket)
2950                                         printf(" {\n");
2951                                 else
2952                                         printf(":\n");
2953                                 print_exec(cs->dopart, indent+1, bracket);
2954                                 if (bracket)
2955                                         do_indent(indent, "}\n");
2956                         }
2957                 } else {
2958                         // a condition
2959                         if (cs->casepart)
2960                                 do_indent(indent, "switch");
2961                         else
2962                                 do_indent(indent, "if");
2963                         if (cs->condpart && cs->condpart->type == Xbinode &&
2964                             cast(binode, cs->condpart)->op == Block) {
2965                                 if (bracket)
2966                                         printf(" {\n");
2967                                 else
2968                                         printf(":\n");
2969                                 print_exec(cs->condpart, indent+1, bracket);
2970                                 if (bracket)
2971                                         do_indent(indent, "}\n");
2972                                 if (cs->thenpart) {
2973                                         do_indent(indent, "then:\n");
2974                                         print_exec(cs->thenpart, indent+1, bracket);
2975                                 }
2976                         } else {
2977                                 printf(" ");
2978                                 print_exec(cs->condpart, 0, bracket);
2979                                 if (cs->thenpart) {
2980                                         if (bracket)
2981                                                 printf(" {\n");
2982                                         else
2983                                                 printf(":\n");
2984                                         print_exec(cs->thenpart, indent+1, bracket);
2985                                         if (bracket)
2986                                                 do_indent(indent, "}\n");
2987                                 } else
2988                                         printf("\n");
2989                         }
2990                 }
2991                 for (cp = cs->casepart; cp; cp = cp->next) {
2992                         do_indent(indent, "case ");
2993                         print_exec(cp->value, -1, 0);
2994                         if (bracket)
2995                                 printf(" {\n");
2996                         else
2997                                 printf(":\n");
2998                         print_exec(cp->action, indent+1, bracket);
2999                         if (bracket)
3000                                 do_indent(indent, "}\n");
3001                 }
3002                 if (cs->elsepart) {
3003                         do_indent(indent, "else");
3004                         if (bracket)
3005                                 printf(" {\n");
3006                         else
3007                                 printf(":\n");
3008                         print_exec(cs->elsepart, indent+1, bracket);
3009                         if (bracket)
3010                                 do_indent(indent, "}\n");
3011                 }
3012                 break;
3013         }
3014
3015 ###### propagate exec cases
3016         case Xcond_statement:
3017         {
3018                 // forpart and dopart must return Tnone
3019                 // thenpart must return Tnone if there is a dopart,
3020                 // otherwise it is like elsepart.
3021                 // condpart must:
3022                 //    be bool if there is no casepart
3023                 //    match casepart->values if there is a switchpart
3024                 //    either be bool or match casepart->value if there
3025                 //             is a whilepart
3026                 // elsepart and casepart->action must match the return type
3027                 //   expected of this statement.
3028                 struct cond_statement *cs = cast(cond_statement, prog);
3029                 struct casepart *cp;
3030
3031                 t = propagate_types(cs->forpart, c, ok, Tnone, 0);
3032                 if (!vtype_compat(Tnone, t, 0))
3033                         *ok = 0;
3034                 t = propagate_types(cs->dopart, c, ok, Tnone, 0);
3035                 if (!vtype_compat(Tnone, t, 0))
3036                         *ok = 0;
3037                 if (cs->dopart) {
3038                         t = propagate_types(cs->thenpart, c, ok, Tnone, 0);
3039                         if (!vtype_compat(Tnone, t, 0))
3040                                 *ok = 0;
3041                 }
3042                 if (cs->casepart == NULL)
3043                         propagate_types(cs->condpart, c, ok, Tbool, 0);
3044                 else {
3045                         /* Condpart must match case values, with bool permitted */
3046                         t = NULL;
3047                         for (cp = cs->casepart;
3048                              cp && !t; cp = cp->next)
3049                                 t = propagate_types(cp->value, c, ok, NULL, 0);
3050                         if (!t && cs->condpart)
3051                                 t = propagate_types(cs->condpart, c, ok, NULL, Rboolok);
3052                         // Now we have a type (I hope) push it down
3053                         if (t) {
3054                                 for (cp = cs->casepart; cp; cp = cp->next)
3055                                         propagate_types(cp->value, c, ok, t, 0);
3056                                 propagate_types(cs->condpart, c, ok, t, Rboolok);
3057                         }
3058                 }
3059                 // (if)then, else, and case parts must return expected type.
3060                 if (!cs->dopart && !type)
3061                         type = propagate_types(cs->thenpart, c, ok, NULL, rules);
3062                 if (!type)
3063                         type = propagate_types(cs->elsepart, c, ok, NULL, rules);
3064                 for (cp = cs->casepart;
3065                      cp && !type;
3066                      cp = cp->next)
3067                         type = propagate_types(cp->action, c, ok, NULL, rules);
3068                 if (type) {
3069                         if (!cs->dopart)
3070                                 propagate_types(cs->thenpart, c, ok, type, rules);
3071                         propagate_types(cs->elsepart, c, ok, type, rules);
3072                         for (cp = cs->casepart; cp ; cp = cp->next)
3073                                 propagate_types(cp->action, c, ok, type, rules);
3074                         return type;
3075                 } else
3076                         return NULL;
3077         }
3078
3079 ###### interp exec cases
3080         case Xcond_statement:
3081         {
3082                 struct value v, cnd;
3083                 struct casepart *cp;
3084                 struct cond_statement *c = cast(cond_statement, e);
3085
3086                 if (c->forpart)
3087                         interp_exec(c->forpart);
3088                 do {
3089                         if (c->condpart)
3090                                 cnd = interp_exec(c->condpart);
3091                         else
3092                                 cnd.type = Tnone;
3093                         if (!(cnd.type == Tnone ||
3094                               (cnd.type == Tbool && cnd.bool != 0)))
3095                                 break;
3096                         // cnd is Tnone or Tbool, doesn't need to be freed
3097                         if (c->dopart)
3098                                 interp_exec(c->dopart);
3099
3100                         if (c->thenpart) {
3101                                 v = interp_exec(c->thenpart);
3102                                 if (v.type != Tnone || !c->dopart)
3103                                         return v;
3104                                 free_value(v);
3105                         }
3106                 } while (c->dopart);
3107
3108                 for (cp = c->casepart; cp; cp = cp->next) {
3109                         v = interp_exec(cp->value);
3110                         if (value_cmp(v, cnd) == 0) {
3111                                 free_value(v);
3112                                 free_value(cnd);
3113                                 return interp_exec(cp->action);
3114                         }
3115                         free_value(v);
3116                 }
3117                 free_value(cnd);
3118                 if (c->elsepart)
3119                         return interp_exec(c->elsepart);
3120                 v.type = Tnone;
3121                 return v;
3122         }
3123
3124 ### Finally the whole program.
3125
3126 Somewhat reminiscent of Pascal a (current) Ocean program starts with
3127 the keyword "program" and a list of variable names which are assigned
3128 values from command line arguments.  Following this is a `block` which
3129 is the code to execute.
3130
3131 As this is the top level, several things are handled a bit
3132 differently.
3133 The whole program is not interpreted by `interp_exec` as that isn't
3134 passed the argument list which the program requires.  Similarly type
3135 analysis is a bit more interesting at this level.
3136
3137 ###### Binode types
3138         Program,
3139
3140 ###### Parser: grammar
3141
3142         $*binode
3143         Program -> program OpenScope Varlist Block OptNL ${
3144                 $0 = new(binode);
3145                 $0->op = Program;
3146                 $0->left = reorder_bilist($<3);
3147                 $0->right = $<4;
3148                 var_block_close(config2context(config), CloseSequential);
3149                 if (config2context(config)->scope_stack) abort();
3150                 }$
3151                 | ERROR ${
3152                         tok_err(config2context(config),
3153                                 "error: unhandled parse error", &$1);
3154                 }$
3155
3156         Varlist -> Varlist ArgDecl ${
3157                         $0 = new(binode);
3158                         $0->op = Program;
3159                         $0->left = $<1;
3160                         $0->right = $<2;
3161                 }$
3162                 | ${ $0 = NULL; }$
3163
3164         $*var
3165         ArgDecl -> IDENTIFIER ${ {
3166                 struct variable *v = var_decl(config2context(config), $1.txt);
3167                 $0 = new(var);
3168                 $0->var = v;
3169         } }$
3170
3171         ## Grammar
3172
3173 ###### print binode cases
3174         case Program:
3175                 do_indent(indent, "program");
3176                 for (b2 = cast(binode, b->left); b2; b2 = cast(binode, b2->right)) {
3177                         printf(" ");
3178                         print_exec(b2->left, 0, 0);
3179                 }
3180                 if (bracket)
3181                         printf(" {\n");
3182                 else
3183                         printf(":\n");
3184                 print_exec(b->right, indent+1, bracket);
3185                 if (bracket)
3186                         do_indent(indent, "}\n");
3187                 break;
3188
3189 ###### propagate binode cases
3190         case Program: abort();
3191
3192 ###### core functions
3193
3194         static int analyse_prog(struct exec *prog, struct parse_context *c)
3195         {
3196                 struct binode *b = cast(binode, prog);
3197                 int ok = 1;
3198
3199                 if (!b)
3200                         return 0;
3201                 do {
3202                         ok = 1;
3203                         propagate_types(b->right, c, &ok, Tnone, 0);
3204                 } while (ok == 2);
3205                 if (!ok)
3206                         return 0;
3207
3208                 for (b = cast(binode, b->left); b; b = cast(binode, b->right)) {
3209                         struct var *v = cast(var, b->left);
3210                         if (!v->var->val.type) {
3211                                 v->var->where_set = b;
3212                                 v->var->val = val_prepare(Tstr);
3213                         }
3214                 }
3215                 b = cast(binode, prog);
3216                 do {
3217                         ok = 1;
3218                         propagate_types(b->right, c, &ok, Tnone, 0);
3219                 } while (ok == 2);
3220                 if (!ok)
3221                         return 0;
3222
3223                 /* Make sure everything is still consistent */
3224                 propagate_types(b->right, c, &ok, Tnone, 0);
3225                 return !!ok;
3226         }
3227
3228         static void interp_prog(struct exec *prog, char **argv)
3229         {
3230                 struct binode *p = cast(binode, prog);
3231                 struct binode *al;
3232                 struct value v;
3233
3234                 if (!prog)
3235                         return;
3236                 al = cast(binode, p->left);
3237                 while (al) {
3238                         struct var *v = cast(var, al->left);
3239                         struct value *vl = &v->var->val;
3240
3241                         if (argv[0] == NULL) {
3242                                 printf("Not enough args\n");
3243                                 exit(1);
3244                         }
3245                         al = cast(binode, al->right);
3246                         free_value(*vl);
3247                         *vl = parse_value(vl->type, argv[0]);
3248                         if (vl->type == NULL)
3249                                 exit(1);
3250                         argv++;
3251                 }
3252                 v = interp_exec(p->right);
3253                 free_value(v);
3254         }
3255
3256 ###### interp binode cases
3257         case Program: abort();
3258
3259 ## And now to test it out.
3260
3261 Having a language requires having a "hello world" program. I'll
3262 provide a little more than that: a program that prints "Hello world"
3263 finds the GCD of two numbers, prints the first few elements of
3264 Fibonacci, and performs a binary search for a number.
3265
3266 ###### File: oceani.mk
3267         tests :: sayhello
3268         sayhello : oceani
3269                 @echo "===== TEST ====="
3270                 ./oceani --section "test: hello" oceani.mdc 55 33
3271
3272 ###### test: hello
3273
3274         program A B:
3275                 print "Hello World, what lovely oceans you have!"
3276                 /* When a variable is defined in both branches of an 'if',
3277                  * and used afterwards, the variables are merged.
3278                  */
3279                 if A > B:
3280                         bigger := "yes"
3281                 else:
3282                         bigger := "no"
3283                 print "Is", A, "bigger than", B,"? ", bigger
3284                 /* If a variable is not used after the 'if', no
3285                  * merge happens, so types can be different
3286                  */
3287                 if A * 2 > B:
3288                         double:string = "yes"
3289                         print A, "is more than twice", B, "?", double
3290                 else:
3291                         double := A*2
3292                         print "double", A, "is only", double
3293
3294                 a : number
3295                 a = A;
3296                 b:number = B
3297                 if a > 0 and b > 0:
3298                         while a != b:
3299                                 if a < b:
3300                                         b = b - a
3301                                 else:
3302                                         a = a - b
3303                         print "GCD of", A, "and", B,"is", a
3304                 else if a <= 0:
3305                         print a, "is not positive, cannot calculate GCD"
3306                 else:
3307                         print b, "is not positive, cannot calculate GCD"
3308
3309                 for:
3310                         togo := 10
3311                         f1 := 1; f2 := 1
3312                         print "Fibonacci:", f1,f2,
3313                 then togo = togo - 1
3314                 while togo > 0:
3315                         f3 := f1 + f2
3316                         print "", f3,
3317                         f1 = f2
3318                         f2 = f3
3319                 print ""
3320
3321                 /* Binary search... */
3322                 for:
3323                         lo:= 0; hi := 100
3324                         target := 77
3325                 while:
3326                         mid := (lo + hi) / 2
3327                         if mid == target:
3328                                 use Found
3329                         if mid < target:
3330                                 lo = mid
3331                         else:
3332                                 hi = mid
3333                         if hi - lo < 1:
3334                                 use GiveUp
3335                         use True
3336                 do: pass
3337                 case Found:
3338                         print "Yay, I found", target
3339                 case GiveUp:
3340                         print "Closest I found was", mid