ocean-lang.org Git - ocean/blob - csrc/oceani.mdc

   1 # Ocean Interpreter - Jamison Creek version
   2
   3 Ocean is intended to be a compiled language, so this interpreter is
   4 not targeted at being the final product.  It is, rather, an intermediate
   5 stage and fills that role in two distinct ways.
   6
   7 Firstly, it exists as a platform to experiment with the early language
   8 design.  An interpreter is easy to write and easy to get working, so
   9 the barrier for entry is lower if I aim to start with an interpreter.
  10
  11 Secondly, the plan for the Ocean compiler is to write it in the
  12 [Ocean language](http://ocean-lang.org).  To achieve this we naturally
  13 need some sort of boot-strap process and this interpreter - written in
  14 portable C - will fill that role.  It will be used to bootstrap the
  15 Ocean compiler.
  16
  17 Two features that are not needed to fill either of these roles are
  18 performance and completeness.  The interpreter only needs to be fast
  19 enough to run small test programs and occasionally to run the compiler
  20 on itself.  It only needs to be complete enough to test aspects of the
  21 design which are developed before the compiler is working, and to run
  22 the compiler on itself.  Any features not used by the compiler when
  23 compiling itself are superfluous.  They may be included anyway, but
  24 they may not.
  25
  26 Nonetheless, the interpreter should end up being reasonably complete,
  27 and any performance bottlenecks which appear and are easily fixed, will
  28 be.
  29
  30 ## Current version
  31
  32 This third version of the interpreter exists to test out some initial
  33 ideas relating to types.  Particularly it adds arrays (indexed from
  34 zero) and simple structures.  Basic control flow and variable scoping
  35 are already fairly well established, as are basic numerical and
  36 boolean operators.
  37
  38 Some operators that have only recently been added, and so have not
  39 generated all that much experience yet are "and then" and "or else" as
  40 short-circuit Boolean operators, and the "if ... else" trinary
  41 operator which can select between two expressions based on a third
  42 (which appears syntactically in the middle).
  43
  44 Elements that are present purely to make a usable language, and
  45 without any expectation that they will remain, are the "program'
  46 clause, which provides a list of variables to received command-line
  47 arguments, and the "print" statement which performs simple output.
  48
  49 The current scalar types are "number", "Boolean", and "string".
  50 Boolean will likely stay in its current form, the other two might, but
  51 could just as easily be changed.
  52
  53 ## Naming
  54
  55 Versions of the interpreter which obviously do not support a complete
  56 language will be named after creeks and streams.  This one is Jamison
  57 Creek.
  58
  59 Once we have something reasonably resembling a complete language, the
  60 names of rivers will be used.
  61 Early versions of the compiler will be named after seas.  Major
  62 releases of the compiler will be named after oceans.  Hopefully I will
  63 be finished once I get to the Pacific Ocean release.
  64
  65 ## Outline
  66
  67 As well as parsing and executing a program, the interpreter can print
  68 out the program from the parsed internal structure.  This is useful
  69 for validating the parsing.
  70 So the main requirements of the interpreter are:
  71
  72 - Parse the program, possibly with tracing,
  73 - Analyse the parsed program to ensure consistency,
  74 - Print the program,
  75 - Execute the program, if no parsing or consistency errors were found.
  76
  77 This is all performed by a single C program extracted with
  78 `parsergen`.
  79
  80 There will be two formats for printing the program: a default and one
  81 that uses bracketing.  So a `--bracket` command line option is needed
  82 for that.  Normally the first code section found is used, however an
  83 alternate section can be requested so that a file (such as this one)
  84 can contain multiple programs.  This is effected with the `--section`
  85 option.
  86
  87 This code must be compiled with `-fplan9-extensions` so that anonymous
  88 structures can be used.
  89
  90 ###### File: oceani.mk
  91
  92         myCFLAGS := -Wall -g -fplan9-extensions
  93         CFLAGS := $(filter-out $(myCFLAGS),$(CFLAGS)) $(myCFLAGS)
  94         myLDLIBS:= libparser.o libscanner.o libmdcode.o -licuuc
  95         LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
  96         ## libs
  97         all :: $(LDLIBS) oceani
  98         oceani.c oceani.h : oceani.mdc parsergen
  99                 ./parsergen -o oceani --LALR --tag Parser oceani.mdc
 100         oceani.mk: oceani.mdc md2c
 101                 ./md2c oceani.mdc
 102
 103         oceani: oceani.o $(LDLIBS)
 104                 $(CC) $(CFLAGS) -o oceani oceani.o $(LDLIBS)
 105
 106 ###### Parser: header
 107         ## macros
 108         struct parse_context;
 109         ## ast
 110         struct parse_context {
 111                 struct token_config config;
 112                 char *file_name;
 113                 int parse_error;
 114                 struct exec *prog;
 115                 ## parse context
 116         };
 117
 118 ###### macros
 119
 120         #define container_of(ptr, type, member) ({                      \
 121                 const typeof( ((type *)0)->member ) *__mptr = (ptr);    \
 122                 (type *)( (char *)__mptr - offsetof(type,member) );})
 123
 124         #define config2context(_conf) container_of(_conf, struct parse_context, \
 125                 config)
 126
 127 ###### Parser: reduce
 128         struct parse_context *c = config2context(config);
 129
 130 ###### Parser: code
 131
 132         #include <unistd.h>
 133         #include <stdlib.h>
 134         #include <fcntl.h>
 135         #include <errno.h>
 136         #include <sys/mman.h>
 137         #include <string.h>
 138         #include <stdio.h>
 139         #include <locale.h>
 140         #include <malloc.h>
 141         #include "mdcode.h"
 142         #include "scanner.h"
 143         #include "parser.h"
 144
 145         ## includes
 146
 147         #include "oceani.h"
 148
 149         ## forward decls
 150         ## value functions
 151         ## ast functions
 152         ## core functions
 153
 154         #include <getopt.h>
 155         static char Usage[] =
 156                 "Usage: oceani --trace --print --noexec --brackets --section=SectionName prog.ocn\n";
 157         static const struct option long_options[] = {
 158                 {"trace",     0, NULL, 't'},
 159                 {"print",     0, NULL, 'p'},
 160                 {"noexec",    0, NULL, 'n'},
 161                 {"brackets",  0, NULL, 'b'},
 162                 {"section",   1, NULL, 's'},
 163                 {NULL,        0, NULL, 0},
 164         };
 165         const char *options = "tpnbs";
 166         int main(int argc, char *argv[])
 167         {
 168                 int fd;
 169                 int len;
 170                 char *file;
 171                 struct section *s, *ss;
 172                 char *section = NULL;
 173                 struct parse_context context = {
 174                         .config = {
 175                                 .ignored = (1 << TK_mark),
 176                                 .number_chars = ".,_+- ",
 177                                 .word_start = "_",
 178                                 .word_cont = "_",
 179                         },
 180                 };
 181                 int doprint=0, dotrace=0, doexec=1, brackets=0;
 182                 int opt;
 183                 while ((opt = getopt_long(argc, argv, options, long_options, NULL))
 184                        != -1) {
 185                         switch(opt) {
 186                         case 't': dotrace=1; break;
 187                         case 'p': doprint=1; break;
 188                         case 'n': doexec=0; break;
 189                         case 'b': brackets=1; break;
 190                         case 's': section = optarg; break;
 191                         default: fprintf(stderr, Usage);
 192                                 exit(1);
 193                         }
 194                 }
 195                 if (optind >= argc) {
 196                         fprintf(stderr, "oceani: no input file given\n");
 197                         exit(1);
 198                 }
 199                 fd = open(argv[optind], O_RDONLY);
 200                 if (fd < 0) {
 201                         fprintf(stderr, "oceani: cannot open %s\n", argv[optind]);
 202                         exit(1);
 203                 }
 204                 context.file_name = argv[optind];
 205                 len = lseek(fd, 0, 2);
 206                 file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
 207                 s = code_extract(file, file+len, NULL);
 208                 if (!s) {
 209                         fprintf(stderr, "oceani: could not find any code in %s\n",
 210                                 argv[optind]);
 211                         exit(1);
 212                 }
 213
 214                 ## context initialization
 215
 216                 if (section) {
 217                         for (ss = s; ss; ss = ss->next) {
 218                                 struct text sec = ss->section;
 219                                 if (sec.len == strlen(section) &&
 220                                     strncmp(sec.txt, section, sec.len) == 0)
 221                                         break;
 222                         }
 223                         if (!ss) {
 224                                 fprintf(stderr, "oceani: cannot find section %s\n",
 225                                         section);
 226                                 exit(1);
 227                         }
 228                 } else
 229                         ss = s;
 230                 parse_oceani(ss->code, &context.config, dotrace ? stderr : NULL);
 231
 232                 if (!context.prog) {
 233                         fprintf(stderr, "oceani: no program found.\n");
 234                         context.parse_error = 1;
 235                 }
 236                 if (context.prog && doprint) {
 237                         ## print const decls
 238                         ## print type decls
 239                         print_exec(context.prog, 0, brackets);
 240                 }
 241                 if (context.prog && doexec && !context.parse_error) {
 242                         if (!analyse_prog(context.prog, &context)) {
 243                                 fprintf(stderr, "oceani: type error in program - not running.\n");
 244                                 exit(1);
 245                         }
 246                         interp_prog(&context, context.prog, argc - optind, argv+optind);
 247                 }
 248                 free_exec(context.prog);
 249
 250                 while (s) {
 251                         struct section *t = s->next;
 252                         code_free(s->code);
 253                         free(s);
 254                         s = t;
 255                 }
 256                 ## free context vars
 257                 ## free context types
 258                 exit(context.parse_error ? 1 : 0);
 259         }
 260
 261 ### Analysis
 262
 263 The four requirements of parse, analyse, print, interpret apply to
 264 each language element individually so that is how most of the code
 265 will be structured.
 266
 267 Three of the four are fairly self explanatory.  The one that requires
 268 a little explanation is the analysis step.
 269
 270 The current language design does not require the types of variables to
 271 be declared, but they must still have a single type.  Different
 272 operations impose different requirements on the variables, for example
 273 addition requires both arguments to be numeric, and assignment
 274 requires the variable on the left to have the same type as the
 275 expression on the right.
 276
 277 Analysis involves propagating these type requirements around and
 278 consequently setting the type of each variable.  If any requirements
 279 are violated (e.g. a string is compared with a number) or if a
 280 variable needs to have two different types, then an error is raised
 281 and the program will not run.
 282
 283 If the same variable is declared in both branchs of an 'if/else', or
 284 in all cases of a 'switch' then the multiple instances may be merged
 285 into just one variable if the variable is referenced after the
 286 conditional statement.  When this happens, the types must naturally be
 287 consistent across all the branches.  When the variable is not used
 288 outside the if, the variables in the different branches are distinct
 289 and can be of different types.
 290
 291 Undeclared names may only appear in "use" statements and "case" expressions.
 292 These names are given a type of "label" and a unique value.
 293 This allows them to fill the role of a name in an enumerated type, which
 294 is useful for testing the `switch` statement.
 295
 296 As we will see, the condition part of a `while` statement can return
 297 either a Boolean or some other type.  This requires that the expected
 298 type that gets passed around comprises a type and a flag to indicate
 299 that `Tbool` is also permitted.
 300
 301 As there are, as yet, no distinct types that are compatible, there
 302 isn't much subtlety in the analysis.  When we have distinct number
 303 types, this will become more interesting.
 304
 305 #### Error reporting
 306
 307 When analysis discovers an inconsistency it needs to report an error;
 308 just refusing to run the code ensures that the error doesn't cascade,
 309 but by itself it isn't very useful.  A clear understanding of the sort
 310 of error message that are useful will help guide the process of
 311 analysis.
 312
 313 At a simplistic level, the only sort of error that type analysis can
 314 report is that the type of some construct doesn't match a contextual
 315 requirement.  For example, in `4 + "hello"` the addition provides a
 316 contextual requirement for numbers, but `"hello"` is not a number.  In
 317 this particular example no further information is needed as the types
 318 are obvious from local information.  When a variable is involved that
 319 isn't the case.  It may be helpful to explain why the variable has a
 320 particular type, by indicating the location where the type was set,
 321 whether by declaration or usage.
 322
 323 Using a recursive-descent analysis we can easily detect a problem at
 324 multiple locations. In "`hello:= "there"; 4 + hello`" the addition
 325 will detect that one argument is not a number and the usage of `hello`
 326 will detect that a number was wanted, but not provided.  In this
 327 (early) version of the language, we will generate error reports at
 328 multiple locations, so the use of `hello` will report an error and
 329 explain were the value was set, and the addition will report an error
 330 and say why numbers are needed.  To be able to report locations for
 331 errors, each language element will need to record a file location
 332 (line and column) and each variable will need to record the language
 333 element where its type was set.  For now we will assume that each line
 334 of an error message indicates one location in the file, and up to 2
 335 types.  So we provide a `printf`-like function which takes a format, a
 336 location (a `struct exec` which has not yet been introduced), and 2
 337 types. "`%1`" reports the first type, "`%2`" reports the second.  We
 338 will need a function to print the location, once we know how that is
 339 stored. e As will be explained later, there are sometimes extra rules for
 340 type matching and they might affect error messages, we need to pass those
 341 in too.
 342
 343 As well as type errors, we sometimes need to report problems with
 344 tokens, which might be unexpected or might name a type that has not
 345 been defined.  For these we have `tok_err()` which reports an error
 346 with a given token.  Each of the error functions sets the flag in the
 347 context so indicate that parsing failed.
 348
 349 ###### forward decls
 350
 351         static void fput_loc(struct exec *loc, FILE *f);
 352
 353 ###### core functions
 354
 355         static void type_err(struct parse_context *c,
 356                              char *fmt, struct exec *loc,
 357                              struct type *t1, int rules, struct type *t2)
 358         {
 359                 fprintf(stderr, "%s:", c->file_name);
 360                 fput_loc(loc, stderr);
 361                 for (; *fmt ; fmt++) {
 362                         if (*fmt != '%') {
 363                                 fputc(*fmt, stderr);
 364                                 continue;
 365                         }
 366                         fmt++;
 367                         switch (*fmt) {
 368                         case '%': fputc(*fmt, stderr); break;   // NOTEST
 369                         default: fputc('?', stderr); break;     // NOTEST
 370                         case '1':
 371                                 type_print(t1, stderr);
 372                                 break;
 373                         case '2':
 374                                 type_print(t2, stderr);
 375                                 break;
 376                         ## format cases
 377                         }
 378                 }
 379                 fputs("\n", stderr);
 380                 c->parse_error = 1;
 381         }
 382
 383         static void tok_err(struct parse_context *c, char *fmt, struct token *t)
 384         {
 385                 fprintf(stderr, "%s:%d:%d: %s: %.*s\n", c->file_name, t->line, t->col, fmt,
 386                         t->txt.len, t->txt.txt);
 387                 c->parse_error = 1;
 388         }
 389
 390 ## Entities: declared and predeclared.
 391
 392 There are various "things" that the language and/or the interpreter
 393 needs to know about to parse and execute a program.  These include
 394 types, variables, values, and executable code.  These are all lumped
 395 together under the term "entities" (calling them "objects" would be
 396 confusing) and introduced here.  The following section will present the
 397 different specific code elements which comprise or manipulate these
 398 various entities.
 399
 400 ### Types
 401
 402 Values come in a wide range of types, with more likely to be added.
 403 Each type needs to be able to print its own values (for convenience at
 404 least) as well as to compare two values, at least for equality and
 405 possibly for order.  For now, values might need to be duplicated and
 406 freed, though eventually such manipulations will be better integrated
 407 into the language.
 408
 409 Rather than requiring every numeric type to support all numeric
 410 operations (add, multiple, etc), we allow types to be able to present
 411 as one of a few standard types: integer, float, and fraction.  The
 412 existence of these conversion functions eventually enable types to
 413 determine if they are compatible with other types, though such types
 414 have not yet been implemented.
 415
 416 Named type are stored in a simple linked list.  Objects of each type are
 417 "values" which are often passed around by value.
 418
 419 ###### ast
 420
 421         struct value {
 422                 union {
 423                         char ptr[1];
 424                         ## value union fields
 425                 };
 426         };
 427
 428         struct type {
 429                 struct text name;
 430                 struct type *next;
 431                 int size, align;
 432                 void (*init)(struct type *type, struct value *val);
 433                 void (*prepare_type)(struct parse_context *c, struct type *type, int parse_time);
 434                 void (*print)(struct type *type, struct value *val);
 435                 void (*print_type)(struct type *type, FILE *f);
 436                 int (*cmp_order)(struct type *t1, struct type *t2,
 437                                  struct value *v1, struct value *v2);
 438                 int (*cmp_eq)(struct type *t1, struct type *t2,
 439                               struct value *v1, struct value *v2);
 440                 void (*dup)(struct type *type, struct value *vold, struct value *vnew);
 441                 void (*free)(struct type *type, struct value *val);
 442                 void (*free_type)(struct type *t);
 443                 long long (*to_int)(struct value *v);
 444                 double (*to_float)(struct value *v);
 445                 int (*to_mpq)(mpq_t *q, struct value *v);
 446                 ## type functions
 447                 union {
 448                         ## type union fields
 449                 };
 450         };
 451
 452 ###### parse context
 453
 454         struct type *typelist;
 455
 456 ###### ast functions
 457
 458         static struct type *find_type(struct parse_context *c, struct text s)
 459         {
 460                 struct type *l = c->typelist;
 461
 462                 while (l &&
 463                        text_cmp(l->name, s) != 0)
 464                                 l = l->next;
 465                 return l;
 466         }
 467
 468         static struct type *add_type(struct parse_context *c, struct text s,
 469                                      struct type *proto)
 470         {
 471                 struct type *n;
 472
 473                 n = calloc(1, sizeof(*n));
 474                 *n = *proto;
 475                 n->name = s;
 476                 n->next = c->typelist;
 477                 c->typelist = n;
 478                 return n;
 479         }
 480
 481         static void free_type(struct type *t)
 482         {
 483                 /* The type is always a reference to something in the
 484                  * context, so we don't need to free anything.
 485                  */
 486         }
 487
 488         static void free_value(struct type *type, struct value *v)
 489         {
 490                 if (type && v)
 491                         type->free(type, v);
 492         }
 493
 494         static void type_print(struct type *type, FILE *f)
 495         {
 496                 if (!type)
 497                         fputs("*unknown*type*", f);
 498                 else if (type->name.len)
 499                         fprintf(f, "%.*s", type->name.len, type->name.txt);
 500                 else if (type->print_type)
 501                         type->print_type(type, f);
 502                 else
 503                         fputs("*invalid*type*", f);     // NOTEST
 504         }
 505
 506         static void val_init(struct type *type, struct value *val)
 507         {
 508                 if (type && type->init)
 509                         type->init(type, val);
 510         }
 511
 512         static void dup_value(struct type *type,
 513                               struct value *vold, struct value *vnew)
 514         {
 515                 if (type && type->dup)
 516                         type->dup(type, vold, vnew);
 517         }
 518
 519         static int value_cmp(struct type *tl, struct type *tr,
 520                              struct value *left, struct value *right)
 521         {
 522                 if (tl && tl->cmp_order)
 523                         return tl->cmp_order(tl, tr, left, right);
 524                 if (tl && tl->cmp_eq)
 525                         return tl->cmp_eq(tl, tr, left, right);
 526                 return -1;
 527         }
 528
 529         static void print_value(struct type *type, struct value *v)
 530         {
 531                 if (type && type->print)
 532                         type->print(type, v);
 533                 else
 534                         printf("*Unknown*");            // NOTEST
 535         }
 536
 537 ###### forward decls
 538
 539         static void free_value(struct type *type, struct value *v);
 540         static int type_compat(struct type *require, struct type *have, int rules);
 541         static void type_print(struct type *type, FILE *f);
 542         static void val_init(struct type *type, struct value *v);
 543         static void dup_value(struct type *type,
 544                               struct value *vold, struct value *vnew);
 545         static int value_cmp(struct type *tl, struct type *tr,
 546                              struct value *left, struct value *right);
 547         static void print_value(struct type *type, struct value *v);
 548
 549 ###### free context types
 550
 551         while (context.typelist) {
 552                 struct type *t = context.typelist;
 553
 554                 context.typelist = t->next;
 555                 if (t->free_type)
 556                         t->free_type(t);
 557                 free(t);
 558         }
 559
 560 #### Base Types
 561
 562 Values of the base types can be numbers, which we represent as
 563 multi-precision fractions, strings, Booleans and labels.  When
 564 analysing the program we also need to allow for places where no value
 565 is meaningful (type `Tnone`) and where we don't know what type to
 566 expect yet (type is `NULL`).
 567
 568 Values are never shared, they are always copied when used, and freed
 569 when no longer needed.
 570
 571 When propagating type information around the program, we need to
 572 determine if two types are compatible, where type `NULL` is compatible
 573 with anything.  There are two special cases with type compatibility,
 574 both related to the Conditional Statement which will be described
 575 later.  In some cases a Boolean can be accepted as well as some other
 576 primary type, and in others any type is acceptable except a label (`Vlabel`).
 577 A separate function encoding these cases will simplify some code later.
 578
 579 ###### type functions
 580
 581         int (*compat)(struct type *this, struct type *other);
 582
 583 ###### ast functions
 584
 585         static int type_compat(struct type *require, struct type *have, int rules)
 586         {
 587                 if ((rules & Rboolok) && have == Tbool)
 588                         return 1;
 589                 if ((rules & Rnolabel) && have == Tlabel)
 590                         return 0;
 591                 if (!require || !have)
 592                         return 1;
 593
 594                 if (require->compat)
 595                         return require->compat(require, have);
 596
 597                 return require == have;
 598         }
 599
 600 ###### includes
 601         #include <gmp.h>
 602         #include "parse_string.h"
 603         #include "parse_number.h"
 604
 605 ###### libs
 606         myLDLIBS := libnumber.o libstring.o -lgmp
 607         LDLIBS := $(filter-out $(myLDLIBS),$(LDLIBS)) $(myLDLIBS)
 608
 609 ###### type union fields
 610         enum vtype {Vnone, Vstr, Vnum, Vbool, Vlabel} vtype;
 611
 612 ###### value union fields
 613         struct text str;
 614         mpq_t num;
 615         unsigned char bool;
 616         void *label;
 617
 618 ###### ast functions
 619         static void _free_value(struct type *type, struct value *v)
 620         {
 621                 if (!v)
 622                         return;
 623                 switch (type->vtype) {
 624                 case Vnone: break;
 625                 case Vstr: free(v->str.txt); break;
 626                 case Vnum: mpq_clear(v->num); break;
 627                 case Vlabel:
 628                 case Vbool: break;
 629                 }
 630         }
 631
 632 ###### value functions
 633
 634         static void _val_init(struct type *type, struct value *val)
 635         {
 636                 switch(type->vtype) {
 637                 case Vnone:             // NOTEST
 638                         break;          // NOTEST
 639                 case Vnum:
 640                         mpq_init(val->num); break;
 641                 case Vstr:
 642                         val->str.txt = malloc(1);
 643                         val->str.len = 0;
 644                         break;
 645                 case Vbool:
 646                         val->bool = 0;
 647                         break;
 648                 case Vlabel:                    // NOTEST
 649                         val->label = NULL;      // NOTEST
 650                         break;                  // NOTEST
 651                 }
 652         }
 653
 654         static void _dup_value(struct type *type,
 655                                struct value *vold, struct value *vnew)
 656         {
 657                 switch (type->vtype) {
 658                 case Vnone:             // NOTEST
 659                         break;          // NOTEST
 660                 case Vlabel:
 661                         vnew->label = vold->label;
 662                         break;
 663                 case Vbool:
 664                         vnew->bool = vold->bool;
 665                         break;
 666                 case Vnum:
 667                         mpq_init(vnew->num);
 668                         mpq_set(vnew->num, vold->num);
 669                         break;
 670                 case Vstr:
 671                         vnew->str.len = vold->str.len;
 672                         vnew->str.txt = malloc(vnew->str.len);
 673                         memcpy(vnew->str.txt, vold->str.txt, vnew->str.len);
 674                         break;
 675                 }
 676         }
 677
 678         static int _value_cmp(struct type *tl, struct type *tr,
 679                               struct value *left, struct value *right)
 680         {
 681                 int cmp;
 682                 if (tl != tr)
 683                         return tl - tr; // NOTEST
 684                 switch (tl->vtype) {
 685                 case Vlabel: cmp = left->label == right->label ? 0 : 1; break;
 686                 case Vnum: cmp = mpq_cmp(left->num, right->num); break;
 687                 case Vstr: cmp = text_cmp(left->str, right->str); break;
 688                 case Vbool: cmp = left->bool - right->bool; break;
 689                 case Vnone: cmp = 0;                    // NOTEST
 690                 }
 691                 return cmp;
 692         }
 693
 694         static void _print_value(struct type *type, struct value *v)
 695         {
 696                 switch (type->vtype) {
 697                 case Vnone:                             // NOTEST
 698                         printf("*no-value*"); break;    // NOTEST
 699                 case Vlabel:                            // NOTEST
 700                         printf("*label-%p*", v->label); break; // NOTEST
 701                 case Vstr:
 702                         printf("%.*s", v->str.len, v->str.txt); break;
 703                 case Vbool:
 704                         printf("%s", v->bool ? "True":"False"); break;
 705                 case Vnum:
 706                         {
 707                         mpf_t fl;
 708                         mpf_init2(fl, 20);
 709                         mpf_set_q(fl, v->num);
 710                         gmp_printf("%Fg", fl);
 711                         mpf_clear(fl);
 712                         break;
 713                         }
 714                 }
 715         }
 716
 717         static void _free_value(struct type *type, struct value *v);
 718
 719         static struct type base_prototype = {
 720                 .init = _val_init,
 721                 .print = _print_value,
 722                 .cmp_order = _value_cmp,
 723                 .cmp_eq = _value_cmp,
 724                 .dup = _dup_value,
 725                 .free = _free_value,
 726         };
 727
 728         static struct type *Tbool, *Tstr, *Tnum, *Tnone, *Tlabel;
 729
 730 ###### ast functions
 731         static struct type *add_base_type(struct parse_context *c, char *n,
 732                                           enum vtype vt, int size)
 733         {
 734                 struct text txt = { n, strlen(n) };
 735                 struct type *t;
 736
 737                 t = add_type(c, txt, &base_prototype);
 738                 t->vtype = vt;
 739                 t->size = size;
 740                 t->align = size > sizeof(void*) ? sizeof(void*) : size;
 741                 if (t->size & (t->align - 1))
 742                         t->size = (t->size | (t->align - 1)) + 1;
 743                 return t;
 744         }
 745
 746 ###### context initialization
 747
 748         Tbool  = add_base_type(&context, "Boolean", Vbool, sizeof(char));
 749         Tstr   = add_base_type(&context, "string", Vstr, sizeof(struct text));
 750         Tnum   = add_base_type(&context, "number", Vnum, sizeof(mpq_t));
 751         Tnone  = add_base_type(&context, "none", Vnone, 0);
 752         Tlabel = add_base_type(&context, "label", Vlabel, sizeof(void*));
 753
 754 ### Variables
 755
 756 Variables are scoped named values.  We store the names in a linked list
 757 of "bindings" sorted in lexical order, and use sequential search and
 758 insertion sort.
 759
 760 ###### ast
 761
 762         struct binding {
 763                 struct text name;
 764                 struct binding *next;   // in lexical order
 765                 ## binding fields
 766         };
 767
 768 This linked list is stored in the parse context so that "reduce"
 769 functions can find or add variables, and so the analysis phase can
 770 ensure that every variable gets a type.
 771
 772 ###### parse context
 773
 774         struct binding *varlist;  // In lexical order
 775
 776 ###### ast functions
 777
 778         static struct binding *find_binding(struct parse_context *c, struct text s)
 779         {
 780                 struct binding **l = &c->varlist;
 781                 struct binding *n;
 782                 int cmp = 1;
 783
 784                 while (*l &&
 785                         (cmp = text_cmp((*l)->name, s)) < 0)
 786                                 l = & (*l)->next;
 787                 if (cmp == 0)
 788                         return *l;
 789                 n = calloc(1, sizeof(*n));
 790                 n->name = s;
 791                 n->next = *l;
 792                 *l = n;
 793                 return n;
 794         }
 795
 796 Each name can be linked to multiple variables defined in different
 797 scopes.  Each scope starts where the name is declared and continues
 798 until the end of the containing code block.  Scopes of a given name
 799 cannot nest, so a declaration while a name is in-scope is an error.
 800
 801 ###### binding fields
 802         struct variable *var;
 803
 804 ###### ast
 805         struct variable {
 806                 struct variable *previous;
 807                 struct type *type;
 808                 struct binding *name;
 809                 struct exec *where_decl;// where name was declared
 810                 struct exec *where_set; // where type was set
 811                 ## variable fields
 812         };
 813
 814 While the naming seems strange, we include local constants in the
 815 definition of variables.  A name declared `var := value` can
 816 subsequently be changed, but a name declared `var ::= value` cannot -
 817 it is constant
 818
 819 ###### variable fields
 820         int constant;
 821
 822 Scopes in parallel branches can be partially merged.  More
 823 specifically, if a given name is declared in both branches of an
 824 if/else then its scope is a candidate for merging.  Similarly if
 825 every branch of an exhaustive switch (e.g. has an "else" clause)
 826 declares a given name, then the scopes from the branches are
 827 candidates for merging.
 828
 829 Note that names declared inside a loop (which is only parallel to
 830 itself) are never visible after the loop.  Similarly names defined in
 831 scopes which are not parallel, such as those started by `for` and
 832 `switch`, are never visible after the scope.  Only variables defined in
 833 both `then` and `else` (including the implicit then after an `if`, and
 834 excluding `then` used with `for`) and in all `case`s and `else` of a
 835 `switch` or `while` can be visible beyond the `if`/`switch`/`while`.
 836
 837 Labels, which are a bit like variables, follow different rules.
 838 Labels are not explicitly declared, but if an undeclared name appears
 839 in a context where a label is legal, that effectively declares the
 840 name as a label.  The declaration remains in force (or in scope) at
 841 least to the end of the immediately containing block and conditionally
 842 in any larger containing block which does not declare the name in some
 843 other way.  Importantly, the conditional scope extension happens even
 844 if the label is only used in one parallel branch of a conditional --
 845 when used in one branch it is treated as having been declared in all
 846 branches.
 847
 848 Merge candidates are tentatively visible beyond the end of the
 849 branching statement which creates them.  If the name is used, the
 850 merge is affirmed and they become a single variable visible at the
 851 outer layer.  If not - if it is redeclared first - the merge lapses.
 852
 853 To track scopes we have an extra stack, implemented as a linked list,
 854 which roughly parallels the parse stack and which is used exclusively
 855 for scoping.  When a new scope is opened, a new frame is pushed and
 856 the child-count of the parent frame is incremented.  This child-count
 857 is used to distinguish between the first of a set of parallel scopes,
 858 in which declared variables must not be in scope, and subsequent
 859 branches, whether they may already be conditionally scoped.
 860
 861 To push a new frame *before* any code in the frame is parsed, we need a
 862 grammar reduction.  This is most easily achieved with a grammar
 863 element which derives the empty string, and creates the new scope when
 864 it is recognised.  This can be placed, for example, between a keyword
 865 like "if" and the code following it.
 866
 867 ###### ast
 868         struct scope {
 869                 struct scope *parent;
 870                 int child_count;
 871         };
 872
 873 ###### parse context
 874         int scope_depth;
 875         struct scope *scope_stack;
 876
 877 ###### ast functions
 878         static void scope_pop(struct parse_context *c)
 879         {
 880                 struct scope *s = c->scope_stack;
 881
 882                 c->scope_stack = s->parent;
 883                 free(s);
 884                 c->scope_depth -= 1;
 885         }
 886
 887         static void scope_push(struct parse_context *c)
 888         {
 889                 struct scope *s = calloc(1, sizeof(*s));
 890                 if (c->scope_stack)
 891                         c->scope_stack->child_count += 1;
 892                 s->parent = c->scope_stack;
 893                 c->scope_stack = s;
 894                 c->scope_depth += 1;
 895         }
 896
 897 ###### Grammar
 898
 899         $void
 900         OpenScope -> ${ scope_push(c); }$
 901         ClosePara -> ${ var_block_close(c, CloseParallel); }$
 902
 903 Each variable records a scope depth and is in one of four states:
 904
 905 - "in scope".  This is the case between the declaration of the
 906   variable and the end of the containing block, and also between
 907   the usage with affirms a merge and the end of that block.
 908
 909   The scope depth is not greater than the current parse context scope
 910   nest depth.  When the block of that depth closes, the state will
 911   change.  To achieve this, all "in scope" variables are linked
 912   together as a stack in nesting order.
 913
 914 - "pending".  The "in scope" block has closed, but other parallel
 915   scopes are still being processed.  So far, every parallel block at
 916   the same level that has closed has declared the name.
 917
 918   The scope depth is the depth of the last parallel block that
 919   enclosed the declaration, and that has closed.
 920
 921 - "conditionally in scope".  The "in scope" block and all parallel
 922   scopes have closed, and no further mention of the name has been
 923   seen.  This state includes a secondary nest depth which records the
 924   outermost scope seen since the variable became conditionally in
 925   scope.  If a use of the name is found, the variable becomes "in
 926   scope" and that secondary depth becomes the recorded scope depth.
 927   If the name is declared as a new variable, the old variable becomes
 928   "out of scope" and the recorded scope depth stays unchanged.
 929
 930 - "out of scope".  The variable is neither in scope nor conditionally
 931   in scope.  It is permanently out of scope now and can be removed from
 932   the "in scope" stack.
 933
 934 ###### variable fields
 935         int depth, min_depth;
 936         enum { OutScope, PendingScope, CondScope, InScope } scope;
 937         struct variable *in_scope;
 938
 939 ###### parse context
 940
 941         struct variable *in_scope;
 942
 943 All variables with the same name are linked together using the
 944 'previous' link.  Those variable that have been affirmatively merged all
 945 have a 'merged' pointer that points to one primary variable - the most
 946 recently declared instance.  When merging variables, we need to also
 947 adjust the 'merged' pointer on any other variables that had previously
 948 been merged with the one that will no longer be primary.
 949
 950 A variable that is no longer the most recent instance of a name may
 951 still have "pending" scope, if it might still be merged with most
 952 recent instance.  These variables don't really belong in the
 953 "in_scope" list, but are not immediately removed when a new instance
 954 is found.  Instead, they are detected and ignored when considering the
 955 list of in_scope names.
 956
 957 The storage of the value of a variable will be described later.  For now
 958 we just need to know that when a variable goes out of scope, it might
 959 need to be freed.  For this we need to be able to find it, so assume that
 960 `var_value()` will provide that.
 961
 962 ###### variable fields
 963         struct variable *merged;
 964
 965 ###### ast functions
 966
 967         static void variable_merge(struct variable *primary, struct variable *secondary)
 968         {
 969                 struct variable *v;
 970
 971                 if (primary->merged)
 972                         // shouldn't happen
 973                         primary = primary->merged;
 974
 975                 for (v = primary->previous; v; v=v->previous)
 976                         if (v == secondary || v == secondary->merged ||
 977                             v->merged == secondary ||
 978                             (v->merged && v->merged == secondary->merged)) {
 979                                 v->scope = OutScope;
 980                                 v->merged = primary;
 981                         }
 982         }
 983
 984 ###### forward decls
 985         static struct value *var_value(struct parse_context *c, struct variable *v);
 986
 987 ###### free context vars
 988
 989         while (context.varlist) {
 990                 struct binding *b = context.varlist;
 991                 struct variable *v = b->var;
 992                 context.varlist = b->next;
 993                 free(b);
 994                 while (v) {
 995                         struct variable *t = v;
 996
 997                         v = t->previous;
 998                         free_value(t->type, var_value(&context, t));
 999                         if (t->depth == 0)
1000                                 // This is a global constant
1001                                 free_exec(t->where_decl);
1002                         free(t);
1003                 }
1004         }
1005
1006 #### Manipulating Bindings
1007
1008 When a name is conditionally visible, a new declaration discards the
1009 old binding - the condition lapses.  Conversely a usage of the name
1010 affirms the visibility and extends it to the end of the containing
1011 block - i.e. the block that contains both the original declaration and
1012 the latest usage.  This is determined from `min_depth`.  When a
1013 conditionally visible variable gets affirmed like this, it is also
1014 merged with other conditionally visible variables with the same name.
1015
1016 When we parse a variable declaration we either report an error if the
1017 name is currently bound, or create a new variable at the current nest
1018 depth if the name is unbound or bound to a conditionally scoped or
1019 pending-scope variable.  If the previous variable was conditionally
1020 scoped, it and its homonyms becomes out-of-scope.
1021
1022 When we parse a variable reference (including non-declarative assignment
1023 "foo = bar") we report an error if the name is not bound or is bound to
1024 a pending-scope variable; update the scope if the name is bound to a
1025 conditionally scoped variable; or just proceed normally if the named
1026 variable is in scope.
1027
1028 When we exit a scope, any variables bound at this level are either
1029 marked out of scope or pending-scoped, depending on whether the scope
1030 was sequential or parallel.  Here a "parallel" scope means the "then"
1031 or "else" part of a conditional, or any "case" or "else" branch of a
1032 switch.  Other scopes are "sequential".
1033
1034 When exiting a parallel scope we check if there are any variables that
1035 were previously pending and are still visible. If there are, then
1036 there weren't redeclared in the most recent scope, so they cannot be
1037 merged and must become out-of-scope.  If it is not the first of
1038 parallel scopes (based on `child_count`), we check that there was a
1039 previous binding that is still pending-scope.  If there isn't, the new
1040 variable must now be out-of-scope.
1041
1042 When exiting a sequential scope that immediately enclosed parallel
1043 scopes, we need to resolve any pending-scope variables.  If there was
1044 no `else` clause, and we cannot determine that the `switch` was exhaustive,
1045 we need to mark all pending-scope variable as out-of-scope.  Otherwise
1046 all pending-scope variables become conditionally scoped.
1047
1048 ###### ast
1049         enum closetype { CloseSequential, CloseParallel, CloseElse };
1050
1051 ###### ast functions
1052
1053         static struct variable *var_decl(struct parse_context *c, struct text s)
1054         {
1055                 struct binding *b = find_binding(c, s);
1056                 struct variable *v = b->var;
1057
1058                 switch (v ? v->scope : OutScope) {
1059                 case InScope:
1060                         /* Caller will report the error */
1061                         return NULL;
1062                 case CondScope:
1063                         for (;
1064                              v && v->scope == CondScope;
1065                              v = v->previous)
1066                                 v->scope = OutScope;
1067                         break;
1068                 default: break;
1069                 }
1070                 v = calloc(1, sizeof(*v));
1071                 v->previous = b->var;
1072                 b->var = v;
1073                 v->name = b;
1074                 v->min_depth = v->depth = c->scope_depth;
1075                 v->scope = InScope;
1076                 v->in_scope = c->in_scope;
1077                 c->in_scope = v;
1078                 return v;
1079         }
1080
1081         static struct variable *var_ref(struct parse_context *c, struct text s)
1082         {
1083                 struct binding *b = find_binding(c, s);
1084                 struct variable *v = b->var;
1085                 struct variable *v2;
1086
1087                 switch (v ? v->scope : OutScope) {
1088                 case OutScope:
1089                 case PendingScope:
1090                         /* Caller will report the error */
1091                         return NULL;
1092                 case CondScope:
1093                         /* All CondScope variables of this name need to be merged
1094                          * and become InScope
1095                          */
1096                         v->depth = v->min_depth;
1097                         v->scope = InScope;
1098                         for (v2 = v->previous;
1099                              v2 && v2->scope == CondScope;
1100                              v2 = v2->previous)
1101                                 variable_merge(v, v2);
1102                         break;
1103                 case InScope:
1104                         break;
1105                 }
1106                 return v;
1107         }
1108
1109         static void var_block_close(struct parse_context *c, enum closetype ct)
1110         {
1111                 /* Close off all variables that are in_scope */
1112                 struct variable *v, **vp, *v2;
1113
1114                 scope_pop(c);
1115                 for (vp = &c->in_scope;
1116                      v = *vp, v && v->depth > c->scope_depth && v->min_depth > c->scope_depth;
1117                      ) {
1118                         if (v->name->var == v) switch (ct) {
1119                         case CloseElse:
1120                         case CloseParallel: /* handle PendingScope */
1121                                 switch(v->scope) {
1122                                 case InScope:
1123                                 case CondScope:
1124                                         if (c->scope_stack->child_count == 1)
1125                                                 v->scope = PendingScope;
1126                                         else if (v->previous &&
1127                                                  v->previous->scope == PendingScope)
1128                                                 v->scope = PendingScope;
1129                                         else if (v->type == Tlabel)
1130                                                 v->scope = PendingScope;
1131                                         else if (v->name->var == v)
1132                                                 v->scope = OutScope;
1133                                         if (ct == CloseElse) {
1134                                                 /* All Pending variables with this name
1135                                                  * are now Conditional */
1136                                                 for (v2 = v;
1137                                                      v2 && v2->scope == PendingScope;
1138                                                      v2 = v2->previous)
1139                                                         v2->scope = CondScope;
1140                                         }
1141                                         break;
1142                                 case PendingScope:
1143                                         for (v2 = v;
1144                                              v2 && v2->scope == PendingScope;
1145                                              v2 = v2->previous)
1146                                                 if (v2->type != Tlabel)
1147                                                         v2->scope = OutScope;
1148                                         break;
1149                                 case OutScope: break;
1150                                 }
1151                                 break;
1152                         case CloseSequential:
1153                                 if (v->type == Tlabel)
1154                                         v->scope = PendingScope;
1155                                 switch (v->scope) {
1156                                 case InScope:
1157                                         v->scope = OutScope;
1158                                         break;
1159                                 case PendingScope:
1160                                         /* There was no 'else', so we can only become
1161                                          * conditional if we know the cases were exhaustive,
1162                                          * and that doesn't mean anything yet.
1163                                          * So only labels become conditional..
1164                                          */
1165                                         for (v2 = v;
1166                                              v2 && v2->scope == PendingScope;
1167                                              v2 = v2->previous)
1168                                                 if (v2->type == Tlabel) {
1169                                                         v2->scope = CondScope;
1170                                                         v2->min_depth = c->scope_depth;
1171                                                 } else
1172                                                         v2->scope = OutScope;
1173                                         break;
1174                                 case CondScope:
1175                                 case OutScope: break;
1176                                 }
1177                                 break;
1178                         }
1179                         if (v->scope == OutScope || v->name->var != v)
1180                                 *vp = v->in_scope;
1181                         else
1182                                 vp = &v->in_scope;
1183                 }
1184         }
1185
1186 #### Storing Values
1187
1188 The value of a variable is store separately from the variable, on an
1189 analogue of a stack frame.  There are (currently) two frames that can be
1190 active.  A global frame which currently only stores constants, and a
1191 stacked frame which stores local variables.  Each variable knows if it
1192 is global or not, and what its index into the frame is.
1193
1194 Values in the global frame are known immediately they are relevant, so
1195 the frame needs to be reallocated as it grows so it can store those
1196 values.  The local frame doesn't get values until the interpreted phase
1197 is started, so there is no need to allocate until the size is known.
1198
1199 ###### variable fields
1200                 short frame_pos;
1201                 short global;
1202
1203 ###### parse context
1204
1205         short global_size, global_alloc;
1206         short local_size;
1207         void *global, *local;
1208
1209 ###### ast functions
1210
1211         static struct value *var_value(struct parse_context *c, struct variable *v)
1212         {
1213                 if (!v->global) {
1214                         if (!c->local || !v->type)
1215                                 return NULL;
1216                         if (v->frame_pos + v->type->size > c->local_size) {
1217                                 printf("INVALID frame_pos\n"); // NOTEST
1218                                 exit(2);
1219                         }
1220                         return c->local + v->frame_pos;
1221                 }
1222                 if (c->global_size > c->global_alloc) {
1223                         int old = c->global_alloc;
1224                         c->global_alloc = (c->global_size | 1023) + 1024;
1225                         c->global = realloc(c->global, c->global_alloc);
1226                         memset(c->global + old, 0, c->global_alloc - old);
1227                 }
1228                 return c->global + v->frame_pos;
1229         }
1230
1231         static struct value *global_alloc(struct parse_context *c, struct type *t,
1232                                           struct variable *v, struct value *init)
1233         {
1234                 struct value *ret;
1235                 struct variable scratch;
1236
1237                 if (t->prepare_type)
1238                         t->prepare_type(c, t, 1);
1239
1240                 if (c->global_size & (t->align - 1))
1241                         c->global_size = (c->global_size + t->align) & ~(t->align-1);
1242                 if (!v) {
1243                         v = &scratch;
1244                         v->type = t;
1245                 }
1246                 v->frame_pos = c->global_size;
1247                 v->global = 1;
1248                 c->global_size += v->type->size;
1249                 ret = var_value(c, v);
1250                 if (init)
1251                         memcpy(ret, init, t->size);
1252                 else
1253                         val_init(t, ret);
1254                 return ret;
1255         }
1256
1257 As global values are found -- struct field initializers, labels etc --
1258 `global_alloc()` is called to record the value in the global frame.
1259
1260 When the program is fully parsed, we need to walk the list of variables
1261 to find any that weren't merged away and that aren't global, and to
1262 calculate the frame size and assign a frame position for each variable.
1263 For this we have `scope_finalize()`.
1264
1265 ###### ast functions
1266
1267         static void scope_finalize(struct parse_context *c)
1268         {
1269                 struct binding *b;
1270
1271                 for (b = c->varlist; b; b = b->next) {
1272                         struct variable *v;
1273                         for (v = b->var; v; v = v->previous) {
1274                                 struct type *t = v->type;
1275                                 if (v->merged && v->merged != v)
1276                                         continue;
1277                                 if (v->global)
1278                                         continue;
1279                                 if (c->local_size & (t->align - 1))
1280                                         c->local_size = (c->local_size + t->align) & ~(t->align-1);
1281                                 v->frame_pos = c->local_size;
1282                                 c->local_size += v->type->size;
1283                         }
1284                 }
1285                 c->local = calloc(1, c->local_size);
1286         }
1287
1288 ###### free context vars
1289         free(context.global);
1290         free(context.local);
1291
1292 ### Executables
1293
1294 Executables can be lots of different things.  In many cases an
1295 executable is just an operation combined with one or two other
1296 executables.  This allows for expressions and lists etc.  Other times an
1297 executable is something quite specific like a constant or variable name.
1298 So we define a `struct exec` to be a general executable with a type, and
1299 a `struct binode` which is a subclass of `exec`, forms a node in a
1300 binary tree, and holds an operation.  There will be other subclasses,
1301 and to access these we need to be able to `cast` the `exec` into the
1302 various other types.  The first field in any `struct exec` is the type
1303 from the `exec_types` enum.
1304
1305 ###### macros
1306         #define cast(structname, pointer) ({            \
1307                 const typeof( ((struct structname *)0)->type) *__mptr = &(pointer)->type; \
1308                 if (__mptr && *__mptr != X##structname) abort();                \
1309                 (struct structname *)( (char *)__mptr);})
1310
1311         #define new(structname) ({                                              \
1312                 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1313                 __ptr->type = X##structname;                                            \
1314                 __ptr->line = -1; __ptr->column = -1;                                   \
1315                 __ptr;})
1316
1317         #define new_pos(structname, token) ({                                           \
1318                 struct structname *__ptr = ((struct structname *)calloc(1,sizeof(struct structname))); \
1319                 __ptr->type = X##structname;                                            \
1320                 __ptr->line = token.line; __ptr->column = token.col;                    \
1321                 __ptr;})
1322
1323 ###### ast
1324         enum exec_types {
1325                 Xbinode,
1326                 ## exec type
1327         };
1328         struct exec {
1329                 enum exec_types type;
1330                 int line, column;
1331         };
1332         struct binode {
1333                 struct exec;
1334                 enum Btype {
1335                         ## Binode types
1336                 } op;
1337                 struct exec *left, *right;
1338         };
1339
1340 ###### ast functions
1341
1342         static int __fput_loc(struct exec *loc, FILE *f)
1343         {
1344                 if (!loc)
1345                         return 0;               // NOTEST
1346                 if (loc->line >= 0) {
1347                         fprintf(f, "%d:%d: ", loc->line, loc->column);
1348                         return 1;
1349                 }
1350                 if (loc->type == Xbinode)
1351                         return __fput_loc(cast(binode,loc)->left, f) ||
1352                                __fput_loc(cast(binode,loc)->right, f);
1353                 return 0;
1354         }
1355         static void fput_loc(struct exec *loc, FILE *f)
1356         {
1357                 if (!__fput_loc(loc, f))
1358                         fprintf(f, "??:??: ");  // NOTEST
1359         }
1360
1361 Each different type of `exec` node needs a number of functions defined,
1362 a bit like methods.  We must be able to free it, print it, analyse it
1363 and execute it.  Once we have specific `exec` types we will need to
1364 parse them too.  Let's take this a bit more slowly.
1365
1366 #### Freeing
1367
1368 The parser generator requires a `free_foo` function for each struct
1369 that stores attributes and they will often be `exec`s and subtypes
1370 there-of.  So we need `free_exec` which can handle all the subtypes,
1371 and we need `free_binode`.
1372
1373 ###### ast functions
1374
1375         static void free_binode(struct binode *b)
1376         {
1377                 if (!b)
1378                         return;
1379                 free_exec(b->left);
1380                 free_exec(b->right);
1381                 free(b);
1382         }
1383
1384 ###### core functions
1385         static void free_exec(struct exec *e)
1386         {
1387                 if (!e)
1388                         return;
1389                 switch(e->type) {
1390                         ## free exec cases
1391                 }
1392         }
1393
1394 ###### forward decls
1395
1396         static void free_exec(struct exec *e);
1397
1398 ###### free exec cases
1399         case Xbinode: free_binode(cast(binode, e)); break;
1400
1401 #### Printing
1402
1403 Printing an `exec` requires that we know the current indent level for
1404 printing line-oriented components.  As will become clear later, we
1405 also want to know what sort of bracketing to use.
1406
1407 ###### ast functions
1408
1409         static void do_indent(int i, char *str)
1410         {
1411                 while (i--)
1412                         printf("    ");
1413                 printf("%s", str);
1414         }
1415
1416 ###### core functions
1417         static void print_binode(struct binode *b, int indent, int bracket)
1418         {
1419                 struct binode *b2;
1420                 switch(b->op) {
1421                 ## print binode cases
1422                 }
1423         }
1424
1425         static void print_exec(struct exec *e, int indent, int bracket)
1426         {
1427                 if (!e)
1428                         return;         // NOTEST
1429                 switch (e->type) {
1430                 case Xbinode:
1431                         print_binode(cast(binode, e), indent, bracket); break;
1432                 ## print exec cases
1433                 }
1434         }
1435
1436 ###### forward decls
1437
1438         static void print_exec(struct exec *e, int indent, int bracket);
1439
1440 #### Analysing
1441
1442 As discussed, analysis involves propagating type requirements around the
1443 program and looking for errors.
1444
1445 So `propagate_types` is passed an expected type (being a `struct type`
1446 pointer together with some `val_rules` flags) that the `exec` is
1447 expected to return, and returns the type that it does return, either
1448 of which can be `NULL` signifying "unknown".  An `ok` flag is passed
1449 by reference. It is set to `0` when an error is found, and `2` when
1450 any change is made.  If it remains unchanged at `1`, then no more
1451 propagation is needed.
1452
1453 ###### ast
1454
1455         enum val_rules {Rnolabel = 1<<0, Rboolok = 1<<1, Rnoconstant = 2<<1};
1456
1457 ###### format cases
1458         case 'r':
1459                 if (rules & Rnolabel)
1460                         fputs(" (labels not permitted)", stderr);
1461                 break;
1462
1463 ###### core functions
1464
1465         static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1466                                             struct type *type, int rules);
1467         static struct type *__propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1468                                               struct type *type, int rules)
1469         {
1470                 struct type *t;
1471
1472                 if (!prog)
1473                         return Tnone;
1474
1475                 switch (prog->type) {
1476                 case Xbinode:
1477                 {
1478                         struct binode *b = cast(binode, prog);
1479                         switch (b->op) {
1480                         ## propagate binode cases
1481                         }
1482                         break;
1483                 }
1484                 ## propagate exec cases
1485                 }
1486                 return Tnone;
1487         }
1488
1489         static struct type *propagate_types(struct exec *prog, struct parse_context *c, int *ok,
1490                                             struct type *type, int rules)
1491         {
1492                 struct type *ret = __propagate_types(prog, c, ok, type, rules);
1493
1494                 if (c->parse_error)
1495                         *ok = 0;
1496                 return ret;
1497         }
1498
1499 #### Interpreting
1500
1501 Interpreting an `exec` doesn't require anything but the `exec`.  State
1502 is stored in variables and each variable will be directly linked from
1503 within the `exec` tree.  The exception to this is the whole `program`
1504 which needs to look at command line arguments.  The `program` will be
1505 interpreted separately.
1506
1507 Each `exec` can return a value combined with a type in `struct lrval`.
1508 The type may be `Tnone` but must be non-NULL.  Some `exec`s will return
1509 the location of a value, which can be updated, in `lval`.  Others will
1510 set `lval` to NULL indicating that there is a value of appropriate type
1511 in `rval`.
1512
1513
1514 ###### core functions
1515
1516         struct lrval {
1517                 struct type *type;
1518                 struct value rval, *lval;
1519         };
1520
1521         static struct lrval _interp_exec(struct parse_context *c, struct exec *e);
1522
1523         static struct value interp_exec(struct parse_context *c, struct exec *e,
1524                                         struct type **typeret)
1525         {
1526                 struct lrval ret = _interp_exec(c, e);
1527
1528                 if (!ret.type) abort();
1529                 if (typeret)
1530                         *typeret = ret.type;
1531                 if (ret.lval)
1532                         dup_value(ret.type, ret.lval, &ret.rval);
1533                 return ret.rval;
1534         }
1535
1536         static struct value *linterp_exec(struct parse_context *c, struct exec *e,
1537                                           struct type **typeret)
1538         {
1539                 struct lrval ret = _interp_exec(c, e);
1540
1541                 if (ret.lval)
1542                         *typeret = ret.type;
1543                 else
1544                         free_value(ret.type, &ret.rval);
1545                 return ret.lval;
1546         }
1547
1548         static struct lrval _interp_exec(struct parse_context *c, struct exec *e)
1549         {
1550                 struct lrval ret;
1551                 struct value rv = {}, *lrv = NULL;
1552                 struct type *rvtype;
1553
1554                 rvtype = ret.type = Tnone;
1555                 if (!e) {
1556                         ret.lval = lrv;
1557                         ret.rval = rv;
1558                         return ret;
1559                 }
1560
1561                 switch(e->type) {
1562                 case Xbinode:
1563                 {
1564                         struct binode *b = cast(binode, e);
1565                         struct value left, right, *lleft;
1566                         struct type *ltype, *rtype;
1567                         ltype = rtype = Tnone;
1568                         switch (b->op) {
1569                         ## interp binode cases
1570                         }
1571                         free_value(ltype, &left);
1572                         free_value(rtype, &right);
1573                         break;
1574                 }
1575                 ## interp exec cases
1576                 }
1577                 ret.lval = lrv;
1578                 ret.rval = rv;
1579                 ret.type = rvtype;
1580                 return ret;
1581         }
1582
1583 ### Complex types
1584
1585 Now that we have the shape of the interpreter in place we can add some
1586 complex types and connected them in to the data structures and the
1587 different phases of parse, analyse, print, interpret.
1588
1589 Thus far we have arrays and structs.
1590
1591 #### Arrays
1592
1593 Arrays can be declared by giving a size and a type, as `[size]type' so
1594 `freq:[26]number` declares `freq` to be an array of 26 numbers.  The
1595 size can be either a literal number, or a named constant.  Some day an
1596 arbitrary expression will be supported.
1597
1598 Arrays cannot be assigned.  When pointers are introduced we will also
1599 introduce array slices which can refer to part or all of an array -
1600 the assignment syntax will create a slice.  For now, an array can only
1601 ever be referenced by the name it is declared with.  It is likely that
1602 a "`copy`" primitive will eventually be define which can be used to
1603 make a copy of an array with controllable recursive depth.
1604
1605 For now we have two sorts of array, those with fixed size either because
1606 it is given as a literal number or because it is a struct member (which
1607 cannot have a runtime-changing size), and those with a size that is
1608 determined at runtime - local variables with a const size.  The former
1609 have their size calculated at parse time, the latter at run time.
1610
1611 For the latter type, the `size` field of the type is the size of a
1612 pointer, and the array is reallocated every time it comes into scope.
1613
1614 We differentiate struct fields with a const size from local variables
1615 with a const size by whether they are prepared at parse time or not.
1616
1617 ###### type union fields
1618
1619         struct {
1620                 short size;
1621                 short static_size;
1622                 struct variable *vsize;
1623                 struct type *member;
1624         } array;
1625
1626 ###### value union fields
1627         void *array;  // used if not static_size
1628
1629 ###### value functions
1630
1631         static void array_prepare_type(struct parse_context *c, struct type *type,
1632                                        int parse_time)
1633         {
1634                 struct value *vsize;
1635                 mpz_t q;
1636                 if (!type->array.vsize || type->array.static_size)
1637                         return;
1638
1639                 vsize = var_value(c, type->array.vsize);
1640                 mpz_init(q);
1641                 mpz_tdiv_q(q, mpq_numref(vsize->num), mpq_denref(vsize->num));
1642                 type->array.size = mpz_get_si(q);
1643                 mpz_clear(q);
1644
1645                 if (parse_time) {
1646                         type->array.static_size = 1;
1647                         type->size = type->array.size * type->array.member->size;
1648                         type->align = type->array.member->align;
1649                 }
1650         }
1651
1652         static void array_init(struct type *type, struct value *val)
1653         {
1654                 int i;
1655                 void *ptr = val->ptr;
1656
1657                 if (!val)
1658                         return;
1659                 if (!type->array.static_size) {
1660                         val->array = calloc(type->array.size,
1661                                             type->array.member->size);
1662                         ptr = val->array;
1663                 }
1664                 for (i = 0; i < type->array.size; i++) {
1665                         struct value *v;
1666                         v = (void*)ptr + i * type->array.member->size;
1667                         val_init(type->array.member, v);
1668                 }
1669         }
1670
1671         static void array_free(struct type *type, struct value *val)
1672         {
1673                 int i;
1674                 void *ptr = val->ptr;
1675
1676                 if (!type->array.static_size)
1677                         ptr = val->array;
1678                 for (i = 0; i < type->array.size; i++) {
1679                         struct value *v;
1680                         v = (void*)ptr + i * type->array.member->size;
1681                         free_value(type->array.member, v);
1682                 }
1683                 if (!type->array.static_size)
1684                         free(ptr);
1685         }
1686
1687         static int array_compat(struct type *require, struct type *have)
1688         {
1689                 if (have->compat != require->compat)
1690                         return 0;
1691                 /* Both are arrays, so we can look at details */
1692                 if (!type_compat(require->array.member, have->array.member, 0))
1693                         return 0;
1694                 if (require->array.vsize == NULL && have->array.vsize == NULL)
1695                         return require->array.size == have->array.size;
1696
1697                 return require->array.vsize == have->array.vsize;
1698         }
1699
1700         static void array_print_type(struct type *type, FILE *f)
1701         {
1702                 fputs("[", f);
1703                 if (type->array.vsize) {
1704                         struct binding *b = type->array.vsize->name;
1705                         fprintf(f, "%.*s]", b->name.len, b->name.txt);
1706                 } else
1707                         fprintf(f, "%d]", type->array.size);
1708                 type_print(type->array.member, f);
1709         }
1710
1711         static struct type array_prototype = {
1712                 .init = array_init,
1713                 .prepare_type = array_prepare_type,
1714                 .print_type = array_print_type,
1715                 .compat = array_compat,
1716                 .free = array_free,
1717                 .size = sizeof(void*),
1718                 .align = sizeof(void*),
1719         };
1720
1721 ###### declare terminals
1722         $TERM [ ]
1723
1724 ###### type grammar
1725
1726         | [ NUMBER ] Type ${ {
1727                 char tail[3];
1728                 mpq_t num;
1729                 struct text noname = { "", 0 };
1730                 struct type *t;
1731
1732                 $0 = t = add_type(c, noname, &array_prototype);
1733                 t->array.member = $<4;
1734                 t->array.vsize = NULL;
1735                 if (number_parse(num, tail, $2.txt) == 0)
1736                         tok_err(c, "error: unrecognised number", &$2);
1737                 else if (tail[0])
1738                         tok_err(c, "error: unsupported number suffix", &$2);
1739                 else {
1740                         t->array.size = mpz_get_ui(mpq_numref(num));
1741                         if (mpz_cmp_ui(mpq_denref(num), 1) != 0) {
1742                                 tok_err(c, "error: array size must be an integer",
1743                                         &$2);
1744                         } else if (mpz_cmp_ui(mpq_numref(num), 1UL << 30) >= 0)
1745                                 tok_err(c, "error: array size is too large",
1746                                         &$2);
1747                         mpq_clear(num);
1748                 }
1749                 t->array.static_size = 1;
1750                 t->size = t->array.size * t->array.member->size;
1751                 t->align = t->array.member->align;
1752         } }$
1753
1754         | [ IDENTIFIER ] Type ${ {
1755                 struct variable *v = var_ref(c, $2.txt);
1756                 struct text noname = { "", 0 };
1757
1758                 if (!v)
1759                         tok_err(c, "error: name undeclared", &$2);
1760                 else if (!v->constant)
1761                         tok_err(c, "error: array size must be a constant", &$2);
1762
1763                 $0 = add_type(c, noname, &array_prototype);
1764                 $0->array.member = $<4;
1765                 $0->array.size = 0;
1766                 $0->array.vsize = v;
1767         } }$
1768
1769 ###### Binode types
1770         Index,
1771
1772 ###### variable grammar
1773
1774         | Variable [ Expression ] ${ {
1775                 struct binode *b = new(binode);
1776                 b->op = Index;
1777                 b->left = $<1;
1778                 b->right = $<3;
1779                 $0 = b;
1780         } }$
1781
1782 ###### print binode cases
1783         case Index:
1784                 print_exec(b->left, -1, bracket);
1785                 printf("[");
1786                 print_exec(b->right, -1, bracket);
1787                 printf("]");
1788                 break;
1789
1790 ###### propagate binode cases
1791         case Index:
1792                 /* left must be an array, right must be a number,
1793                  * result is the member type of the array
1794                  */
1795                 propagate_types(b->right, c, ok, Tnum, 0);
1796                 t = propagate_types(b->left, c, ok, NULL, rules & Rnoconstant);
1797                 if (!t || t->compat != array_compat) {
1798                         type_err(c, "error: %1 cannot be indexed", prog, t, 0, NULL);
1799                         return NULL;
1800                 } else {
1801                         if (!type_compat(type, t->array.member, rules)) {
1802                                 type_err(c, "error: have %1 but need %2", prog,
1803                                          t->array.member, rules, type);
1804                         }
1805                         return t->array.member;
1806                 }
1807                 break;
1808
1809 ###### interp binode cases
1810         case Index: {
1811                 mpz_t q;
1812                 long i;
1813                 void *ptr;
1814
1815                 lleft = linterp_exec(c, b->left, &ltype);
1816                 right = interp_exec(c, b->right, &rtype);
1817                 mpz_init(q);
1818                 mpz_tdiv_q(q, mpq_numref(right.num), mpq_denref(right.num));
1819                 i = mpz_get_si(q);
1820                 mpz_clear(q);
1821
1822                 if (ltype->array.static_size)
1823                         ptr = lleft;
1824                 else
1825                         ptr = *(void**)lleft;
1826                 rvtype = ltype->array.member;
1827                 if (i >= 0 && i < ltype->array.size)
1828                         lrv = ptr + i * rvtype->size;
1829                 else
1830                         val_init(ltype->array.member, &rv);
1831                 ltype = NULL;
1832                 break;
1833         }
1834
1835 #### Structs
1836
1837 A `struct` is a data-type that contains one or more other data-types.
1838 It differs from an array in that each member can be of a different
1839 type, and they are accessed by name rather than by number.  Thus you
1840 cannot choose an element by calculation, you need to know what you
1841 want up-front.
1842
1843 The language makes no promises about how a given structure will be
1844 stored in memory - it is free to rearrange fields to suit whatever
1845 criteria seems important.
1846
1847 Structs are declared separately from program code - they cannot be
1848 declared in-line in a variable declaration like arrays can.  A struct
1849 is given a name and this name is used to identify the type - the name
1850 is not prefixed by the word `struct` as it would be in C.
1851
1852 Structs are only treated as the same if they have the same name.
1853 Simply having the same fields in the same order is not enough.  This
1854 might change once we can create structure initializers from a list of
1855 values.
1856
1857 Each component datum is identified much like a variable is declared,
1858 with a name, one or two colons, and a type.  The type cannot be omitted
1859 as there is no opportunity to deduce the type from usage.  An initial
1860 value can be given following an equals sign, so
1861
1862 ##### Example: a struct type
1863
1864         struct complex:
1865                 x:number = 0
1866                 y:number = 0
1867
1868 would declare a type called "complex" which has two number fields,
1869 each initialised to zero.
1870
1871 Struct will need to be declared separately from the code that uses
1872 them, so we will need to be able to print out the declaration of a
1873 struct when reprinting the whole program.  So a `print_type_decl` type
1874 function will be needed.
1875
1876 ###### type union fields
1877
1878         struct {
1879                 int nfields;
1880                 struct field {
1881                         struct text name;
1882                         struct type *type;
1883                         struct value *init;
1884                         int offset;
1885                 } *fields;
1886         } structure;
1887
1888 ###### type functions
1889         void (*print_type_decl)(struct type *type, FILE *f);
1890
1891 ###### value functions
1892
1893         static void structure_init(struct type *type, struct value *val)
1894         {
1895                 int i;
1896
1897                 for (i = 0; i < type->structure.nfields; i++) {
1898                         struct value *v;
1899                         v = (void*) val->ptr + type->structure.fields[i].offset;
1900                         if (type->structure.fields[i].init)
1901                                 dup_value(type->structure.fields[i].type,
1902                                           type->structure.fields[i].init,
1903                                           v);
1904                         else
1905                                 val_init(type->structure.fields[i].type, v);
1906                 }
1907         }
1908
1909         static void structure_free(struct type *type, struct value *val)
1910         {
1911                 int i;
1912
1913                 for (i = 0; i < type->structure.nfields; i++) {
1914                         struct value *v;
1915                         v = (void*)val->ptr + type->structure.fields[i].offset;
1916                         free_value(type->structure.fields[i].type, v);
1917                 }
1918         }
1919
1920         static void structure_free_type(struct type *t)
1921         {
1922                 int i;
1923                 for (i = 0; i < t->structure.nfields; i++)
1924                         if (t->structure.fields[i].init) {
1925                                 free_value(t->structure.fields[i].type,
1926                                            t->structure.fields[i].init);
1927                         }
1928                 free(t->structure.fields);
1929         }
1930
1931         static struct type structure_prototype = {
1932                 .init = structure_init,
1933                 .free = structure_free,
1934                 .free_type = structure_free_type,
1935                 .print_type_decl = structure_print_type,
1936         };
1937
1938 ###### exec type
1939         Xfieldref,
1940
1941 ###### ast
1942         struct fieldref {
1943                 struct exec;
1944                 struct exec *left;
1945                 int index;
1946                 struct text name;
1947         };
1948
1949 ###### free exec cases
1950         case Xfieldref:
1951                 free_exec(cast(fieldref, e)->left);
1952                 free(e);
1953                 break;
1954
1955 ###### declare terminals
1956         $TERM struct .
1957
1958 ###### variable grammar
1959
1960         | Variable . IDENTIFIER ${ {
1961                 struct fieldref *fr = new_pos(fieldref, $2);
1962                 fr->left = $<1;
1963                 fr->name = $3.txt;
1964                 fr->index = -2;
1965                 $0 = fr;
1966         } }$
1967
1968 ###### print exec cases
1969
1970         case Xfieldref:
1971         {
1972                 struct fieldref *f = cast(fieldref, e);
1973                 print_exec(f->left, -1, bracket);
1974                 printf(".%.*s", f->name.len, f->name.txt);
1975                 break;
1976         }
1977
1978 ###### ast functions
1979         static int find_struct_index(struct type *type, struct text field)
1980         {
1981                 int i;
1982                 for (i = 0; i < type->structure.nfields; i++)
1983                         if (text_cmp(type->structure.fields[i].name, field) == 0)
1984                                 return i;
1985                 return -1;
1986         }
1987
1988 ###### propagate exec cases
1989
1990         case Xfieldref:
1991         {
1992                 struct fieldref *f = cast(fieldref, prog);
1993                 struct type *st = propagate_types(f->left, c, ok, NULL, 0);
1994
1995                 if (!st)
1996                         type_err(c, "error: unknown type for field access", f->left,
1997                                  NULL, 0, NULL);
1998                 else if (st->init != structure_init)
1999                         type_err(c, "error: field reference attempted on %1, not a struct",
2000                                  f->left, st, 0, NULL);
2001                 else if (f->index == -2) {
2002                         f->index = find_struct_index(st, f->name);
2003                         if (f->index < 0)
2004                                 type_err(c, "error: cannot find requested field in %1",
2005                                          f->left, st, 0, NULL);
2006                 }
2007                 if (f->index >= 0) {
2008                         struct type *ft = st->structure.fields[f->index].type;
2009                         if (!type_compat(type, ft, rules))
2010                                 type_err(c, "error: have %1 but need %2", prog,
2011                                          ft, rules, type);
2012                         return ft;
2013                 }
2014                 break;
2015         }
2016
2017 ###### interp exec cases
2018         case Xfieldref:
2019         {
2020                 struct fieldref *f = cast(fieldref, e);
2021                 struct type *ltype;
2022                 struct value *lleft = linterp_exec(c, f->left, &ltype);
2023                 lrv = (void*)lleft->ptr + ltype->structure.fields[f->index].offset;
2024                 rvtype = ltype->structure.fields[f->index].type;
2025                 break;
2026         }
2027
2028 ###### ast
2029         struct fieldlist {
2030                 struct fieldlist *prev;
2031                 struct field f;
2032         };
2033
2034 ###### ast functions
2035         static void free_fieldlist(struct fieldlist *f)
2036         {
2037                 if (!f)
2038                         return;
2039                 free_fieldlist(f->prev);
2040                 if (f->f.init) {
2041                         free_value(f->f.type, f->f.init);
2042                         free(f->f.init);
2043                 }
2044                 free(f);
2045         }
2046
2047 ###### top level grammar
2048         DeclareStruct -> struct IDENTIFIER FieldBlock Newlines ${ {
2049                         struct type *t =
2050                                 add_type(c, $2.txt, &structure_prototype);
2051                         int cnt = 0;
2052                         struct fieldlist *f;
2053
2054                         for (f = $3; f; f=f->prev)
2055                                 cnt += 1;
2056
2057                         t->structure.nfields = cnt;
2058                         t->structure.fields = calloc(cnt, sizeof(struct field));
2059                         f = $3;
2060                         while (cnt > 0) {
2061                                 int a = f->f.type->align;
2062                                 cnt -= 1;
2063                                 t->structure.fields[cnt] = f->f;
2064                                 if (t->size & (a-1))
2065                                         t->size = (t->size | (a-1)) + 1;
2066                                 t->structure.fields[cnt].offset = t->size;
2067                                 t->size += ((f->f.type->size - 1) | (a-1)) + 1;
2068                                 if (a > t->align)
2069                                         t->align = a;
2070                                 f->f.init = NULL;
2071                                 f = f->prev;
2072                         }
2073                 } }$
2074
2075         $*fieldlist
2076         FieldBlock -> { IN OptNL FieldLines OUT OptNL } ${ $0 = $<FL; }$
2077                 | { SimpleFieldList } ${ $0 = $<SFL; }$
2078                 | IN OptNL FieldLines OUT ${ $0 = $<FL; }$
2079                 | SimpleFieldList EOL ${ $0 = $<SFL; }$
2080
2081         FieldLines -> SimpleFieldList Newlines ${ $0 = $<SFL; }$
2082                 | FieldLines SimpleFieldList Newlines ${
2083                         $SFL->prev = $<FL;
2084                         $0 = $<SFL;
2085                 }$
2086
2087         SimpleFieldList -> Field ${ $0 = $<F; }$
2088                 | SimpleFieldList ; Field ${
2089                         $F->prev = $<SFL;
2090                         $0 = $<F;
2091                 }$
2092                 | SimpleFieldList ; ${
2093                         $0 = $<SFL;
2094                 }$
2095                 | ERROR ${ tok_err(c, "Syntax error in struct field", &$1); }$
2096
2097         Field -> IDENTIFIER : Type = Expression ${ {
2098                         int ok;
2099
2100                         $0 = calloc(1, sizeof(struct fieldlist));
2101                         $0->f.name = $1.txt;
2102                         $0->f.type = $<3;
2103                         $0->f.init = NULL;
2104                         do {
2105                                 ok = 1;
2106                                 propagate_types($<5, c, &ok, $3, 0);
2107                         } while (ok == 2);
2108                         if (!ok)
2109                                 c->parse_error = 1;
2110                         else {
2111                                 struct value vl = interp_exec(c, $5, NULL);
2112                                 $0->f.init = global_alloc(c, $0->f.type, NULL, &vl);
2113                         }
2114                 } }$
2115                 | IDENTIFIER : Type ${
2116                         $0 = calloc(1, sizeof(struct fieldlist));
2117                         $0->f.name = $1.txt;
2118                         $0->f.type = $<3;
2119                         if ($0->f.type->prepare_type)
2120                                 $0->f.type->prepare_type(c, $0->f.type, 1);
2121                 }$
2122
2123 ###### forward decls
2124         static void structure_print_type(struct type *t, FILE *f);
2125
2126 ###### value functions
2127         static void structure_print_type(struct type *t, FILE *f)
2128         {
2129                 int i;
2130
2131                 fprintf(f, "struct %.*s\n", t->name.len, t->name.txt);
2132
2133                 for (i = 0; i < t->structure.nfields; i++) {
2134                         struct field *fl = t->structure.fields + i;
2135                         fprintf(f, "    %.*s : ", fl->name.len, fl->name.txt);
2136                         type_print(fl->type, f);
2137                         if (fl->type->print && fl->init) {
2138                                 fprintf(f, " = ");
2139                                 if (fl->type == Tstr)
2140                                         fprintf(f, "\"");
2141                                 print_value(fl->type, fl->init);
2142                                 if (fl->type == Tstr)
2143                                         fprintf(f, "\"");
2144                         }
2145                         printf("\n");
2146                 }
2147         }
2148
2149 ###### print type decls
2150         {
2151                 struct type *t;
2152                 int target = -1;
2153
2154                 while (target != 0) {
2155                         int i = 0;
2156                         for (t = context.typelist; t ; t=t->next)
2157                                 if (t->print_type_decl) {
2158                                         i += 1;
2159                                         if (i == target)
2160                                                 break;
2161                                 }
2162
2163                         if (target == -1) {
2164                                 target = i;
2165                         } else {
2166                                 t->print_type_decl(t, stdout);
2167                                 target -= 1;
2168                         }
2169                 }
2170         }
2171
2172 ## Executables: the elements of code
2173
2174 Each code element needs to be parsed, printed, analysed,
2175 interpreted, and freed.  There are several, so let's just start with
2176 the easy ones and work our way up.
2177
2178 ### Values
2179
2180 We have already met values as separate objects.  When manifest
2181 constants appear in the program text, that must result in an executable
2182 which has a constant value.  So the `val` structure embeds a value in
2183 an executable.
2184
2185 ###### exec type
2186         Xval,
2187
2188 ###### ast
2189         struct val {
2190                 struct exec;
2191                 struct type *vtype;
2192                 struct value val;
2193         };
2194
2195 ###### ast functions
2196         struct val *new_val(struct type *T, struct token tk)
2197         {
2198                 struct val *v = new_pos(val, tk);
2199                 v->vtype = T;
2200                 return v;
2201         }
2202
2203 ###### Grammar
2204
2205         $TERM True False
2206
2207         $*val
2208         Value ->  True ${
2209                         $0 = new_val(Tbool, $1);
2210                         $0->val.bool = 1;
2211                         }$
2212                 | False ${
2213                         $0 = new_val(Tbool, $1);
2214                         $0->val.bool = 0;
2215                         }$
2216                 | NUMBER ${
2217                         $0 = new_val(Tnum, $1);
2218                         {
2219                         char tail[3];
2220                         if (number_parse($0->val.num, tail, $1.txt) == 0)
2221                                 mpq_init($0->val.num);
2222                                 if (tail[0])
2223                                         tok_err(c, "error: unsupported number suffix",
2224                                                 &$1);
2225                         }
2226                         }$
2227                 | STRING ${
2228                         $0 = new_val(Tstr, $1);
2229                         {
2230                         char tail[3];
2231                         string_parse(&$1, '\\', &$0->val.str, tail);
2232                         if (tail[0])
2233                                 tok_err(c, "error: unsupported string suffix",
2234                                         &$1);
2235                         }
2236                         }$
2237                 | MULTI_STRING ${
2238                         $0 = new_val(Tstr, $1);
2239                         {
2240                         char tail[3];
2241                         string_parse(&$1, '\\', &$0->val.str, tail);
2242                         if (tail[0])
2243                                 tok_err(c, "error: unsupported string suffix",
2244                                         &$1);
2245                         }
2246                         }$
2247
2248 ###### print exec cases
2249         case Xval:
2250         {
2251                 struct val *v = cast(val, e);
2252                 if (v->vtype == Tstr)
2253                         printf("\"");
2254                 print_value(v->vtype, &v->val);
2255                 if (v->vtype == Tstr)
2256                         printf("\"");
2257                 break;
2258         }
2259
2260 ###### propagate exec cases
2261         case Xval:
2262         {
2263                 struct val *val = cast(val, prog);
2264                 if (!type_compat(type, val->vtype, rules))
2265                         type_err(c, "error: expected %1%r found %2",
2266                                    prog, type, rules, val->vtype);
2267                 return val->vtype;
2268         }
2269
2270 ###### interp exec cases
2271         case Xval:
2272                 rvtype = cast(val, e)->vtype;
2273                 dup_value(rvtype, &cast(val, e)->val, &rv);
2274                 break;
2275
2276 ###### ast functions
2277         static void free_val(struct val *v)
2278         {
2279                 if (v)
2280                         free_value(v->vtype, &v->val);
2281                 free(v);
2282         }
2283
2284 ###### free exec cases
2285         case Xval: free_val(cast(val, e)); break;
2286
2287 ###### ast functions
2288         // Move all nodes from 'b' to 'rv', reversing their order.
2289         // In 'b' 'left' is a list, and 'right' is the last node.
2290         // In 'rv', left' is the first node and 'right' is a list.
2291         static struct binode *reorder_bilist(struct binode *b)
2292         {
2293                 struct binode *rv = NULL;
2294
2295                 while (b) {
2296                         struct exec *t = b->right;
2297                         b->right = rv;
2298                         rv = b;
2299                         if (b->left)
2300                                 b = cast(binode, b->left);
2301                         else
2302                                 b = NULL;
2303                         rv->left = t;
2304                 }
2305                 return rv;
2306         }
2307
2308 ### Variables
2309
2310 Just as we used a `val` to wrap a value into an `exec`, we similarly
2311 need a `var` to wrap a `variable` into an exec.  While each `val`
2312 contained a copy of the value, each `var` holds a link to the variable
2313 because it really is the same variable no matter where it appears.
2314 When a variable is used, we need to remember to follow the `->merged`
2315 link to find the primary instance.
2316
2317 ###### exec type
2318         Xvar,
2319
2320 ###### ast
2321         struct var {
2322                 struct exec;
2323                 struct variable *var;
2324         };
2325
2326 ###### Grammar
2327
2328         $TERM : ::
2329
2330         $*var
2331         VariableDecl -> IDENTIFIER : ${ {
2332                 struct variable *v = var_decl(c, $1.txt);
2333                 $0 = new_pos(var, $1);
2334                 $0->var = v;
2335                 if (v)
2336                         v->where_decl = $0;
2337                 else {
2338                         v = var_ref(c, $1.txt);
2339                         $0->var = v;
2340                         type_err(c, "error: variable '%v' redeclared",
2341                                  $0, NULL, 0, NULL);
2342                         type_err(c, "info: this is where '%v' was first declared",
2343                                  v->where_decl, NULL, 0, NULL);
2344                 }
2345         } }$
2346             | IDENTIFIER :: ${ {
2347                 struct variable *v = var_decl(c, $1.txt);
2348                 $0 = new_pos(var, $1);
2349                 $0->var = v;
2350                 if (v) {
2351                         v->where_decl = $0;
2352                         v->constant = 1;
2353                 } else {
2354                         v = var_ref(c, $1.txt);
2355                         $0->var = v;
2356                         type_err(c, "error: variable '%v' redeclared",
2357                                  $0, NULL, 0, NULL);
2358                         type_err(c, "info: this is where '%v' was first declared",
2359                                  v->where_decl, NULL, 0, NULL);
2360                 }
2361         } }$
2362             | IDENTIFIER : Type ${ {
2363                 struct variable *v = var_decl(c, $1.txt);
2364                 $0 = new_pos(var, $1);
2365                 $0->var = v;
2366                 if (v) {
2367                         v->where_decl = $0;
2368                         v->where_set = $0;
2369                         v->type = $<Type;
2370                 } else {
2371                         v = var_ref(c, $1.txt);
2372                         $0->var = v;
2373                         type_err(c, "error: variable '%v' redeclared",
2374                                  $0, NULL, 0, NULL);
2375                         type_err(c, "info: this is where '%v' was first declared",
2376                                  v->where_decl, NULL, 0, NULL);
2377                 }
2378         } }$
2379             | IDENTIFIER :: Type ${ {
2380                 struct variable *v = var_decl(c, $1.txt);
2381                 $0 = new_pos(var, $1);
2382                 $0->var = v;
2383                 if (v) {
2384                         v->where_decl = $0;
2385                         v->where_set = $0;
2386                         v->type = $<Type;
2387                         v->constant = 1;
2388                 } else {
2389                         v = var_ref(c, $1.txt);
2390                         $0->var = v;
2391                         type_err(c, "error: variable '%v' redeclared",
2392                                  $0, NULL, 0, NULL);
2393                         type_err(c, "info: this is where '%v' was first declared",
2394                                  v->where_decl, NULL, 0, NULL);
2395                 }
2396         } }$
2397
2398         $*exec
2399         Variable -> IDENTIFIER ${ {
2400                 struct variable *v = var_ref(c, $1.txt);
2401                 $0 = new_pos(var, $1);
2402                 if (v == NULL) {
2403                         /* This might be a label - allocate a var just in case */
2404                         v = var_decl(c, $1.txt);
2405                         if (v) {
2406                                 v->type = Tnone;
2407                                 v->where_decl = $0;
2408                                 v->where_set = $0;
2409                         }
2410                 }
2411                 cast(var, $0)->var = v;
2412         } }$
2413         ## variable grammar
2414
2415         $*type
2416         Type -> IDENTIFIER ${
2417                 $0 = find_type(c, $1.txt);
2418                 if (!$0) {
2419                         tok_err(c,
2420                                 "error: undefined type", &$1);
2421
2422                         $0 = Tnone;
2423                 }
2424         }$
2425         ## type grammar
2426
2427 ###### print exec cases
2428         case Xvar:
2429         {
2430                 struct var *v = cast(var, e);
2431                 if (v->var) {
2432                         struct binding *b = v->var->name;
2433                         printf("%.*s", b->name.len, b->name.txt);
2434                 }
2435                 break;
2436         }
2437
2438 ###### format cases
2439         case 'v':
2440                 if (loc->type == Xvar) {
2441                         struct var *v = cast(var, loc);
2442                         if (v->var) {
2443                                 struct binding *b = v->var->name;
2444                                 fprintf(stderr, "%.*s", b->name.len, b->name.txt);
2445                         } else
2446                                 fputs("???", stderr);   // NOTEST
2447                 } else
2448                         fputs("NOTVAR", stderr);        // NOTEST
2449                 break;
2450
2451 ###### propagate exec cases
2452
2453         case Xvar:
2454         {
2455                 struct var *var = cast(var, prog);
2456                 struct variable *v = var->var;
2457                 if (!v) {
2458                         type_err(c, "%d:BUG: no variable!!", prog, NULL, 0, NULL); // NOTEST
2459                         return Tnone;                                   // NOTEST
2460                 }
2461                 if (v->merged)
2462                         v = v->merged;
2463                 if (v->constant && (rules & Rnoconstant)) {
2464                         type_err(c, "error: Cannot assign to a constant: %v",
2465                                  prog, NULL, 0, NULL);
2466                         type_err(c, "info: name was defined as a constant here",
2467                                  v->where_decl, NULL, 0, NULL);
2468                         return v->type;
2469                 }
2470                 if (v->type == Tnone && v->where_decl == prog)
2471                         type_err(c, "error: variable used but not declared: %v",
2472                                  prog, NULL, 0, NULL);
2473                 if (v->type == NULL) {
2474                         if (type && *ok != 0) {
2475                                 v->type = type;
2476                                 v->where_set = prog;
2477                                 *ok = 2;
2478                         }
2479                         return type;
2480                 }
2481                 if (!type_compat(type, v->type, rules)) {
2482                         type_err(c, "error: expected %1%r but variable '%v' is %2", prog,
2483                                  type, rules, v->type);
2484                         type_err(c, "info: this is where '%v' was set to %1", v->where_set,
2485                                  v->type, rules, NULL);
2486                 }
2487                 if (!type)
2488                         return v->type;
2489                 return type;
2490         }
2491
2492 ###### interp exec cases
2493         case Xvar:
2494         {
2495                 struct var *var = cast(var, e);
2496                 struct variable *v = var->var;
2497
2498                 if (v->merged)
2499                         v = v->merged;
2500                 lrv = var_value(c, v);
2501                 rvtype = v->type;
2502                 break;
2503         }
2504
2505 ###### ast functions
2506
2507         static void free_var(struct var *v)
2508         {
2509                 free(v);
2510         }
2511
2512 ###### free exec cases
2513         case Xvar: free_var(cast(var, e)); break;
2514
2515 ### Expressions: Conditional
2516
2517 Our first user of the `binode` will be conditional expressions, which
2518 is a bit odd as they actually have three components.  That will be
2519 handled by having 2 binodes for each expression.  The conditional
2520 expression is the lowest precedence operator which is why we define it
2521 first - to start the precedence list.
2522
2523 Conditional expressions are of the form "value `if` condition `else`
2524 other_value".  They associate to the right, so everything to the right
2525 of `else` is part of an else value, while only a higher-precedence to
2526 the left of `if` is the if values.  Between `if` and `else` there is no
2527 room for ambiguity, so a full conditional expression is allowed in
2528 there.
2529
2530 ###### Binode types
2531         CondExpr,
2532
2533 ###### Grammar
2534
2535         $LEFT if $$ifelse
2536         ## expr precedence
2537
2538         $*exec
2539         Expression -> Expression if Expression else Expression $$ifelse ${ {
2540                         struct binode *b1 = new(binode);
2541                         struct binode *b2 = new(binode);
2542                         b1->op = CondExpr;
2543                         b1->left = $<3;
2544                         b1->right = b2;
2545                         b2->op = CondExpr;
2546                         b2->left = $<1;
2547                         b2->right = $<5;
2548                         $0 = b1;
2549                 } }$
2550                 ## expression grammar
2551
2552 ###### print binode cases
2553
2554         case CondExpr:
2555                 b2 = cast(binode, b->right);
2556                 if (bracket) printf("(");
2557                 print_exec(b2->left, -1, bracket);
2558                 printf(" if ");
2559                 print_exec(b->left, -1, bracket);
2560                 printf(" else ");
2561                 print_exec(b2->right, -1, bracket);
2562                 if (bracket) printf(")");
2563                 break;
2564
2565 ###### propagate binode cases
2566
2567         case CondExpr: {
2568                 /* cond must be Tbool, others must match */
2569                 struct binode *b2 = cast(binode, b->right);
2570                 struct type *t2;
2571
2572                 propagate_types(b->left, c, ok, Tbool, 0);
2573                 t = propagate_types(b2->left, c, ok, type, Rnolabel);
2574                 t2 = propagate_types(b2->right, c, ok, type ?: t, Rnolabel);
2575                 return t ?: t2;
2576         }
2577
2578 ###### interp binode cases
2579
2580         case CondExpr: {
2581                 struct binode *b2 = cast(binode, b->right);
2582                 left = interp_exec(c, b->left, &ltype);
2583                 if (left.bool)
2584                         rv = interp_exec(c, b2->left, &rvtype);
2585                 else
2586                         rv = interp_exec(c, b2->right, &rvtype);
2587                 }
2588                 break;
2589
2590 ### Expressions: Boolean
2591
2592 The next class of expressions to use the `binode` will be Boolean
2593 expressions.  "`and then`" and "`or else`" are similar to `and` and `or`
2594 have same corresponding precendence.  The difference is that they don't
2595 evaluate the second expression if not necessary.
2596
2597 ###### Binode types
2598         And,
2599         AndThen,
2600         Or,
2601         OrElse,
2602         Not,
2603
2604 ###### expr precedence
2605         $LEFT or
2606         $LEFT and
2607         $LEFT not
2608
2609 ###### expression grammar
2610                 | Expression or Expression ${ {
2611                         struct binode *b = new(binode);
2612                         b->op = Or;
2613                         b->left = $<1;
2614                         b->right = $<3;
2615                         $0 = b;
2616                 } }$
2617                 | Expression or else Expression ${ {
2618                         struct binode *b = new(binode);
2619                         b->op = OrElse;
2620                         b->left = $<1;
2621                         b->right = $<4;
2622                         $0 = b;
2623                 } }$
2624
2625                 | Expression and Expression ${ {
2626                         struct binode *b = new(binode);
2627                         b->op = And;
2628                         b->left = $<1;
2629                         b->right = $<3;
2630                         $0 = b;
2631                 } }$
2632                 | Expression and then Expression ${ {
2633                         struct binode *b = new(binode);
2634                         b->op = AndThen;
2635                         b->left = $<1;
2636                         b->right = $<4;
2637                         $0 = b;
2638                 } }$
2639
2640                 | not Expression ${ {
2641                         struct binode *b = new(binode);
2642                         b->op = Not;
2643                         b->right = $<2;
2644                         $0 = b;
2645                 } }$
2646
2647 ###### print binode cases
2648         case And:
2649                 if (bracket) printf("(");
2650                 print_exec(b->left, -1, bracket);
2651                 printf(" and ");
2652                 print_exec(b->right, -1, bracket);
2653                 if (bracket) printf(")");
2654                 break;
2655         case AndThen:
2656                 if (bracket) printf("(");
2657                 print_exec(b->left, -1, bracket);
2658                 printf(" and then ");
2659                 print_exec(b->right, -1, bracket);
2660                 if (bracket) printf(")");
2661                 break;
2662         case Or:
2663                 if (bracket) printf("(");
2664                 print_exec(b->left, -1, bracket);
2665                 printf(" or ");
2666                 print_exec(b->right, -1, bracket);
2667                 if (bracket) printf(")");
2668                 break;
2669         case OrElse:
2670                 if (bracket) printf("(");
2671                 print_exec(b->left, -1, bracket);
2672                 printf(" or else ");
2673                 print_exec(b->right, -1, bracket);
2674                 if (bracket) printf(")");
2675                 break;
2676         case Not:
2677                 if (bracket) printf("(");
2678                 printf("not ");
2679                 print_exec(b->right, -1, bracket);
2680                 if (bracket) printf(")");
2681                 break;
2682
2683 ###### propagate binode cases
2684         case And:
2685         case AndThen:
2686         case Or:
2687         case OrElse:
2688         case Not:
2689                 /* both must be Tbool, result is Tbool */
2690                 propagate_types(b->left, c, ok, Tbool, 0);
2691                 propagate_types(b->right, c, ok, Tbool, 0);
2692                 if (type && type != Tbool)
2693                         type_err(c, "error: %1 operation found where %2 expected", prog,
2694                                    Tbool, 0, type);
2695                 return Tbool;
2696
2697 ###### interp binode cases
2698         case And:
2699                 rv = interp_exec(c, b->left, &rvtype);
2700                 right = interp_exec(c, b->right, &rtype);
2701                 rv.bool = rv.bool && right.bool;
2702                 break;
2703         case AndThen:
2704                 rv = interp_exec(c, b->left, &rvtype);
2705                 if (rv.bool)
2706                         rv = interp_exec(c, b->right, NULL);
2707                 break;
2708         case Or:
2709                 rv = interp_exec(c, b->left, &rvtype);
2710                 right = interp_exec(c, b->right, &rtype);
2711                 rv.bool = rv.bool || right.bool;
2712                 break;
2713         case OrElse:
2714                 rv = interp_exec(c, b->left, &rvtype);
2715                 if (!rv.bool)
2716                         rv = interp_exec(c, b->right, NULL);
2717                 break;
2718         case Not:
2719                 rv = interp_exec(c, b->right, &rvtype);
2720                 rv.bool = !rv.bool;
2721                 break;
2722
2723 ### Expressions: Comparison
2724
2725 Of slightly higher precedence that Boolean expressions are Comparisons.
2726 A comparison takes arguments of any comparable type, but the two types
2727 must be the same.
2728
2729 To simplify the parsing we introduce an `eop` which can record an
2730 expression operator, and the `CMPop` non-terminal will match one of them.
2731
2732 ###### ast
2733         struct eop {
2734                 enum Btype op;
2735         };
2736
2737 ###### ast functions
2738         static void free_eop(struct eop *e)
2739         {
2740                 if (e)
2741                         free(e);
2742         }
2743
2744 ###### Binode types
2745         Less,
2746         Gtr,
2747         LessEq,
2748         GtrEq,
2749         Eql,
2750         NEql,
2751
2752 ###### expr precedence
2753         $LEFT < > <= >= == != CMPop
2754
2755 ###### expression grammar
2756         | Expression CMPop Expression ${ {
2757                 struct binode *b = new(binode);
2758                 b->op = $2.op;
2759                 b->left = $<1;
2760                 b->right = $<3;
2761                 $0 = b;
2762         } }$
2763
2764 ###### Grammar
2765
2766         $eop
2767         CMPop ->   < ${ $0.op = Less; }$
2768                 |  > ${ $0.op = Gtr; }$
2769                 |  <= ${ $0.op = LessEq; }$
2770                 |  >= ${ $0.op = GtrEq; }$
2771                 |  == ${ $0.op = Eql; }$
2772                 |  != ${ $0.op = NEql; }$
2773
2774 ###### print binode cases
2775
2776         case Less:
2777         case LessEq:
2778         case Gtr:
2779         case GtrEq:
2780         case Eql:
2781         case NEql:
2782                 if (bracket) printf("(");
2783                 print_exec(b->left, -1, bracket);
2784                 switch(b->op) {
2785                 case Less:   printf(" < "); break;
2786                 case LessEq: printf(" <= "); break;
2787                 case Gtr:    printf(" > "); break;
2788                 case GtrEq:  printf(" >= "); break;
2789                 case Eql:    printf(" == "); break;
2790                 case NEql:   printf(" != "); break;
2791                 default: abort();               // NOTEST
2792                 }
2793                 print_exec(b->right, -1, bracket);
2794                 if (bracket) printf(")");
2795                 break;
2796
2797 ###### propagate binode cases
2798         case Less:
2799         case LessEq:
2800         case Gtr:
2801         case GtrEq:
2802         case Eql:
2803         case NEql:
2804                 /* Both must match but not be labels, result is Tbool */
2805                 t = propagate_types(b->left, c, ok, NULL, Rnolabel);
2806                 if (t)
2807                         propagate_types(b->right, c, ok, t, 0);
2808                 else {
2809                         t = propagate_types(b->right, c, ok, NULL, Rnolabel);
2810                         if (t)
2811                                 t = propagate_types(b->left, c, ok, t, 0);
2812                 }
2813                 if (!type_compat(type, Tbool, 0))
2814                         type_err(c, "error: Comparison returns %1 but %2 expected", prog,
2815                                     Tbool, rules, type);
2816                 return Tbool;
2817
2818 ###### interp binode cases
2819         case Less:
2820         case LessEq:
2821         case Gtr:
2822         case GtrEq:
2823         case Eql:
2824         case NEql:
2825         {
2826                 int cmp;
2827                 left = interp_exec(c, b->left, &ltype);
2828                 right = interp_exec(c, b->right, &rtype);
2829                 cmp = value_cmp(ltype, rtype, &left, &right);
2830                 rvtype = Tbool;
2831                 switch (b->op) {
2832                 case Less:      rv.bool = cmp <  0; break;
2833                 case LessEq:    rv.bool = cmp <= 0; break;
2834                 case Gtr:       rv.bool = cmp >  0; break;
2835                 case GtrEq:     rv.bool = cmp >= 0; break;
2836                 case Eql:       rv.bool = cmp == 0; break;
2837                 case NEql:      rv.bool = cmp != 0; break;
2838                 default:        rv.bool = 0; break;     // NOTEST
2839                 }
2840                 break;
2841         }
2842
2843 ### Expressions: The rest
2844
2845 The remaining expressions with the highest precedence are arithmetic,
2846 string concatenation, and string conversion.  String concatenation
2847 (`++`) has the same precedence as multiplication and division, but lower
2848 than the uniary.
2849
2850 String conversion is a temporary feature until I get a better type
2851 system.  `$` is a prefix operator which expects a string and returns
2852 a number.
2853
2854 `+` and `-` are both infix and prefix operations (where they are
2855 absolute value and negation).  These have different operator names.
2856
2857 We also have a 'Bracket' operator which records where parentheses were
2858 found.  This makes it easy to reproduce these when printing.  Possibly I
2859 should only insert brackets were needed for precedence.
2860
2861 ###### Binode types
2862         Plus, Minus,
2863         Times, Divide, Rem,
2864         Concat,
2865         Absolute, Negate,
2866         StringConv,
2867         Bracket,
2868
2869 ###### expr precedence
2870         $LEFT + - Eop
2871         $LEFT * / % ++ Top
2872         $LEFT Uop $
2873         $TERM ( )
2874
2875 ###### expression grammar
2876                 | Expression Eop Expression ${ {
2877                         struct binode *b = new(binode);
2878                         b->op = $2.op;
2879                         b->left = $<1;
2880                         b->right = $<3;
2881                         $0 = b;
2882                 } }$
2883
2884                 | Expression Top Expression ${ {
2885                         struct binode *b = new(binode);
2886                         b->op = $2.op;
2887                         b->left = $<1;
2888                         b->right = $<3;
2889                         $0 = b;
2890                 } }$
2891
2892                 | ( Expression ) ${ {
2893                         struct binode *b = new_pos(binode, $1);
2894                         b->op = Bracket;
2895                         b->right = $<2;
2896                         $0 = b;
2897                 } }$
2898                 | Uop Expression ${ {
2899                         struct binode *b = new(binode);
2900                         b->op = $1.op;
2901                         b->right = $<2;
2902                         $0 = b;
2903                 } }$
2904                 | Value ${ $0 = $<1; }$
2905                 | Variable ${ $0 = $<1; }$
2906
2907         $eop
2908         Eop ->    + ${ $0.op = Plus; }$
2909                 | - ${ $0.op = Minus; }$
2910
2911         Uop ->    + ${ $0.op = Absolute; }$
2912                 | - ${ $0.op = Negate; }$
2913                 | $ ${ $0.op = StringConv; }$
2914
2915         Top ->    * ${ $0.op = Times; }$
2916                 | / ${ $0.op = Divide; }$
2917                 | % ${ $0.op = Rem; }$
2918                 | ++ ${ $0.op = Concat; }$
2919
2920 ###### print binode cases
2921         case Plus:
2922         case Minus:
2923         case Times:
2924         case Divide:
2925         case Concat:
2926         case Rem:
2927                 if (bracket) printf("(");
2928                 print_exec(b->left, indent, bracket);
2929                 switch(b->op) {
2930                 case Plus:   fputs(" + ", stdout); break;
2931                 case Minus:  fputs(" - ", stdout); break;
2932                 case Times:  fputs(" * ", stdout); break;
2933                 case Divide: fputs(" / ", stdout); break;
2934                 case Rem:    fputs(" % ", stdout); break;
2935                 case Concat: fputs(" ++ ", stdout); break;
2936                 default: abort();       // NOTEST
2937                 }                       // NOTEST
2938                 print_exec(b->right, indent, bracket);
2939                 if (bracket) printf(")");
2940                 break;
2941         case Absolute:
2942         case Negate:
2943         case StringConv:
2944                 if (bracket) printf("(");
2945                 switch (b->op) {
2946                 case Absolute:   fputs("+", stdout); break;
2947                 case Negate:     fputs("-", stdout); break;
2948                 case StringConv: fputs("$", stdout); break;
2949                 default: abort();       // NOTEST
2950                 }                       // NOTEST
2951                 print_exec(b->right, indent, bracket);
2952                 if (bracket) printf(")");
2953                 break;
2954         case Bracket:
2955                 printf("(");
2956                 print_exec(b->right, indent, bracket);
2957                 printf(")");
2958                 break;
2959
2960 ###### propagate binode cases
2961         case Plus:
2962         case Minus:
2963         case Times:
2964         case Rem:
2965         case Divide:
2966                 /* both must be numbers, result is Tnum */
2967         case Absolute:
2968         case Negate:
2969                 /* as propagate_types ignores a NULL,
2970                  * unary ops fit here too */
2971                 propagate_types(b->left, c, ok, Tnum, 0);
2972                 propagate_types(b->right, c, ok, Tnum, 0);
2973                 if (!type_compat(type, Tnum, 0))
2974                         type_err(c, "error: Arithmetic returns %1 but %2 expected", prog,
2975                                    Tnum, rules, type);
2976                 return Tnum;
2977
2978         case Concat:
2979                 /* both must be Tstr, result is Tstr */
2980                 propagate_types(b->left, c, ok, Tstr, 0);
2981                 propagate_types(b->right, c, ok, Tstr, 0);
2982                 if (!type_compat(type, Tstr, 0))
2983                         type_err(c, "error: Concat returns %1 but %2 expected", prog,
2984                                    Tstr, rules, type);
2985                 return Tstr;
2986
2987         case StringConv:
2988                 /* op must be string, result is number */
2989                 propagate_types(b->left, c, ok, Tstr, 0);
2990                 if (!type_compat(type, Tnum, 0))
2991                         type_err(c,
2992                           "error: Can only convert string to number, not %1",
2993                                 prog, type, 0, NULL);
2994                 return Tnum;
2995
2996         case Bracket:
2997                 return propagate_types(b->right, c, ok, type, 0);
2998
2999 ###### interp binode cases
3000
3001         case Plus:
3002                 rv = interp_exec(c, b->left, &rvtype);
3003                 right = interp_exec(c, b->right, &rtype);
3004                 mpq_add(rv.num, rv.num, right.num);
3005                 break;
3006         case Minus:
3007                 rv = interp_exec(c, b->left, &rvtype);
3008                 right = interp_exec(c, b->right, &rtype);
3009                 mpq_sub(rv.num, rv.num, right.num);
3010                 break;
3011         case Times:
3012                 rv = interp_exec(c, b->left, &rvtype);
3013                 right = interp_exec(c, b->right, &rtype);
3014                 mpq_mul(rv.num, rv.num, right.num);
3015                 break;
3016         case Divide:
3017                 rv = interp_exec(c, b->left, &rvtype);
3018                 right = interp_exec(c, b->right, &rtype);
3019                 mpq_div(rv.num, rv.num, right.num);
3020                 break;
3021         case Rem: {
3022                 mpz_t l, r, rem;
3023
3024                 left = interp_exec(c, b->left, &ltype);
3025                 right = interp_exec(c, b->right, &rtype);
3026                 mpz_init(l); mpz_init(r); mpz_init(rem);
3027                 mpz_tdiv_q(l, mpq_numref(left.num), mpq_denref(left.num));
3028                 mpz_tdiv_q(r, mpq_numref(right.num), mpq_denref(right.num));
3029                 mpz_tdiv_r(rem, l, r);
3030                 val_init(Tnum, &rv);
3031                 mpq_set_z(rv.num, rem);
3032                 mpz_clear(r); mpz_clear(l); mpz_clear(rem);
3033                 rvtype = ltype;
3034                 break;
3035         }
3036         case Negate:
3037                 rv = interp_exec(c, b->right, &rvtype);
3038                 mpq_neg(rv.num, rv.num);
3039                 break;
3040         case Absolute:
3041                 rv = interp_exec(c, b->right, &rvtype);
3042                 mpq_abs(rv.num, rv.num);
3043                 break;
3044         case Bracket:
3045                 rv = interp_exec(c, b->right, &rvtype);
3046                 break;
3047         case Concat:
3048                 left = interp_exec(c, b->left, &ltype);
3049                 right = interp_exec(c, b->right, &rtype);
3050                 rvtype = Tstr;
3051                 rv.str = text_join(left.str, right.str);
3052                 break;
3053         case StringConv:
3054                 right = interp_exec(c, b->right, &rvtype);
3055                 rtype = Tstr;
3056                 rvtype = Tnum;
3057
3058                 struct text tx = right.str;
3059                 char tail[3];
3060                 int neg = 0;
3061                 if (tx.txt[0] == '-') {
3062                         neg = 1;
3063                         tx.txt++;
3064                         tx.len--;
3065                 }
3066                 if (number_parse(rv.num, tail, tx) == 0)
3067                         mpq_init(rv.num);
3068                 else if (neg)
3069                         mpq_neg(rv.num, rv.num);
3070                 if (tail[0])
3071                         printf("Unsupported suffix: %.*s\n", tx.len, tx.txt);
3072
3073                 break;
3074
3075 ###### value functions
3076
3077         static struct text text_join(struct text a, struct text b)
3078         {
3079                 struct text rv;
3080                 rv.len = a.len + b.len;
3081                 rv.txt = malloc(rv.len);
3082                 memcpy(rv.txt, a.txt, a.len);
3083                 memcpy(rv.txt+a.len, b.txt, b.len);
3084                 return rv;
3085         }
3086
3087 ### Blocks, Statements, and Statement lists.
3088
3089 Now that we have expressions out of the way we need to turn to
3090 statements.  There are simple statements and more complex statements.
3091 Simple statements do not contain (syntactic) newlines, complex statements do.
3092
3093 Statements often come in sequences and we have corresponding simple
3094 statement lists and complex statement lists.
3095 The former comprise only simple statements separated by semicolons.
3096 The later comprise complex statements and simple statement lists.  They are
3097 separated by newlines.  Thus the semicolon is only used to separate
3098 simple statements on the one line.  This may be overly restrictive,
3099 but I'm not sure I ever want a complex statement to share a line with
3100 anything else.
3101
3102 Note that a simple statement list can still use multiple lines if
3103 subsequent lines are indented, so
3104
3105 ###### Example: wrapped simple statement list
3106
3107         a = b; c = d;
3108            e = f; print g
3109
3110 is a single simple statement list.  This might allow room for
3111 confusion, so I'm not set on it yet.
3112
3113 A simple statement list needs no extra syntax.  A complex statement
3114 list has two syntactic forms.  It can be enclosed in braces (much like
3115 C blocks), or it can be introduced by an indent and continue until an
3116 unindented newline (much like Python blocks).  With this extra syntax
3117 it is referred to as a block.
3118
3119 Note that a block does not have to include any newlines if it only
3120 contains simple statements.  So both of:
3121
3122         if condition: a=b; d=f
3123
3124         if condition { a=b; print f }
3125
3126 are valid.
3127
3128 In either case the list is constructed from a `binode` list with
3129 `Block` as the operator.  When parsing the list it is most convenient
3130 to append to the end, so a list is a list and a statement.  When using
3131 the list it is more convenient to consider a list to be a statement
3132 and a list.  So we need a function to re-order a list.
3133 `reorder_bilist` serves this purpose.
3134
3135 The only stand-alone statement we introduce at this stage is `pass`
3136 which does nothing and is represented as a `NULL` pointer in a `Block`
3137 list.  Other stand-alone statements will follow once the infrastructure
3138 is in-place.
3139
3140 ###### Binode types
3141         Block,
3142
3143 ###### Grammar
3144
3145         $TERM { } ;
3146
3147         $*binode
3148         Block -> { IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
3149                 | { SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
3150                 | SimpleStatements ; ${ $0 = reorder_bilist($<SS); }$
3151                 | SimpleStatements EOL ${ $0 = reorder_bilist($<SS); }$
3152                 | IN OptNL Statementlist OUT ${ $0 = $<Sl; }$
3153
3154         OpenBlock -> OpenScope { IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
3155                 | OpenScope { SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
3156                 | OpenScope SimpleStatements ; ${ $0 = reorder_bilist($<SS); }$
3157                 | OpenScope SimpleStatements EOL ${ $0 = reorder_bilist($<SS); }$
3158                 | IN OpenScope OptNL Statementlist OUT ${ $0 = $<Sl; }$
3159
3160         UseBlock -> { OpenScope IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
3161                 | { OpenScope SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
3162                 | IN OpenScope OptNL Statementlist OUT ${ $0 = $<Sl; }$
3163
3164         ColonBlock -> { IN OptNL Statementlist OUT OptNL } ${ $0 = $<Sl; }$
3165                 | { SimpleStatements } ${ $0 = reorder_bilist($<SS); }$
3166                 | : SimpleStatements ; ${ $0 = reorder_bilist($<SS); }$
3167                 | : SimpleStatements EOL ${ $0 = reorder_bilist($<SS); }$
3168                 | : IN OptNL Statementlist OUT ${ $0 = $<Sl; }$
3169
3170         Statementlist -> ComplexStatements ${ $0 = reorder_bilist($<CS); }$
3171
3172         ComplexStatements -> ComplexStatements ComplexStatement ${
3173                         if ($2 == NULL) {
3174                                 $0 = $<1;
3175                         } else {
3176                                 $0 = new(binode);
3177                                 $0->op = Block;
3178                                 $0->left = $<1;
3179                                 $0->right = $<2;
3180                         }
3181                 }$
3182                 | ComplexStatement ${
3183                         if ($1 == NULL) {
3184                                 $0 = NULL;
3185                         } else {
3186                                 $0 = new(binode);
3187                                 $0->op = Block;
3188                                 $0->left = NULL;
3189                                 $0->right = $<1;
3190                         }
3191                 }$
3192
3193         $*exec
3194         ComplexStatement -> SimpleStatements Newlines ${
3195                         $0 = reorder_bilist($<SS);
3196                         }$
3197                 |  SimpleStatements ; Newlines ${
3198                         $0 = reorder_bilist($<SS);
3199                         }$
3200                 ## ComplexStatement Grammar
3201
3202         $*binode
3203         SimpleStatements -> SimpleStatements ; SimpleStatement ${
3204                         $0 = new(binode);
3205                         $0->op = Block;
3206                         $0->left = $<1;
3207                         $0->right = $<3;
3208                         }$
3209                 | SimpleStatement ${
3210                         $0 = new(binode);
3211                         $0->op = Block;
3212                         $0->left = NULL;
3213                         $0->right = $<1;
3214                         }$
3215
3216         $TERM pass
3217         SimpleStatement -> pass ${ $0 = NULL; }$
3218                 | ERROR ${ tok_err(c, "Syntax error in statement", &$1); }$
3219                 ## SimpleStatement Grammar
3220
3221 ###### print binode cases
3222         case Block:
3223                 if (indent < 0) {
3224                         // simple statement
3225                         if (b->left == NULL)
3226                                 printf("pass");
3227                         else
3228                                 print_exec(b->left, indent, bracket);
3229                         if (b->right) {
3230                                 printf("; ");
3231                                 print_exec(b->right, indent, bracket);
3232                         }
3233                 } else {
3234                         // block, one per line
3235                         if (b->left == NULL)
3236                                 do_indent(indent, "pass\n");
3237                         else
3238                                 print_exec(b->left, indent, bracket);
3239                         if (b->right)
3240                                 print_exec(b->right, indent, bracket);
3241                 }
3242                 break;
3243
3244 ###### propagate binode cases
3245         case Block:
3246         {
3247                 /* If any statement returns something other than Tnone
3248                  * or Tbool then all such must return same type.
3249                  * As each statement may be Tnone or something else,
3250                  * we must always pass NULL (unknown) down, otherwise an incorrect
3251                  * error might occur.  We never return Tnone unless it is
3252                  * passed in.
3253                  */
3254                 struct binode *e;
3255
3256                 for (e = b; e; e = cast(binode, e->right)) {
3257                         t = propagate_types(e->left, c, ok, NULL, rules);
3258                         if ((rules & Rboolok) && t == Tbool)
3259                                 t = NULL;
3260                         if (t && t != Tnone && t != Tbool) {
3261                                 if (!type)
3262                                         type = t;
3263                                 else if (t != type)
3264                                         type_err(c, "error: expected %1%r, found %2",
3265                                                  e->left, type, rules, t);
3266                         }
3267                 }
3268                 return type;
3269         }
3270
3271 ###### interp binode cases
3272         case Block:
3273                 while (rvtype == Tnone &&
3274                        b) {
3275                         if (b->left)
3276                                 rv = interp_exec(c, b->left, &rvtype);
3277                         b = cast(binode, b->right);
3278                 }
3279                 break;
3280
3281 ### The Print statement
3282
3283 `print` is a simple statement that takes a comma-separated list of
3284 expressions and prints the values separated by spaces and terminated
3285 by a newline.  No control of formatting is possible.
3286
3287 `print` faces the same list-ordering issue as blocks, and uses the
3288 same solution.
3289
3290 ###### Binode types
3291         Print,
3292
3293 ##### expr precedence
3294         $TERM print ,
3295
3296 ###### SimpleStatement Grammar
3297
3298         | print ExpressionList ${
3299                 $0 = reorder_bilist($<2);
3300         }$
3301         | print ExpressionList , ${
3302                 $0 = new(binode);
3303                 $0->op = Print;
3304                 $0->right = NULL;
3305                 $0->left = $<2;
3306                 $0 = reorder_bilist($0);
3307         }$
3308         | print ${
3309                 $0 = new(binode);
3310                 $0->op = Print;
3311                 $0->right = NULL;
3312         }$
3313
3314 ###### Grammar
3315
3316         $*binode
3317         ExpressionList -> ExpressionList , Expression ${
3318                 $0 = new(binode);
3319                 $0->op = Print;
3320                 $0->left = $<1;
3321                 $0->right = $<3;
3322                 }$
3323                 | Expression ${
3324                         $0 = new(binode);
3325                         $0->op = Print;
3326                         $0->left = NULL;
3327                         $0->right = $<1;
3328                 }$
3329
3330 ###### print binode cases
3331
3332         case Print:
3333                 do_indent(indent, "print");
3334                 while (b) {
3335                         if (b->left) {
3336                                 printf(" ");
3337                                 print_exec(b->left, -1, bracket);
3338                                 if (b->right)
3339                                         printf(",");
3340                         }
3341                         b = cast(binode, b->right);
3342                 }
3343                 if (indent >= 0)
3344                         printf("\n");
3345                 break;
3346
3347 ###### propagate binode cases
3348
3349         case Print:
3350                 /* don't care but all must be consistent */
3351                 propagate_types(b->left, c, ok, NULL, Rnolabel);
3352                 propagate_types(b->right, c, ok, NULL, Rnolabel);
3353                 break;
3354
3355 ###### interp binode cases
3356
3357         case Print:
3358         {
3359                 char sep = 0;
3360                 int eol = 1;
3361                 for ( ; b; b = cast(binode, b->right))
3362                         if (b->left) {
3363                                 if (sep)
3364                                         putchar(sep);
3365                                 left = interp_exec(c, b->left, &ltype);
3366                                 print_value(ltype, &left);
3367                                 free_value(ltype, &left);
3368                                 if (b->right)
3369                                         sep = ' ';
3370                         } else if (sep)
3371                                 eol = 0;
3372                 ltype = Tnone;
3373                 if (eol)
3374                         printf("\n");
3375                 break;
3376         }
3377
3378 ###### Assignment statement
3379
3380 An assignment will assign a value to a variable, providing it hasn't
3381 been declared as a constant.  The analysis phase ensures that the type
3382 will be correct so the interpreter just needs to perform the
3383 calculation.  There is a form of assignment which declares a new
3384 variable as well as assigning a value.  If a name is assigned before
3385 it is declared, and error will be raised as the name is created as
3386 `Tlabel` and it is illegal to assign to such names.
3387
3388 ###### Binode types
3389         Assign,
3390         Declare,
3391
3392 ###### declare terminals
3393         $TERM =
3394
3395 ###### SimpleStatement Grammar
3396         | Variable = Expression ${
3397                         $0 = new(binode);
3398                         $0->op = Assign;
3399                         $0->left = $<1;
3400                         $0->right = $<3;
3401                 }$
3402         | VariableDecl = Expression ${
3403                         $0 = new(binode);
3404                         $0->op = Declare;
3405                         $0->left = $<1;
3406                         $0->right =$<3;
3407                 }$
3408
3409         | VariableDecl ${
3410                         if ($1->var->where_set == NULL) {
3411                                 type_err(c,
3412                                          "Variable declared with no type or value: %v",
3413                                          $1, NULL, 0, NULL);
3414                         } else {
3415                                 $0 = new(binode);
3416                                 $0->op = Declare;
3417                                 $0->left = $<1;
3418                                 $0->right = NULL;
3419                         }
3420                 }$
3421
3422 ###### print binode cases
3423
3424         case Assign:
3425                 do_indent(indent, "");
3426                 print_exec(b->left, indent, bracket);
3427                 printf(" = ");
3428                 print_exec(b->right, indent, bracket);
3429                 if (indent >= 0)
3430                         printf("\n");
3431                 break;
3432
3433         case Declare:
3434                 {
3435                 struct variable *v = cast(var, b->left)->var;
3436                 do_indent(indent, "");
3437                 print_exec(b->left, indent, bracket);
3438                 if (cast(var, b->left)->var->constant) {
3439                         if (v->where_decl == v->where_set) {
3440                                 printf("::");
3441                                 type_print(v->type, stdout);
3442                                 printf(" ");
3443                         } else
3444                                 printf(" ::");
3445                 } else {
3446                         if (v->where_decl == v->where_set) {
3447                                 printf(":");
3448                                 type_print(v->type, stdout);
3449                                 printf(" ");
3450                         } else
3451                                 printf(" :");
3452                 }
3453                 if (b->right) {
3454                         printf("= ");
3455                         print_exec(b->right, indent, bracket);
3456                 }
3457                 if (indent >= 0)
3458                         printf("\n");
3459                 }
3460                 break;
3461
3462 ###### propagate binode cases
3463
3464         case Assign:
3465         case Declare:
3466                 /* Both must match and not be labels,
3467                  * Type must support 'dup',
3468                  * For Assign, left must not be constant.
3469                  * result is Tnone
3470                  */
3471                 t = propagate_types(b->left, c, ok, NULL,
3472                                     Rnolabel | (b->op == Assign ? Rnoconstant : 0));
3473                 if (!b->right)
3474                         return Tnone;
3475
3476                 if (t) {
3477                         if (propagate_types(b->right, c, ok, t, 0) != t)
3478                                 if (b->left->type == Xvar)
3479                                         type_err(c, "info: variable '%v' was set as %1 here.",
3480                                                  cast(var, b->left)->var->where_set, t, rules, NULL);
3481                 } else {
3482                         t = propagate_types(b->right, c, ok, NULL, Rnolabel);
3483                         if (t)
3484                                 propagate_types(b->left, c, ok, t,
3485                                                 (b->op == Assign ? Rnoconstant : 0));
3486                 }
3487                 if (t && t->dup == NULL)
3488                         type_err(c, "error: cannot assign value of type %1", b, t, 0, NULL);
3489                 return Tnone;
3490
3491                 break;
3492
3493 ###### interp binode cases
3494
3495         case Assign:
3496                 lleft = linterp_exec(c, b->left, &ltype);
3497                 right = interp_exec(c, b->right, &rtype);
3498                 if (lleft) {
3499                         free_value(ltype, lleft);
3500                         dup_value(ltype, &right, lleft);
3501                         ltype = NULL;
3502                 }
3503                 break;
3504
3505         case Declare:
3506         {
3507                 struct variable *v = cast(var, b->left)->var;
3508                 struct value *val;
3509                 if (v->merged)
3510                         v = v->merged;
3511                 val = var_value(c, v);
3512                 free_value(v->type, val);
3513                 if (v->type->prepare_type)
3514                         v->type->prepare_type(c, v->type, 0);
3515                 if (b->right) {
3516                         right = interp_exec(c, b->right, &rtype);
3517                         memcpy(val, &right, rtype->size);
3518                         rtype = Tnone;
3519                 } else {
3520                         val_init(v->type, val);
3521                 }
3522                 break;
3523         }
3524
3525 ### The `use` statement
3526
3527 The `use` statement is the last "simple" statement.  It is needed when
3528 the condition in a conditional statement is a block.  `use` works much
3529 like `return` in C, but only completes the `condition`, not the whole
3530 function.
3531
3532 ###### Binode types
3533         Use,
3534
3535 ###### expr precedence
3536         $TERM use
3537
3538 ###### SimpleStatement Grammar
3539         | use Expression ${
3540                 $0 = new_pos(binode, $1);
3541                 $0->op = Use;
3542                 $0->right = $<2;
3543                 if ($0->right->type == Xvar) {
3544                         struct var *v = cast(var, $0->right);
3545                         if (v->var->type == Tnone) {
3546                                 /* Convert this to a label */
3547                                 struct value *val;
3548
3549                                 v->var->type = Tlabel;
3550                                 val = global_alloc(c, Tlabel, v->var, NULL);
3551                                 val->label = val;
3552                         }
3553                 }
3554         }$
3555
3556 ###### print binode cases
3557
3558         case Use:
3559                 do_indent(indent, "use ");
3560                 print_exec(b->right, -1, bracket);
3561                 if (indent >= 0)
3562                         printf("\n");
3563                 break;
3564
3565 ###### propagate binode cases
3566
3567         case Use:
3568                 /* result matches value */
3569                 return propagate_types(b->right, c, ok, type, 0);
3570
3571 ###### interp binode cases
3572
3573         case Use:
3574                 rv = interp_exec(c, b->right, &rvtype);
3575                 break;
3576
3577 ### The Conditional Statement
3578
3579 This is the biggy and currently the only complex statement.  This
3580 subsumes `if`, `while`, `do/while`, `switch`, and some parts of `for`.
3581 It is comprised of a number of parts, all of which are optional though
3582 set combinations apply.  Each part is (usually) a key word (`then` is
3583 sometimes optional) followed by either an expression or a code block,
3584 except the `casepart` which is a "key word and an expression" followed
3585 by a code block.  The code-block option is valid for all parts and,
3586 where an expression is also allowed, the code block can use the `use`
3587 statement to report a value.  If the code block does not report a value
3588 the effect is similar to reporting `True`.
3589
3590 The `else` and `case` parts, as well as `then` when combined with
3591 `if`, can contain a `use` statement which will apply to some
3592 containing conditional statement. `for` parts, `do` parts and `then`
3593 parts used with `for` can never contain a `use`, except in some
3594 subordinate conditional statement.
3595
3596 If there is a `forpart`, it is executed first, only once.
3597 If there is a `dopart`, then it is executed repeatedly providing
3598 always that the `condpart` or `cond`, if present, does not return a non-True
3599 value.  `condpart` can fail to return any value if it simply executes
3600 to completion.  This is treated the same as returning `True`.
3601
3602 If there is a `thenpart` it will be executed whenever the `condpart`
3603 or `cond` returns True (or does not return any value), but this will happen
3604 *after* `dopart` (when present).
3605
3606 If `elsepart` is present it will be executed at most once when the
3607 condition returns `False` or some value that isn't `True` and isn't
3608 matched by any `casepart`.  If there are any `casepart`s, they will be
3609 executed when the condition returns a matching value.
3610
3611 The particular sorts of values allowed in case parts has not yet been
3612 determined in the language design, so nothing is prohibited.
3613
3614 The various blocks in this complex statement potentially provide scope
3615 for variables as described earlier.  Each such block must include the
3616 "OpenScope" nonterminal before parsing the block, and must call
3617 `var_block_close()` when closing the block.
3618
3619 The code following "`if`", "`switch`" and "`for`" does not get its own
3620 scope, but is in a scope covering the whole statement, so names
3621 declared there cannot be redeclared elsewhere.  Similarly the
3622 condition following "`while`" is in a scope the covers the body
3623 ("`do`" part) of the loop, and which does not allow conditional scope
3624 extension.  Code following "`then`" (both looping and non-looping),
3625 "`else`" and "`case`" each get their own local scope.
3626
3627 The type requirements on the code block in a `whilepart` are quite
3628 unusal.  It is allowed to return a value of some identifiable type, in
3629 which case the loop aborts and an appropriate `casepart` is run, or it
3630 can return a Boolean, in which case the loop either continues to the
3631 `dopart` (on `True`) or aborts and runs the `elsepart` (on `False`).
3632 This is different both from the `ifpart` code block which is expected to
3633 return a Boolean, or the `switchpart` code block which is expected to
3634 return the same type as the casepart values.  The correct analysis of
3635 the type of the `whilepart` code block is the reason for the
3636 `Rboolok` flag which is passed to `propagate_types()`.
3637
3638 The `cond_statement` cannot fit into a `binode` so a new `exec` is
3639 defined.
3640
3641 ###### exec type
3642         Xcond_statement,
3643
3644 ###### ast
3645         struct casepart {
3646                 struct exec *value;
3647                 struct exec *action;
3648                 struct casepart *next;
3649         };
3650         struct cond_statement {
3651                 struct exec;
3652                 struct exec *forpart, *condpart, *dopart, *thenpart, *elsepart;
3653                 struct casepart *casepart;
3654         };
3655
3656 ###### ast functions
3657
3658         static void free_casepart(struct casepart *cp)
3659         {
3660                 while (cp) {
3661                         struct casepart *t;
3662                         free_exec(cp->value);
3663                         free_exec(cp->action);
3664                         t = cp->next;
3665                         free(cp);
3666                         cp = t;
3667                 }
3668         }
3669
3670         static void free_cond_statement(struct cond_statement *s)
3671         {
3672                 if (!s)
3673                         return;
3674                 free_exec(s->forpart);
3675                 free_exec(s->condpart);
3676                 free_exec(s->dopart);
3677                 free_exec(s->thenpart);
3678                 free_exec(s->elsepart);
3679                 free_casepart(s->casepart);
3680                 free(s);
3681         }
3682
3683 ###### free exec cases
3684         case Xcond_statement: free_cond_statement(cast(cond_statement, e)); break;
3685
3686 ###### ComplexStatement Grammar
3687         | CondStatement ${ $0 = $<1; }$
3688
3689 ###### expr precedence
3690         $TERM for then while do
3691         $TERM else
3692         $TERM switch case
3693
3694 ###### Grammar
3695
3696         $*cond_statement
3697         // A CondStatement must end with EOL, as does CondSuffix and
3698         // IfSuffix.
3699         // ForPart, ThenPart, SwitchPart, CasePart are non-empty and
3700         // may or may not end with EOL
3701         // WhilePart and IfPart include an appropriate Suffix
3702
3703
3704         // Both ForPart and Whilepart open scopes, and CondSuffix only
3705         // closes one - so in the first branch here we have another to close.
3706         CondStatement -> ForPart OptNL ThenPart OptNL WhilePart CondSuffix ${
3707                         $0 = $<CS;
3708                         $0->forpart = $<FP;
3709                         $0->thenpart = $<TP;
3710                         $0->condpart = $WP.condpart; $WP.condpart = NULL;
3711                         $0->dopart = $WP.dopart; $WP.dopart = NULL;
3712                         var_block_close(c, CloseSequential);
3713                         }$
3714                 | ForPart OptNL WhilePart CondSuffix ${
3715                         $0 = $<CS;
3716                         $0->forpart = $<FP;
3717                         $0->condpart = $WP.condpart; $WP.condpart = NULL;
3718                         $0->dopart = $WP.dopart; $WP.dopart = NULL;
3719                         var_block_close(c, CloseSequential);
3720                         }$
3721                 | WhilePart CondSuffix ${
3722                         $0 = $<CS;
3723                         $0->condpart = $WP.condpart; $WP.condpart = NULL;
3724                         $0->dopart = $WP.dopart; $WP.dopart = NULL;
3725                         }$
3726                 | SwitchPart OptNL CasePart CondSuffix ${
3727                         $0 = $<CS;
3728                         $0->condpart = $<SP;
3729                         $CP->next = $0->casepart;
3730                         $0->casepart = $<CP;
3731                         }$
3732                 | SwitchPart : IN OptNL CasePart CondSuffix OUT Newlines ${
3733                         $0 = $<CS;
3734                         $0->condpart = $<SP;
3735                         $CP->next = $0->casepart;
3736                         $0->casepart = $<CP;
3737                         }$
3738                 | IfPart IfSuffix ${
3739                         $0 = $<IS;
3740                         $0->condpart = $IP.condpart; $IP.condpart = NULL;
3741                         $0->thenpart = $IP.thenpart; $IP.thenpart = NULL;
3742                         // This is where we close an "if" statement
3743                         var_block_close(c, CloseSequential);
3744                         }$
3745
3746         CondSuffix -> IfSuffix ${
3747                         $0 = $<1;
3748                         // This is where we close scope of the whole
3749                         // "for" or "while" statement
3750                         var_block_close(c, CloseSequential);
3751                 }$
3752                 | Newlines CasePart CondSuffix ${
3753                         $0 = $<CS;
3754                         $CP->next = $0->casepart;
3755                         $0->casepart = $<CP;
3756                 }$
3757                 | CasePart CondSuffix ${
3758                         $0 = $<CS;
3759                         $CP->next = $0->casepart;
3760                         $0->casepart = $<CP;
3761                 }$
3762
3763         IfSuffix -> Newlines ${ $0 = new(cond_statement); }$
3764                 | Newlines ElsePart ${ $0 = $<EP; }$
3765                 | ElsePart ${$0 = $<EP; }$
3766
3767         ElsePart -> else OpenBlock Newlines ${
3768                         $0 = new(cond_statement);
3769                         $0->elsepart = $<OB;
3770                         var_block_close(c, CloseElse);
3771                 }$
3772                 | else OpenScope CondStatement ${
3773                         $0 = new(cond_statement);
3774                         $0->elsepart = $<CS;
3775                         var_block_close(c, CloseElse);
3776                 }$
3777
3778         $*casepart
3779         CasePart -> case Expression OpenScope ColonBlock ${
3780                         $0 = calloc(1,sizeof(struct casepart));
3781                         $0->value = $<Ex;
3782                         $0->action = $<Bl;
3783                         var_block_close(c, CloseParallel);
3784                 }$
3785
3786         $*exec
3787         // These scopes are closed in CondSuffix
3788         ForPart -> for OpenBlock ${
3789                         $0 = $<Bl;
3790                 }$
3791
3792         ThenPart -> then OpenBlock ${
3793                         $0 = $<OB;
3794                         var_block_close(c, CloseSequential);
3795                 }$
3796
3797         $cond_statement
3798         // This scope is closed in CondSuffix
3799         WhilePart -> while UseBlock OptNL do Block ${
3800                         $0.condpart = $<UB;
3801                         $0.dopart = $<Bl;
3802                 }$
3803                 | while OpenScope Expression ColonBlock ${
3804                         $0.condpart = $<Exp;
3805                         $0.dopart = $<Bl;
3806                 }$
3807
3808         IfPart -> if UseBlock OptNL then OpenBlock ClosePara ${
3809                         $0.condpart = $<UB;
3810                         $0.thenpart = $<Bl;
3811                 }$
3812                 | if OpenScope Expression OpenScope ColonBlock ClosePara ${
3813                         $0.condpart = $<Ex;
3814                         $0.thenpart = $<Bl;
3815                 }$
3816                 | if OpenScope Expression OpenScope OptNL then Block ClosePara ${
3817                         $0.condpart = $<Ex;
3818                         $0.thenpart = $<Bl;
3819                 }$
3820
3821         $*exec
3822         // This scope is closed in CondSuffix
3823         SwitchPart -> switch OpenScope Expression ${
3824                         $0 = $<Ex;
3825                 }$
3826                 | switch UseBlock ${
3827                         $0 = $<Bl;
3828                 }$
3829
3830 ###### print exec cases
3831
3832         case Xcond_statement:
3833         {
3834                 struct cond_statement *cs = cast(cond_statement, e);
3835                 struct casepart *cp;
3836                 if (cs->forpart) {
3837                         do_indent(indent, "for");
3838                         if (bracket) printf(" {\n"); else printf("\n");
3839                         print_exec(cs->forpart, indent+1, bracket);
3840                         if (cs->thenpart) {
3841                                 if (bracket)
3842                                         do_indent(indent, "} then {\n");
3843                                 else
3844                                         do_indent(indent, "then\n");
3845                                 print_exec(cs->thenpart, indent+1, bracket);
3846                         }
3847                         if (bracket) do_indent(indent, "}\n");
3848                 }
3849                 if (cs->dopart) {
3850                         // a loop
3851                         if (cs->condpart && cs->condpart->type == Xbinode &&
3852                             cast(binode, cs->condpart)->op == Block) {
3853                                 if (bracket)
3854                                         do_indent(indent, "while {\n");
3855                                 else
3856                                         do_indent(indent, "while\n");
3857                                 print_exec(cs->condpart, indent+1, bracket);
3858                                 if (bracket)
3859                                         do_indent(indent, "} do {\n");
3860                                 else
3861                                         do_indent(indent, "do\n");
3862                                 print_exec(cs->dopart, indent+1, bracket);
3863                                 if (bracket)
3864                                         do_indent(indent, "}\n");
3865                         } else {
3866                                 do_indent(indent, "while ");
3867                                 print_exec(cs->condpart, 0, bracket);
3868                                 if (bracket)
3869                                         printf(" {\n");
3870                                 else
3871                                         printf(":\n");
3872                                 print_exec(cs->dopart, indent+1, bracket);
3873                                 if (bracket)
3874                                         do_indent(indent, "}\n");
3875                         }
3876                 } else {
3877                         // a condition
3878                         if (cs->casepart)
3879                                 do_indent(indent, "switch");
3880                         else
3881                                 do_indent(indent, "if");
3882                         if (cs->condpart && cs->condpart->type == Xbinode &&
3883                             cast(binode, cs->condpart)->op == Block) {
3884                                 if (bracket)
3885                                         printf(" {\n");
3886                                 else
3887                                         printf(":\n");
3888                                 print_exec(cs->condpart, indent+1, bracket);
3889                                 if (bracket)
3890                                         do_indent(indent, "}\n");
3891                                 if (cs->thenpart) {
3892                                         do_indent(indent, "then:\n");
3893                                         print_exec(cs->thenpart, indent+1, bracket);
3894                                 }
3895                         } else {
3896                                 printf(" ");
3897                                 print_exec(cs->condpart, 0, bracket);
3898                                 if (cs->thenpart) {
3899                                         if (bracket)
3900                                                 printf(" {\n");
3901                                         else
3902                                                 printf(":\n");
3903                                         print_exec(cs->thenpart, indent+1, bracket);
3904                                         if (bracket)
3905                                                 do_indent(indent, "}\n");
3906                                 } else
3907                                         printf("\n");
3908                         }
3909                 }
3910                 for (cp = cs->casepart; cp; cp = cp->next) {
3911                         do_indent(indent, "case ");
3912                         print_exec(cp->value, -1, 0);
3913                         if (bracket)
3914                                 printf(" {\n");
3915                         else
3916                                 printf(":\n");
3917                         print_exec(cp->action, indent+1, bracket);
3918                         if (bracket)
3919                                 do_indent(indent, "}\n");
3920                 }
3921                 if (cs->elsepart) {
3922                         do_indent(indent, "else");
3923                         if (bracket)
3924                                 printf(" {\n");
3925                         else
3926                                 printf("\n");
3927                         print_exec(cs->elsepart, indent+1, bracket);
3928                         if (bracket)
3929                                 do_indent(indent, "}\n");
3930                 }
3931                 break;
3932         }
3933
3934 ###### propagate exec cases
3935         case Xcond_statement:
3936         {
3937                 // forpart and dopart must return Tnone
3938                 // thenpart must return Tnone if there is a dopart,
3939                 // otherwise it is like elsepart.
3940                 // condpart must:
3941                 //    be bool if there is no casepart
3942                 //    match casepart->values if there is a switchpart
3943                 //    either be bool or match casepart->value if there
3944                 //             is a whilepart
3945                 // elsepart and casepart->action must match the return type
3946                 //   expected of this statement.
3947                 struct cond_statement *cs = cast(cond_statement, prog);
3948                 struct casepart *cp;
3949
3950                 t = propagate_types(cs->forpart, c, ok, Tnone, 0);
3951                 if (!type_compat(Tnone, t, 0))
3952                         *ok = 0;
3953                 t = propagate_types(cs->dopart, c, ok, Tnone, 0);
3954                 if (!type_compat(Tnone, t, 0))
3955                         *ok = 0;
3956                 if (cs->dopart) {
3957                         t = propagate_types(cs->thenpart, c, ok, Tnone, 0);
3958                         if (!type_compat(Tnone, t, 0))
3959                                 *ok = 0;
3960                 }
3961                 if (cs->casepart == NULL)
3962                         propagate_types(cs->condpart, c, ok, Tbool, 0);
3963                 else {
3964                         /* Condpart must match case values, with bool permitted */
3965                         t = NULL;
3966                         for (cp = cs->casepart;
3967                              cp && !t; cp = cp->next)
3968                                 t = propagate_types(cp->value, c, ok, NULL, 0);
3969                         if (!t && cs->condpart)
3970                                 t = propagate_types(cs->condpart, c, ok, NULL, Rboolok);
3971                         // Now we have a type (I hope) push it down
3972                         if (t) {
3973                                 for (cp = cs->casepart; cp; cp = cp->next)
3974                                         propagate_types(cp->value, c, ok, t, 0);
3975                                 propagate_types(cs->condpart, c, ok, t, Rboolok);
3976                         }
3977                 }
3978                 // (if)then, else, and case parts must return expected type.
3979                 if (!cs->dopart && !type)
3980                         type = propagate_types(cs->thenpart, c, ok, NULL, rules);
3981                 if (!type)
3982                         type = propagate_types(cs->elsepart, c, ok, NULL, rules);
3983                 for (cp = cs->casepart;
3984                      cp && !type;
3985                      cp = cp->next)
3986                         type = propagate_types(cp->action, c, ok, NULL, rules);
3987                 if (type) {
3988                         if (!cs->dopart)
3989                                 propagate_types(cs->thenpart, c, ok, type, rules);
3990                         propagate_types(cs->elsepart, c, ok, type, rules);
3991                         for (cp = cs->casepart; cp ; cp = cp->next)
3992                                 propagate_types(cp->action, c, ok, type, rules);
3993                         return type;
3994                 } else
3995                         return NULL;
3996         }
3997
3998 ###### interp exec cases
3999         case Xcond_statement:
4000         {
4001                 struct value v, cnd;
4002                 struct type *vtype, *cndtype;
4003                 struct casepart *cp;
4004                 struct cond_statement *cs = cast(cond_statement, e);
4005
4006                 if (cs->forpart)
4007                         interp_exec(c, cs->forpart, NULL);
4008                 do {
4009                         if (cs->condpart)
4010                                 cnd = interp_exec(c, cs->condpart, &cndtype);
4011                         else
4012                                 cndtype = Tnone;
4013                         if (!(cndtype == Tnone ||
4014                               (cndtype == Tbool && cnd.bool != 0)))
4015                                 break;
4016                         // cnd is Tnone or Tbool, doesn't need to be freed
4017                         if (cs->dopart)
4018                                 interp_exec(c, cs->dopart, NULL);
4019
4020                         if (cs->thenpart) {
4021                                 rv = interp_exec(c, cs->thenpart, &rvtype);
4022                                 if (rvtype != Tnone || !cs->dopart)
4023                                         goto Xcond_done;
4024                                 free_value(rvtype, &rv);
4025                                 rvtype = Tnone;
4026                         }
4027                 } while (cs->dopart);
4028
4029                 for (cp = cs->casepart; cp; cp = cp->next) {
4030                         v = interp_exec(c, cp->value, &vtype);
4031                         if (value_cmp(cndtype, vtype, &v, &cnd) == 0) {
4032                                 free_value(vtype, &v);
4033                                 free_value(cndtype, &cnd);
4034                                 rv = interp_exec(c, cp->action, &rvtype);
4035                                 goto Xcond_done;
4036                         }
4037                         free_value(vtype, &v);
4038                 }
4039                 free_value(cndtype, &cnd);
4040                 if (cs->elsepart)
4041                         rv = interp_exec(c, cs->elsepart, &rvtype);
4042                 else
4043                         rvtype = Tnone;
4044         Xcond_done:
4045                 break;
4046         }
4047
4048 ### Top level structure
4049
4050 All the language elements so far can be used in various places.  Now
4051 it is time to clarify what those places are.
4052
4053 At the top level of a file there will be a number of declarations.
4054 Many of the things that can be declared haven't been described yet,
4055 such as functions, procedures, imports, and probably more.
4056 For now there are two sorts of things that can appear at the top
4057 level.  They are predefined constants, `struct` types, and the main
4058 program.  While the syntax will allow the main program to appear
4059 multiple times, that will trigger an error if it is actually attempted.
4060
4061 The various declarations do not return anything.  They store the
4062 various declarations in the parse context.
4063
4064 ###### Parser: grammar
4065
4066         $void
4067         Ocean -> OptNL DeclarationList
4068
4069         ## declare terminals
4070
4071         OptNL ->
4072                 | OptNL NEWLINE
4073         Newlines -> NEWLINE
4074                 | Newlines NEWLINE
4075
4076         DeclarationList -> Declaration
4077                 | DeclarationList Declaration
4078
4079         Declaration -> ERROR Newlines ${
4080                         tok_err(c,
4081                                 "error: unhandled parse error", &$1);
4082                 }$
4083                 | DeclareConstant
4084                 | DeclareProgram
4085                 | DeclareStruct
4086
4087         ## top level grammar
4088
4089 ### The `const` section
4090
4091 As well as being defined in with the code that uses them, constants
4092 can be declared at the top level.  These have full-file scope, so they
4093 are always `InScope`.  The value of a top level constant can be given
4094 as an expression, and this is evaluated immediately rather than in the
4095 later interpretation stage.  Once we add functions to the language, we
4096 will need rules concern which, if any, can be used to define a top
4097 level constant.
4098
4099 Constants are defined in a section that starts with the reserved word
4100 `const` and then has a block with a list of assignment statements.
4101 For syntactic consistency, these must use the double-colon syntax to
4102 make it clear that they are constants.  Type can also be given: if
4103 not, the type will be determined during analysis, as with other
4104 constants.
4105
4106 As the types constants are inserted at the head of a list, printing
4107 them in the same order that they were read is not straight forward.
4108 We take a quadratic approach here and count the number of constants
4109 (variables of depth 0), then count down from there, each time
4110 searching through for the Nth constant for decreasing N.
4111
4112 ###### top level grammar
4113
4114         $TERM const
4115
4116         DeclareConstant -> const { IN OptNL ConstList OUT OptNL } Newlines
4117                 | const { SimpleConstList } Newlines
4118                 | const IN OptNL ConstList OUT Newlines
4119                 | const SimpleConstList Newlines
4120
4121         ConstList -> ConstList SimpleConstLine
4122                 | SimpleConstLine
4123         SimpleConstList -> SimpleConstList ; Const
4124                 | Const
4125                 | SimpleConstList ;
4126         SimpleConstLine -> SimpleConstList Newlines
4127                 | ERROR Newlines ${ tok_err(c, "Syntax error in constant", &$1); }$
4128
4129         $*type
4130         CType -> Type   ${ $0 = $<1; }$
4131                 |       ${ $0 = NULL; }$
4132         $void
4133         Const -> IDENTIFIER :: CType = Expression ${ {
4134                 int ok;
4135                 struct variable *v;
4136
4137                 v = var_decl(c, $1.txt);
4138                 if (v) {
4139                         struct var *var = new_pos(var, $1);
4140                         v->where_decl = var;
4141                         v->where_set = var;
4142                         var->var = v;
4143                         v->constant = 1;
4144                 } else {
4145                         v = var_ref(c, $1.txt);
4146                         tok_err(c, "error: name already declared", &$1);
4147                         type_err(c, "info: this is where '%v' was first declared",
4148                                  v->where_decl, NULL, 0, NULL);
4149                 }
4150                 do {
4151                         ok = 1;
4152                         propagate_types($5, c, &ok, $3, 0);
4153                 } while (ok == 2);
4154                 if (!ok)
4155                         c->parse_error = 1;
4156                 else if (v) {
4157                         struct value res = interp_exec(c, $5, &v->type);
4158                         global_alloc(c, v->type, v, &res);
4159                 }
4160         } }$
4161
4162 ###### print const decls
4163         {
4164                 struct variable *v;
4165                 int target = -1;
4166
4167                 while (target != 0) {
4168                         int i = 0;
4169                         for (v = context.in_scope; v; v=v->in_scope)
4170                                 if (v->depth == 0) {
4171                                         i += 1;
4172                                         if (i == target)
4173                                                 break;
4174                                 }
4175
4176                         if (target == -1) {
4177                                 if (i)
4178                                         printf("const\n");
4179                                 target = i;
4180                         } else {
4181                                 struct value *val = var_value(&context, v);
4182                                 printf("    %.*s :: ", v->name->name.len, v->name->name.txt);
4183                                 type_print(v->type, stdout);
4184                                 printf(" = ");
4185                                 if (v->type == Tstr)
4186                                         printf("\"");
4187                                 print_value(v->type, val);
4188                                 if (v->type == Tstr)
4189                                         printf("\"");
4190                                 printf("\n");
4191                                 target -= 1;
4192                         }
4193                 }
4194         }
4195
4196 ### Finally the whole program.
4197
4198 Somewhat reminiscent of Pascal a (current) Ocean program starts with
4199 the keyword "program" and a list of variable names which are assigned
4200 values from command line arguments.  Following this is a `block` which
4201 is the code to execute.  Unlike Pascal, constants and other
4202 declarations come *before* the program.
4203
4204 As this is the top level, several things are handled a bit
4205 differently.
4206 The whole program is not interpreted by `interp_exec` as that isn't
4207 passed the argument list which the program requires.  Similarly type
4208 analysis is a bit more interesting at this level.
4209
4210 ###### Binode types
4211         Program,
4212
4213 ###### top level grammar
4214
4215         DeclareProgram -> Program ${ {
4216                 if (c->prog)
4217                         type_err(c, "Program defined a second time",
4218                                  $1, NULL, 0, NULL);
4219                 else
4220                         c->prog = $<1;
4221         } }$
4222
4223         $TERM program
4224
4225         $*binode
4226         Program -> program OpenScope Varlist ColonBlock Newlines ${
4227                 $0 = new(binode);
4228                 $0->op = Program;
4229                 $0->left = reorder_bilist($<Vl);
4230                 $0->right = $<Bl;
4231                 var_block_close(c, CloseSequential);
4232                 if (c->scope_stack && !c->parse_error) abort();
4233                 }$
4234
4235         Varlist -> Varlist ArgDecl ${
4236                         $0 = new(binode);
4237                         $0->op = Program;
4238                         $0->left = $<1;
4239                         $0->right = $<2;
4240                 }$
4241                 | ${ $0 = NULL; }$
4242
4243         $*var
4244         ArgDecl -> IDENTIFIER ${ {
4245                 struct variable *v = var_decl(c, $1.txt);
4246                 $0 = new(var);
4247                 $0->var = v;
4248         } }$
4249
4250         ## Grammar
4251
4252 ###### print binode cases
4253         case Program:
4254                 do_indent(indent, "program");
4255                 for (b2 = cast(binode, b->left); b2; b2 = cast(binode, b2->right)) {
4256                         printf(" ");
4257                         print_exec(b2->left, 0, 0);
4258                 }
4259                 if (bracket)
4260                         printf(" {\n");
4261                 else
4262                         printf(":\n");
4263                 print_exec(b->right, indent+1, bracket);
4264                 if (bracket)
4265                         do_indent(indent, "}\n");
4266                 break;
4267
4268 ###### propagate binode cases
4269         case Program: abort();          // NOTEST
4270
4271 ###### core functions
4272
4273         static int analyse_prog(struct exec *prog, struct parse_context *c)
4274         {
4275                 struct binode *bp = cast(binode, prog);
4276                 struct binode *b;
4277                 int ok = 1;
4278                 int arg = 0;
4279                 struct type *argv_type;
4280                 struct text argv_type_name = { " argv", 5 };
4281
4282                 if (!bp)
4283                         return 0;       // NOTEST
4284
4285                 argv_type = add_type(c, argv_type_name, &array_prototype);
4286                 argv_type->array.member = Tstr;
4287
4288                 for (b = cast(binode, bp->left); b; b = cast(binode, b->right)) {
4289                         struct var *v;
4290                         ok = 1;
4291                         switch (arg++) {
4292                         case 0: /* argc */
4293                                 v = cast(var, b->left);
4294                                 argv_type->array.vsize = v->var;
4295                                 propagate_types(b->left, c, &ok, Tnum, 0);
4296                                 break;
4297                         case 1: /* argv */
4298                                 propagate_types(b->left, c, &ok, argv_type, 0);
4299                                 break;
4300                         default: /* invalid */
4301                                 propagate_types(b->left, c, &ok, Tnone, 0);
4302                         }
4303                 }
4304
4305                 do {
4306                         ok = 1;
4307                         propagate_types(bp->right, c, &ok, Tnone, 0);
4308                 } while (ok == 2);
4309                 if (!ok)
4310                         return 0;
4311
4312                 /* Make sure everything is still consistent */
4313                 propagate_types(bp->right, c, &ok, Tnone, 0);
4314                 if (!ok)
4315                         return 0;
4316                 scope_finalize(c);
4317                 return 1;
4318         }
4319
4320         static void interp_prog(struct parse_context *c, struct exec *prog,
4321                                 int argc, char **argv)
4322         {
4323                 struct binode *p = cast(binode, prog);
4324                 struct binode *al;
4325                 int anum = 0;
4326                 struct value v;
4327                 struct type *vtype;
4328
4329                 if (!prog)
4330                         return;         // NOTEST
4331                 al = cast(binode, p->left);
4332                 while (al) {
4333                         struct var *v = cast(var, al->left);
4334                         struct value *vl = var_value(c, v->var);
4335                         struct value arg;
4336                         struct type *t;
4337                         mpq_t argcq;
4338                         int i;
4339
4340                         switch (anum++) {
4341                         case 0: /* argc */
4342                                 if (v->var->type == Tnum) {
4343                                         mpq_init(argcq);
4344                                         mpq_set_ui(argcq, argc, 1);
4345                                         memcpy(vl, &argcq, sizeof(argcq));
4346                                 }
4347                                 break;
4348                         case 1: /* argv */
4349                                 t = v->var->type;
4350                                 t->prepare_type(c, t, 0);
4351                                 array_init(v->var->type, vl);
4352                                 for (i = 0; i < argc; i++) {
4353                                         struct value *vl2 = vl->array + i * v->var->type->array.member->size;
4354
4355
4356                                         arg.str.txt = argv[i];
4357                                         arg.str.len = strlen(argv[i]);
4358                                         free_value(Tstr, vl2);
4359                                         dup_value(Tstr, &arg, vl2);
4360                                 }
4361                                 break;
4362                         }
4363                         al = cast(binode, al->right);
4364                 }
4365                 v = interp_exec(c, p->right, &vtype);
4366                 free_value(vtype, &v);
4367         }
4368
4369 ###### interp binode cases
4370         case Program: abort();  // NOTEST
4371
4372 ## And now to test it out.
4373
4374 Having a language requires having a "hello world" program.  I'll
4375 provide a little more than that: a program that prints "Hello world"
4376 finds the GCD of two numbers, prints the first few elements of
4377 Fibonacci, performs a binary search for a number, and a few other
4378 things which will likely grow as the languages grows.
4379
4380 ###### File: oceani.mk
4381         demos :: sayhello
4382         sayhello : oceani
4383                 @echo "===== DEMO ====="
4384                 ./oceani --section "demo: hello" oceani.mdc 55 33
4385
4386 ###### demo: hello
4387
4388         const
4389                 pi ::= 3.141_592_6
4390                 four ::= 2 + 2 ; five ::= 10/2
4391         const pie ::= "I like Pie";
4392                 cake ::= "The cake is"
4393                   ++ " a lie"
4394
4395         struct fred
4396                 size:[four]number
4397                 name:string
4398                 alive:Boolean
4399
4400         program argc argv:
4401                 print "Hello World, what lovely oceans you have!"
4402                 print "Are there", five, "?"
4403                 print pi, pie, "but", cake
4404
4405                 A := $argv[1]; B := $argv[2]
4406
4407                 /* When a variable is defined in both branches of an 'if',
4408                  * and used afterwards, the variables are merged.
4409                  */
4410                 if A > B:
4411                         bigger := "yes"
4412                 else
4413                         bigger := "no"
4414                 print "Is", A, "bigger than", B,"? ", bigger
4415                 /* If a variable is not used after the 'if', no
4416                  * merge happens, so types can be different
4417                  */
4418                 if A > B * 2:
4419                         double:string = "yes"
4420                         print A, "is more than twice", B, "?", double
4421                 else
4422                         double := B*2
4423                         print "double", B, "is", double
4424
4425                 a : number
4426                 a = A;
4427                 b:number = B
4428                 if a > 0 and then b > 0:
4429                         while a != b:
4430                                 if a < b:
4431                                         b = b - a
4432                                 else
4433                                         a = a - b
4434                         print "GCD of", A, "and", B,"is", a
4435                 else if a <= 0:
4436                         print a, "is not positive, cannot calculate GCD"
4437                 else
4438                         print b, "is not positive, cannot calculate GCD"
4439
4440                 for
4441                         togo := 10
4442                         f1 := 1; f2 := 1
4443                         print "Fibonacci:", f1,f2,
4444                 then togo = togo - 1
4445                 while togo > 0:
4446                         f3 := f1 + f2
4447                         print "", f3,
4448                         f1 = f2
4449                         f2 = f3
4450                 print ""
4451
4452                 /* Binary search... */
4453                 for
4454                         lo:= 0; hi := 100
4455                         target := 77
4456                 while
4457                         mid := (lo + hi) / 2
4458                         if mid == target:
4459                                 use Found
4460                         if mid < target:
4461                                 lo = mid
4462                         else
4463                                 hi = mid
4464                         if hi - lo < 1:
4465                                 use GiveUp
4466                         use True
4467                 do pass
4468                 case Found:
4469                         print "Yay, I found", target
4470                 case GiveUp:
4471                         print "Closest I found was", mid
4472
4473                 size::= 10
4474                 list:[size]number
4475                 list[0] = 1234
4476                 // "middle square" PRNG.  Not particularly good, but one my
4477                 // Dad taught me - the first one I ever heard of.
4478                 for i:=1; then i = i + 1; while i < size:
4479                         n := list[i-1] * list[i-1]
4480                         list[i] = (n / 100) % 10 000
4481
4482                 print "Before sort:",
4483                 for i:=0; then i = i + 1; while i < size:
4484                         print "", list[i],
4485                 print
4486
4487                 for i := 1; then i=i+1; while i < size:
4488                         for j:=i-1; then j=j-1; while j >= 0:
4489                                 if list[j] > list[j+1]:
4490                                         t:= list[j]
4491                                         list[j] = list[j+1]
4492                                         list[j+1] = t
4493                 print " After sort:",
4494                 for i:=0; then i = i + 1; while i < size:
4495                         print "", list[i],
4496                 print
4497
4498                 if 1 == 2 then print "yes"; else print "no"
4499
4500                 bob:fred
4501                 bob.name = "Hello"
4502                 bob.alive = (bob.name == "Hello")
4503                 print "bob", "is" if  bob.alive else "isn't", "alive"