parsergen: don't assume TK_newline in look-ahead for lineline

[ocean] / csrc / parsergen.mdc
diff --git a/csrc/parsergen.mdc b/csrc/parsergen.mdc

index 5d5ea1ebc352b13eacfd809a11416ce1ba2e5e18..679fab6996de66a9fe267d914c654812f74771f3 100644 (file)
--- a/csrc/parsergen.mdc
+++ b/csrc/parsergen.mdc
@@ -17,7 +17,9 @@ There are several distinct sections.
  5. `parser_run` is a library function intended to be linked together
     with the generated parser tables to complete the implementation of
     a parser.
  5. `parser_run` is a library function intended to be linked together
     with the generated parser tables to complete the implementation of
     a parser.
-6. `calc.cgm` is a test grammar for a simple calculator.
+6. Finally `calc` is a test grammar for a simple calculator.  The
+   `parsergen` program built from the C code in this file can extract
+   that grammar directly from this file and process it.
  
  ###### File: parsergen.c
         #include <unistd.h>
  
  ###### File: parsergen.c
         #include <unistd.h>
@@ -40,13 +42,12 @@ There are several distinct sections.
         #include <stdlib.h>
         #include <stdio.h>
         ## parser includes
         #include <stdlib.h>
         #include <stdio.h>
         ## parser includes
-       ## parser
-###### File: calc.cgm
-       ## demo grammar
+       ## parser functions
+       ## parser_run
  ###### File: parsergen.mk
         CFLAGS += -Wall -g
         all :: parsergen calc
  ###### File: parsergen.mk
         CFLAGS += -Wall -g
         all :: parsergen calc
-       parsergen.c parsergen.mk calc.cgm libparser.c parser.h : parsergen.mdc
+       parsergen.c parsergen.mk libparser.c parser.h : parsergen.mdc
                 ./md2c parsergen.mdc
  
  ## Reading the grammar
                 ./md2c parsergen.mdc
  
  ## Reading the grammar
@@ -57,6 +58,13 @@ sections: `header`, `code`, and `grammar`.  The first two will be
  literally copied into the generated `.c` and `.h`. files.  The last
  contains the grammar.  This is tokenised with "[scanner][]".
  
  literally copied into the generated `.c` and `.h`. files.  The last
  contains the grammar.  This is tokenised with "[scanner][]".
  
+If the `--tag` option is given, then any top level header that doesn't
+start with the tag is ignored, and the tag is striped from the rest.  So
+`--tag Foo`
+means that the three needed sections must be `Foo: header`, `Foo: code`,
+and `Foo: grammar`.  The tag `calc` is used to extract the same calculator
+from this file.
+
  [mdcode]: mdcode.html
  [scanner]: scanner.html
  
  [mdcode]: mdcode.html
  [scanner]: scanner.html
  
@@ -73,9 +81,10 @@ declarations, and data type declarations.  These are all parsed with
  _ad hoc_ parsing as we don't have a parser generator yet.
  
  The precedence and associativity can be set for each production, but
  _ad hoc_ parsing as we don't have a parser generator yet.
  
  The precedence and associativity can be set for each production, but
-can be inherited from symbols.  The data type is potentially defined
-for each non-terminal and describes what C structure is used to store
-information about each symbol.
+can be inherited from symbols.  The data type (either structure or a
+reference to a structure) is potentially defined for each non-terminal
+and describes what C structure is used to store information about each
+symbol.
  
  ###### declarations
         enum assoc {Left, Right, Non};
  
  ###### declarations
         enum assoc {Left, Right, Non};
@@ -83,6 +92,7 @@ information about each symbol.
  
         struct symbol {
                 struct text struct_name;
  
         struct symbol {
                 struct text struct_name;
+               int isref;
                 enum assoc assoc;
                 unsigned short precedence;
                 ## symbol fields
                 enum assoc assoc;
                 unsigned short precedence;
                 ## symbol fields
@@ -90,6 +100,7 @@ information about each symbol.
         struct production {
                 unsigned short precedence;
                 enum assoc assoc;
         struct production {
                 unsigned short precedence;
                 enum assoc assoc;
+               char line_like;
                 ## production fields
         };
         struct grammar {
                 ## production fields
         };
         struct grammar {
@@ -97,11 +108,14 @@ information about each symbol.
         };
  
  The strings reported by `mdcode` and `scanner` are `struct text` which have
         };
  
  The strings reported by `mdcode` and `scanner` are `struct text` which have
-length rather than being null terminated.  To help with printing an
+length rather than being null terminated.  To help with printing and
  comparing we define `text_is` and `prtxt`, which should possibly go in
  `mdcode`.  `scanner` does provide `text_dump` which is useful for strings
  which might contain control characters.
  
  comparing we define `text_is` and `prtxt`, which should possibly go in
  `mdcode`.  `scanner` does provide `text_dump` which is useful for strings
  which might contain control characters.
  
+`strip_tag` is a bit like `strncmp`, but adds a test for a colon,
+because that is what we need to detect tags.
+
  ###### functions
         static int text_is(struct text t, char *s)
         {
  ###### functions
         static int text_is(struct text t, char *s)
         {
@@ -113,6 +127,20 @@ which might contain control characters.
                 printf("%.*s", t.len, t.txt);
         }
  
                 printf("%.*s", t.len, t.txt);
         }
  
+       static int strip_tag(struct text *t, char *tag)
+       {
+               int skip = strlen(tag) + 1;
+               if (skip >= t->len ||
+                   strncmp(t->txt, tag, skip-1) != 0 ||
+                   t->txt[skip-1] != ':')
+                       return 0;
+               while (skip < t->len && t->txt[skip] == ' ')
+                       skip++;
+               t->len -= skip;
+               t->txt += skip;
+               return 1;
+       }
+
  ### Symbols
  
  Productions are comprised primarily of symbols - terminal and
  ### Symbols
  
  Productions are comprised primarily of symbols - terminal and
@@ -142,8 +170,8 @@ different token types that `scanner` can report.
                 [TK_mark]         = "MARK",
                 [TK_string]       = "STRING",
                 [TK_multi_string] = "MULTI_STRING",
                 [TK_mark]         = "MARK",
                 [TK_string]       = "STRING",
                 [TK_multi_string] = "MULTI_STRING",
-               [TK_indent]       = "INDENT",
-               [TK_undent]       = "UNDENT",
+               [TK_in]           = "IN",
+               [TK_out]          = "OUT",
                 [TK_newline]      = "NEWLINE",
                 [TK_eof]          = "$eof",
         };
                 [TK_newline]      = "NEWLINE",
                 [TK_eof]          = "$eof",
         };
@@ -174,18 +202,6 @@ symbol, but its type might be `Unknown`.
         int num_syms;
  
  ###### functions
         int num_syms;
  
  ###### functions
-       static int text_cmp(struct text a, struct text b)
-       {
-               int len = a.len;
-               if (a.len > b.len)
-                       len = b.len;
-               int cmp = strncmp(a.txt, b.txt, len);
-               if (cmp)
-                       return cmp;
-               else
-                       return a.len - b.len;
-       }
-
         static struct symbol *sym_find(struct grammar *g, struct text s)
         {
                 struct symbol **l = &g->syms;
         static struct symbol *sym_find(struct grammar *g, struct text s)
         {
                 struct symbol **l = &g->syms;
@@ -231,8 +247,17 @@ word is `LEFT`, `RIGHT` or `NON`, then the line specifies a
  precedence, otherwise it specifies a data type.
  
  The data type name is simply stored and applied to the head of all
  precedence, otherwise it specifies a data type.
  
  The data type name is simply stored and applied to the head of all
-subsequent productions.  It must be the name of a structure, so `$expr`
-maps to `struct expr`.
+subsequent productions.  It must be the name of a structure optionally
+preceded by an asterisk which means a reference or "pointer".  So
+`$expression` maps to `struct expression` and `$*statement` maps to
+`struct statement *`.
+
+Any productions given before the first data type declaration will have
+no data type associated with them and can carry no information.  In
+order to allow other non-terminals to have no type, the data type
+`$void` can be given.  This does *not* mean that `struct void` will be
+used, but rather than no type will be associated with future
+non-terminals.
  
  The precedence line must contain a list of symbols - typically
  terminal symbols, but not necessarily.  It can only contain symbols
  
  The precedence line must contain a list of symbols - typically
  terminal symbols, but not necessarily.  It can only contain symbols
@@ -253,8 +278,12 @@ declares how it associates.  This level is stored in each symbol
  listed and may be inherited by any production which uses the symbol.  A
  production inherits from the last symbol which has a precedence.
  
  listed and may be inherited by any production which uses the symbol.  A
  production inherits from the last symbol which has a precedence.
  
+The symbols on the first precedence line have the lowest precedence.
+Subsequent lines introduce symbols with higher precedence.
+
  ###### grammar fields
         struct text current_type;
  ###### grammar fields
         struct text current_type;
+       int type_isref;
         int prec_levels;
  
  ###### declarations
         int prec_levels;
  
  ###### declarations
@@ -262,7 +291,7 @@ production inherits from the last symbol which has a precedence.
         static const char *known[] = { "$$", "${", "}$" };
  
  ###### functions
         static const char *known[] = { "$$", "${", "}$" };
  
  ###### functions
-       static char *dollar_line(struct token_state *ts, struct grammar *g)
+       static char *dollar_line(struct token_state *ts, struct grammar *g, int isref)
         {
                 struct token t = token_next(ts);
                 char *err;
         {
                 struct token t = token_next(ts);
                 char *err;
@@ -281,6 +310,9 @@ production inherits from the last symbol which has a precedence.
                         assoc = Non;
                 else {
                         g->current_type = t.txt;
                         assoc = Non;
                 else {
                         g->current_type = t.txt;
+                       g->type_isref = isref;
+                       if (text_is(t.txt, "void"))
+                               g->current_type.txt = NULL;
                         t = token_next(ts);
                         if (t.num != TK_newline) {
                                 err = "Extra tokens after type name";
                         t = token_next(ts);
                         if (t.num != TK_newline) {
                                 err = "Extra tokens after type name";
@@ -289,6 +321,11 @@ production inherits from the last symbol which has a precedence.
                         return NULL;
                 }
  
                         return NULL;
                 }
  
+               if (isref) {
+                       err = "$* cannot be followed by a precedence";
+                       goto abort;
+               }
+
                 // This is a precedence line, need some symbols.
                 found = 0;
                 g->prec_levels += 1;
                 // This is a precedence line, need some symbols.
                 found = 0;
                 g->prec_levels += 1;
@@ -317,6 +354,7 @@ production inherits from the last symbol which has a precedence.
                         s->precedence = g->prec_levels;
                         s->assoc = assoc;
                         found += 1;
                         s->precedence = g->prec_levels;
                         s->assoc = assoc;
                         found += 1;
+                       t = token_next(ts);
                 }
                 if (found == 0)
                         err = "No symbols given on precedence line";
                 }
                 if (found == 0)
                         err = "No symbols given on precedence line";
@@ -343,7 +381,7 @@ precedence of the production is that for the virtual symbol.  If none
  is given, the precedence is inherited from the last symbol in the
  production which has a precedence specified.
  
  is given, the precedence is inherited from the last symbol in the
  production which has a precedence specified.
  
-After the optional precedence may come  the `${` mark.  This indicates
+After the optional precedence may come the `${` mark.  This indicates
  the start of a code fragment.  If present, this must be on the same
  line as the start of the production.
  
  the start of a code fragment.  If present, this must be on the same
  line as the start of the production.
  
@@ -352,12 +390,15 @@ collected and forms the code-fragment for the production.  It must all
  be in one `code_node` of the literate code.  The `}$` must be
  at the end of a line.
  
  be in one `code_node` of the literate code.  The `}$` must be
  at the end of a line.
  
-Text in the code fragment will undergo substitutions where `$N` for
-some numeric `N` will be replaced with a variable holding the parse
-information for the particular symbol in the production.  `$0` is the
-head of the production, `$1` is the first symbol of the body, etc.
-The type of `$N` for a terminal symbol is `struct token`.  For
-non-terminal, it is whatever has been declared for that symbol.
+Text in the code fragment will undergo substitutions where `$N` or
+`$<N`,for some numeric `N`, will be replaced with a variable holding
+the parse information for the particular symbol in the production.
+`$0` is the head of the production, `$1` is the first symbol of the
+body, etc.  The type of `$N` for a terminal symbol is `struct token`.
+For a non-terminal, it is whatever has been declared for that symbol.
+The `<` may be included for symbols declared as storing a reference
+(not a structure) and means that the reference is being moved out, so
+it will not automatically be freed.
  
  While building productions we will need to add to an array which needs to
  grow dynamically.
  
  While building productions we will need to add to an array which needs to
  grow dynamically.
@@ -401,7 +442,7 @@ the end.
                 return code;
         }
  
                 return code;
         }
  
-Now we have all the bit we need to parse a full production.
+Now we have all the bits we need to parse a full production.
  
  ###### includes
         #include <memory.h>
  
  ###### includes
         #include <memory.h>
@@ -415,6 +456,7 @@ Now we have all the bit we need to parse a full production.
         struct symbol **body;
         int             body_size;
         struct text     code;
         struct symbol **body;
         int             body_size;
         struct text     code;
+       int             code_line;
  
  ###### symbol fields
         int first_production;
  
  ###### symbol fields
         int first_production;
@@ -455,15 +497,19 @@ Now we have all the bit we need to parse a full production.
                                 goto abort;
                         }
                         vs = sym_find(g, tk.txt);
                                 goto abort;
                         }
                         vs = sym_find(g, tk.txt);
-                       if (vs->type != Virtual) {
-                               err = "symbol after $$ must be virtual";
+                       if (vs->num == TK_newline)
+                               p.line_like = 1;
+                       else if (vs->precedence == 0) {
+                               err = "symbol after $$ must have precedence";
                                 goto abort;
                                 goto abort;
+                       } else {
+                               p.precedence = vs->precedence;
+                               p.assoc = vs->assoc;
                         }
                         }
-                       p.precedence = vs->precedence;
-                       p.assoc = vs->assoc;
                         tk = token_next(state);
                 }
                 if (tk.num == TK_open) {
                         tk = token_next(state);
                 }
                 if (tk.num == TK_open) {
+                       p.code_line = tk.line;
                         p.code = collect_code(state, tk);
                         if (p.code.txt == NULL) {
                                 err = "code fragment not closed properly";
                         p.code = collect_code(state, tk);
                         if (p.code.txt == NULL) {
                                 err = "code fragment not closed properly";
@@ -489,32 +535,38 @@ With the ability to parse production and dollar-lines, we have nearly all
  that we need to parse a grammar from a `code_node`.
  
  The head of the first production will effectively be the `start` symbol of
  that we need to parse a grammar from a `code_node`.
  
  The head of the first production will effectively be the `start` symbol of
-the grammar.  However it wont _actually_ be so.  Processing the grammar is
+the grammar.  However it won't _actually_ be so.  Processing the grammar is
  greatly simplified if the real start symbol only has a single production,
  greatly simplified if the real start symbol only has a single production,
-and expect `$eof` as the final terminal.  So when we find the first explicit
-production we insert an extra production as production zero which looks like
+and expects `$eof` as the final terminal.  So when we find the first
+explicit production we insert an extra production as production zero which
+looks like
  
  ###### Example: production 0
         $start -> START $eof
  
  
  ###### Example: production 0
         $start -> START $eof
  
-where `START` is the first non-terminal give.
+where `START` is the first non-terminal given.
  
  ###### create production zero
         struct production *p = calloc(1,sizeof(*p));
         struct text start = {"$start",6};
         struct text eof = {"$eof",4};
  
  ###### create production zero
         struct production *p = calloc(1,sizeof(*p));
         struct text start = {"$start",6};
         struct text eof = {"$eof",4};
+       struct text code = {"$0 = $<1;", 9};
         p->head = sym_find(g, start);
         p->head->type = Nonterminal;
         p->head = sym_find(g, start);
         p->head->type = Nonterminal;
+       p->head->struct_name = g->current_type;
+       p->head->isref = g->type_isref;
+       if (g->current_type.txt)
+               p->code = code;
         array_add(&p->body, &p->body_size, head);
         array_add(&p->body, &p->body_size, sym_find(g, eof));
         array_add(&p->body, &p->body_size, head);
         array_add(&p->body, &p->body_size, sym_find(g, eof));
-       g->start  = p->head->num;
         p->head->first_production = g->production_count;
         array_add(&g->productions, &g->production_count, p);
  
         p->head->first_production = g->production_count;
         array_add(&g->productions, &g->production_count, p);
  
-Now we are ready to read in the grammar.
-
-###### grammar fields
-       int start;      // the 'start' symbol.
+Now we are ready to read in the grammar.  We ignore comments
+and strings so that the marks which introduce them can be
+used as terminals in the grammar.  We don't ignore numbers
+even though we don't allow them as that causes the scanner
+to produce errors that the parser is better positioned to handle.
  
  ###### grammar_read
         static struct grammar *grammar_read(struct code_node *code)
  
  ###### grammar_read
         static struct grammar *grammar_read(struct code_node *code)
@@ -530,12 +582,12 @@ Now we are ready to read in the grammar.
                                  | (0 << TK_number)
                                  | (1 << TK_string)
                                  | (1 << TK_multi_string)
                                  | (0 << TK_number)
                                  | (1 << TK_string)
                                  | (1 << TK_multi_string)
-                                | (1 << TK_indent)
-                                | (1 << TK_undent),
+                                | (1 << TK_in)
+                                | (1 << TK_out),
                 };
  
                 struct token_state *state = token_open(code, &conf);
                 };
  
                 struct token_state *state = token_open(code, &conf);
-               struct token tk = token_next(state);
+               struct token tk;
                 struct symbol *head = NULL;
                 struct grammar *g;
                 char *err = NULL;
                 struct symbol *head = NULL;
                 struct grammar *g;
                 char *err = NULL;
@@ -557,7 +609,8 @@ Now we are ready to read in the grammar.
                                 else {
                                         head->type = Nonterminal;
                                         head->struct_name = g->current_type;
                                 else {
                                         head->type = Nonterminal;
                                         head->struct_name = g->current_type;
-                                       if (g->start == 0) {
+                                       head->isref = g->type_isref;
+                                       if (g->production_count == 0) {
                                                 ## create production zero
                                         }
                                         head->first_production = g->production_count;
                                                 ## create production zero
                                         }
                                         head->first_production = g->production_count;
@@ -576,8 +629,11 @@ Now we are ready to read in the grammar.
                                 else
                                         err = "First production must have a head";
                         } else if (tk.num == TK_mark
                                 else
                                         err = "First production must have a head";
                         } else if (tk.num == TK_mark
-                                  &&  text_is(tk.txt, "$")) {
-                               err = dollar_line(state, g);
+                                  && text_is(tk.txt, "$")) {
+                               err = dollar_line(state, g, 0);
+                       } else if (tk.num == TK_mark
+                                  && text_is(tk.txt, "$*")) {
+                               err = dollar_line(state, g, 1);
                         } else {
                                 err = "Unrecognised token at start of line.";
                         }
                         } else {
                                 err = "Unrecognised token at start of line.";
                         }
@@ -636,7 +692,7 @@ allocated space as it can be derived directly from the current `cnt` using
  `((cnt - 1) | 7) + 1`.
  
  ###### functions
  `((cnt - 1) | 7) + 1`.
  
  ###### functions
-       static void symset_add(struct symset *s, int key, int val)
+       static void symset_add(struct symset *s, unsigned short key, unsigned short val)
         {
                 int i;
                 int current = ((s->cnt-1) | 7) + 1;
         {
                 int i;
                 int current = ((s->cnt-1) | 7) + 1;
@@ -663,7 +719,7 @@ Finding a symbol (or item) in a `symset` uses a simple binary search.
  We return the index where the value was found (so data can be accessed),
  or `-1` to indicate failure.
  
  We return the index where the value was found (so data can be accessed),
  or `-1` to indicate failure.
  
-       static int symset_find(struct symset *ss, int key)
+       static int symset_find(struct symset *ss, unsigned short key)
         {
                 int lo = 0;
                 int hi = ss->cnt;
         {
                 int lo = 0;
                 int hi = ss->cnt;
@@ -683,10 +739,10 @@ or `-1` to indicate failure.
         }
  
  We will often want to form the union of two symsets.  When we do, we
         }
  
  We will often want to form the union of two symsets.  When we do, we
-will often want to know if anything changed (as they might mean there
+will often want to know if anything changed (as that might mean there
  is more work to do).  So `symset_union` reports whether anything was
  added to the first set.  We use a slow quadratic approach as these
  is more work to do).  So `symset_union` reports whether anything was
  added to the first set.  We use a slow quadratic approach as these
-sets don't really get very big.  If profiles shows this to be a problem is
+sets don't really get very big.  If profiles shows this to be a problem it
  can be optimised later.
  
         static int symset_union(struct symset *a, struct symset *b)
  can be optimised later.
  
         static int symset_union(struct symset *a, struct symset *b)
@@ -695,7 +751,7 @@ can be optimised later.
                 int added = 0;
                 for (i = 0; i < b->cnt; i++)
                         if (symset_find(a, b->syms[i]) < 0) {
                 int added = 0;
                 for (i = 0; i < b->cnt; i++)
                         if (symset_find(a, b->syms[i]) < 0) {
-                               int data = 0;
+                               unsigned short data = 0;
                                 if (b->data != NO_DATA)
                                         data = b->data[i];
                                 symset_add(a, b->syms[i], data);
                                 if (b->data != NO_DATA)
                                         data = b->data[i];
                                 symset_add(a, b->syms[i], data);
@@ -789,7 +845,6 @@ array like the productions.
                 return sl->ss;
         }
  
                 return sl->ss;
         }
  
-
  ### Setting `nullable`
  
  We set `nullable` on the head symbol for any production for which all
  ### Setting `nullable`
  
  We set `nullable` on the head symbol for any production for which all
@@ -827,10 +882,58 @@ changes happen.
                 }
         }
  
                 }
         }
  
+### Setting `line_like`
+
+In order to be able to ignore newline tokens when not relevant, but
+still include them in the parse when needed, we will need to know
+which states can start a "line-like" section of code.  We ignore
+newlines when there is an indent since the most recent start of a
+line-like symbol.
+
+A "line_like" symbol is simply any symbol that can derive a NEWLINE.
+If a symbol cannot derive a NEWLINE, then it is only part of a line -
+so is word-like.  If it can derive a NEWLINE, then we consider it to
+be like a line.
+
+Clearly the `TK_newline` token can derive a NEWLINE.  Any symbol which
+is the head of a production that contains a line_like symbol is also a
+line-like symbol.  We use a new field `line_like` to record this
+attribute of symbols, and compute it in a repetitive manner similar to
+`set_nullable`.
+
+###### symbol fields
+       int line_like;
+
+###### functions
+       static void set_line_like(struct grammar *g)
+       {
+               int check_again = 1;
+               g->symtab[TK_newline]->line_like = 1;
+               while (check_again) {
+                       int p;
+                       check_again = 0;
+                       for (p = 0; p < g->production_count; p++) {
+                               struct production *pr = g->productions[p];
+                               int s;
+
+                               if (pr->head->line_like)
+                                       continue;
+
+                               for (s = 0 ; s < pr->body_size; s++) {
+                                       if (pr->body[s]->line_like) {
+                                               pr->head->line_like = 1;
+                                               check_again = 1;
+                                               break;
+                                       }
+                               }
+                       }
+               }
+       }
+
  ### Building the `first` sets
  
  When calculating what can follow a particular non-terminal, we will need to
  ### Building the `first` sets
  
  When calculating what can follow a particular non-terminal, we will need to
-know what the "first" terminal in an subsequent non-terminal might be.  So
+know what the "first" terminal in any subsequent non-terminal might be.  So
  we calculate the `first` set for every non-terminal and store them in an
  array.  We don't bother recording the "first" set for terminals as they are
  trivial.
  we calculate the `first` set for every non-terminal and store them in an
  array.  We don't bother recording the "first" set for terminals as they are
  trivial.
@@ -840,7 +943,7 @@ we repeat the calculations until no changes happen, just like with
  `set_nullable`.  This makes use of the fact that `symset_union`
  reports if any change happens.
  
  `set_nullable`.  This makes use of the fact that `symset_union`
  reports if any change happens.
  
-The core of this which finds the "first" of part of a production body
+The core of this, which finds the "first" of part of a production body,
  will be reused for computing the "follow" sets, so we split it out
  into a separate function.
  
  will be reused for computing the "follow" sets, so we split it out
  into a separate function.
  
@@ -975,7 +1078,7 @@ building the itemsets and states for the LR grammar.  They are:
  1. LR(0) or SLR(1), where no look-ahead is considered.
  2. LALR(1) where we build look-ahead sets with each item and merge
     the LA sets when we find two paths to the same "kernel" set of items.
  1. LR(0) or SLR(1), where no look-ahead is considered.
  2. LALR(1) where we build look-ahead sets with each item and merge
     the LA sets when we find two paths to the same "kernel" set of items.
-3. LR(1) where different look-ahead for any item in the code means
+3. LR(1) where different look-ahead for any item in the set means
     a different state must be created.
  
  ###### forward declarations
     a different state must be created.
  
  ###### forward declarations
@@ -997,13 +1100,14 @@ as we want to do the lookup after generating the "kernel" of an
  itemset, so we need to ignore the offset=zero items which are added during
  completion.
  
  itemset, so we need to ignore the offset=zero items which are added during
  completion.
  
-To facilitate this, we modify the "DOT" number so that "0" sorts to the end of
-the list in the symset, and then only compare items before the first "0".
+To facilitate this, we modify the "DOT" number so that "0" sorts to
+the end of the list in the symset, and then only compare items before
+the first "0".
  
  ###### declarations
         static inline unsigned short item_num(int production, int index)
         {
  
  ###### declarations
         static inline unsigned short item_num(int production, int index)
         {
-               return production | (((index-1)&0x1f) << 11);
+               return production | ((31-index) << 11);
         }
         static inline int item_prod(unsigned short item)
         {
         }
         static inline int item_prod(unsigned short item)
         {
@@ -1011,7 +1115,7 @@ the list in the symset, and then only compare items before the first "0".
         }
         static inline int item_index(unsigned short item)
         {
         }
         static inline int item_index(unsigned short item)
         {
-               return ((item >> 11)+1) & 0x1f;
+               return (31-(item >> 11)) & 0x1f;
         }
  
  For LR(1) analysis we need to compare not just the itemset in a state
         }
  
  For LR(1) analysis we need to compare not just the itemset in a state
@@ -1055,6 +1159,26 @@ can just compare the symset and the data values together.
                         a.data[i] - b.data[i];
         }
  
                         a.data[i] - b.data[i];
         }
  
+It will be helpful to know if an itemset has been "completed" or not,
+particularly for LALR where itemsets get merged, at which point they
+need to be consider for completion again.  So  a `completed` flag is needed.
+
+For correct handling of `TK_newline` when parsing, we will need to
+know which states (itemsets) can occur at the start of a line, so we
+will record a `starts_line` flag too whenever DOT is at the start of a
+`line_like` symbol.
+
+Finally, for handling `TK_out` we need to know whether productions in the
+current state started *before* the most recent indent.  A state
+doesn't usually keep details of individual productions, so we need to
+add one extra detail. `min_prefix` is the smallest non-zero number of
+symbols *before* DOT in any production in an itemset.  This will allow
+us to determine if the the most recent indent is sufficiently recent
+to cancel it against a `TK_out`.  If it was seen longer ago than the
+`min_prefix`, and if the current state cannot be reduced, then the
+indented section must have ended in the middle of a syntactic unit, so
+an error must be signaled.
+
  And now we can build the list of itemsets.  The lookup routine returns
  both a success flag and a pointer to where in the list an insert
  should happen, so we don't need to search a second time.
  And now we can build the list of itemsets.  The lookup routine returns
  both a success flag and a pointer to where in the list an insert
  should happen, so we don't need to search a second time.
@@ -1065,7 +1189,11 @@ should happen, so we don't need to search a second time.
                 short state;
                 struct symset items;
                 struct symset go_to;
                 short state;
                 struct symset items;
                 struct symset go_to;
+               enum assoc assoc;
+               unsigned short precedence;
                 char completed;
                 char completed;
+               char starts_line;
+               int min_prefix;
         };
  
  ###### grammar fields
         };
  
  ###### grammar fields
@@ -1095,7 +1223,7 @@ should happen, so we don't need to search a second time.
         }
  
  Adding an itemset may require merging the LA sets if LALR analysis is
         }
  
  Adding an itemset may require merging the LA sets if LALR analysis is
-happening. If any new LA set add symbol that weren't in the old LA set, we
+happening. If any new LA set adds any symbols that weren't in the old LA set, we
  clear the `completed` flag so that the dependants of this itemset will be
  recalculated and their LA sets updated.
  
  clear the `completed` flag so that the dependants of this itemset will be
  recalculated and their LA sets updated.
  
@@ -1103,6 +1231,7 @@ recalculated and their LA sets updated.
  them to a data structure, of freeing them.
  
         static int add_itemset(struct grammar *g, struct symset ss,
  them to a data structure, of freeing them.
  
         static int add_itemset(struct grammar *g, struct symset ss,
+                              enum assoc assoc, unsigned short precedence,
                                enum grammar_type type)
         {
                 struct itemset **where, *is;
                                enum grammar_type type)
         {
                 struct itemset **where, *is;
@@ -1113,6 +1242,8 @@ them to a data structure, of freeing them.
                         is->state = g->states;
                         g->states += 1;
                         is->items = ss;
                         is->state = g->states;
                         g->states += 1;
                         is->items = ss;
+                       is->assoc = assoc;
+                       is->precedence = precedence;
                         is->next = *where;
                         is->go_to = INIT_DATASET;
                         *where = is;
                         is->next = *where;
                         is->go_to = INIT_DATASET;
                         *where = is;
@@ -1142,9 +1273,9 @@ them to a data structure, of freeing them.
  
  #### The build
  
  
  #### The build
  
-To build all the itemsets, we first insert the initial itemset made from the
-start symbol, complete each itemset, and then generate new itemsets from old
-until no new ones can be made.
+To build all the itemsets, we first insert the initial itemset made
+from production zero, complete each itemset, and then generate new
+itemsets from old until no new ones can be made.
  
  Completing an itemset means finding all the items where "DOT" is followed by
  a nonterminal and adding "DOT=0" items for every production from that
  
  Completing an itemset means finding all the items where "DOT" is followed by
  a nonterminal and adding "DOT=0" items for every production from that
@@ -1156,9 +1287,18 @@ be supplemented by the LA set for the item which produce the new item.
  
  We also collect a set of all symbols which follow "DOT" (in `done`) as this
  is used in the next stage.
  
  We also collect a set of all symbols which follow "DOT" (in `done`) as this
  is used in the next stage.
-
-NOTE: precedence handling should happen here - I haven't written this yet
-though.
+If any of these symbols are flagged as `line_like`, then this
+state must be a `starts_line` state so now is a good time to record that.
+
+When itemsets are created we assign a precedence to the itemset from
+the complete item, if there is one.  We ignore the possibility of
+there being two and don't (currently) handle precedence in such
+grammars.  When completing a grammar we ignore any item where DOT is
+followed by a terminal with a precedence lower than that for the
+itemset.  Unless the terminal has right associativity, we also ignore
+items where the terminal has the same precedence.  The result is that
+unwanted items are still in the itemset, but the terminal doesn't get
+into the go to set, so the item is ineffective.
  
  ###### complete itemset
         for (i = 0; i < is->items.cnt; i++) {
  
  ###### complete itemset
         for (i = 0; i < is->items.cnt; i++) {
@@ -1168,13 +1308,33 @@ though.
                 int p2;
                 struct symbol *s;
                 struct symset LA = INIT_SYMSET;
                 int p2;
                 struct symbol *s;
                 struct symset LA = INIT_SYMSET;
-               int sn = 0;
+               unsigned short sn = 0;
+               struct symset LAnl = INIT_SYMSET;
+               unsigned short snnl = 0;
  
  
+               if (is->min_prefix == 0 ||
+                   (bs > 0 && bs < is->min_prefix))
+                       is->min_prefix = bs;
                 if (bs == pr->body_size)
                         continue;
                 s = pr->body[bs];
                 if (bs == pr->body_size)
                         continue;
                 s = pr->body[bs];
-               if (symset_find(&done, s->num) < 0)
+               if (s->precedence && is->precedence &&
+                   is->precedence > s->precedence)
+                       /* This terminal has a low precedence and
+                        * shouldn't be shifted
+                        */
+                       continue;
+               if (s->precedence && is->precedence &&
+                   is->precedence == s->precedence && s->assoc != Right)
+                       /* This terminal has a matching precedence and is
+                        * not Right-associative, so we mustn't shift it.
+                        */
+                       continue;
+               if (symset_find(&done, s->num) < 0) {
                         symset_add(&done, s->num, 0);
                         symset_add(&done, s->num, 0);
+                       if (s->line_like)
+                               is->starts_line = 1;
+               }
                 if (s->type != Nonterminal)
                         continue;
                 again = 1;
                 if (s->type != Nonterminal)
                         continue;
                 again = 1;
@@ -1188,7 +1348,12 @@ though.
                         }
                         sn = save_set(g, LA);
                         LA = set_find(g, sn);
                         }
                         sn = save_set(g, LA);
                         LA = set_find(g, sn);
+                       if (symset_find(&LA, TK_newline))
+                               symset_add(&LAnl, TK_newline, 0);
+                       snnl = save_set(g, LAnl);
+                       LAnl = set_find(g, snnl);
                 }
                 }
+
                 /* Add productions for this symbol */
                 for (p2 = s->first_production;
                      p2 < g->production_count &&
                 /* Add productions for this symbol */
                 for (p2 = s->first_production;
                      p2 < g->production_count &&
@@ -1197,19 +1362,25 @@ though.
                         int itm = item_num(p2, 0);
                         int pos = symset_find(&is->items, itm);
                         if (pos < 0) {
                         int itm = item_num(p2, 0);
                         int pos = symset_find(&is->items, itm);
                         if (pos < 0) {
-                               symset_add(&is->items, itm, sn);
+                               if (g->productions[p2]->line_like)
+                                       symset_add(&is->items, itm, snnl);
+                               else
+                                       symset_add(&is->items, itm, sn);
                                 /* Will have re-ordered, so start
                                  * from beginning again */
                                 i = -1;
                         } else if (type >= LALR) {
                                 struct symset ss = set_find(g, is->items.data[pos]);
                                 struct symset tmp = INIT_SYMSET;
                                 /* Will have re-ordered, so start
                                  * from beginning again */
                                 i = -1;
                         } else if (type >= LALR) {
                                 struct symset ss = set_find(g, is->items.data[pos]);
                                 struct symset tmp = INIT_SYMSET;
+                               struct symset *la = &LA;
  
  
+                               if (g->productions[p2]->line_like)
+                                       la = &LAnl;
                                 symset_union(&tmp, &ss);
                                 symset_union(&tmp, &ss);
-                               if (symset_union(&tmp, &LA)) {
+                               if (symset_union(&tmp, la)) {
                                         is->items.data[pos] = save_set(g, tmp);
                                         i = -1;
                                         is->items.data[pos] = save_set(g, tmp);
                                         i = -1;
-                               }else
+                               } else
                                         symset_free(tmp);
                         }
                 }
                                         symset_free(tmp);
                         }
                 }
@@ -1222,11 +1393,14 @@ with a pre-existing itemset).
  
  ###### derive itemsets
         // Now we have a completed itemset, so we need to
  
  ###### derive itemsets
         // Now we have a completed itemset, so we need to
-       // create all the 'next' itemset and create them
+       // compute all the 'next' itemsets and create them
         // if they don't exist.
         for (i = 0; i < done.cnt; i++) {
                 int j;
         // if they don't exist.
         for (i = 0; i < done.cnt; i++) {
                 int j;
-               int state;
+               unsigned short state;
+               struct symbol *sym = g->symtab[done.syms[i]];
+               enum assoc assoc = Non;
+               unsigned short precedence = 0;
                 struct symset newitemset = INIT_SYMSET;
                 if (type >= LALR)
                         newitemset = INIT_DATASET;
                 struct symset newitemset = INIT_SYMSET;
                 if (type >= LALR)
                         newitemset = INIT_DATASET;
@@ -1236,16 +1410,23 @@ with a pre-existing itemset).
                         int p = item_prod(itm);
                         int bp = item_index(itm);
                         struct production *pr = g->productions[p];
                         int p = item_prod(itm);
                         int bp = item_index(itm);
                         struct production *pr = g->productions[p];
-                       int la = 0;
+                       unsigned short la = 0;
                         int pos;
  
                         if (bp == pr->body_size)
                                 continue;
                         int pos;
  
                         if (bp == pr->body_size)
                                 continue;
-                       if (pr->body[bp]->num != done.syms[i])
+                       if (pr->body[bp] != sym)
                                 continue;
                         if (type >= LALR)
                                 la = is->items.data[j];
                         pos = symset_find(&newitemset, pr->head->num);
                                 continue;
                         if (type >= LALR)
                                 la = is->items.data[j];
                         pos = symset_find(&newitemset, pr->head->num);
+                       if (bp + 1 == pr->body_size &&
+                           pr->precedence > 0 &&
+                           pr->precedence > precedence) {
+                               // new itemset is reducible and has a precedence.
+                               precedence = pr->precedence;
+                               assoc = pr->assoc;
+                       }
                         if (pos < 0)
                                 symset_add(&newitemset, item_num(p, bp+1), la);
                         else if (type >= LALR) {
                         if (pos < 0)
                                 symset_add(&newitemset, item_num(p, bp+1), la);
                         else if (type >= LALR) {
@@ -1263,12 +1444,12 @@ with a pre-existing itemset).
                                 }
                         }
                 }
                                 }
                         }
                 }
-               state = add_itemset(g, newitemset, type);
+               state = add_itemset(g, newitemset, assoc, precedence, type);
                 if (symset_find(&is->go_to, done.syms[i]) < 0)
                         symset_add(&is->go_to, done.syms[i], state);
         }
  
                 if (symset_find(&is->go_to, done.syms[i]) < 0)
                         symset_add(&is->go_to, done.syms[i], state);
         }
  
-All that is left is to crate the initial itemset from production zero, and
+All that is left is to create the initial itemset from production zero, and
  with `TK_eof` as the LA set.
  
  ###### functions
  with `TK_eof` as the LA set.
  
  ###### functions
@@ -1277,7 +1458,7 @@ with `TK_eof` as the LA set.
                 struct symset first = INIT_SYMSET;
                 struct itemset *is;
                 int again;
                 struct symset first = INIT_SYMSET;
                 struct itemset *is;
                 int again;
-               int la = 0;
+               unsigned short la = 0;
                 if (type >= LALR) {
                         // LA set just has eof
                         struct symset eof = INIT_SYMSET;
                 if (type >= LALR) {
                         // LA set just has eof
                         struct symset eof = INIT_SYMSET;
@@ -1285,9 +1466,9 @@ with `TK_eof` as the LA set.
                         la = save_set(g, eof);
                         first = INIT_DATASET;
                 }
                         la = save_set(g, eof);
                         first = INIT_DATASET;
                 }
-               // production 0, offset 0  (with no data)
+               // production 0, offset 0 (with no data)
                 symset_add(&first, item_num(0, 0), la);
                 symset_add(&first, item_num(0, 0), la);
-               add_itemset(g, first, type);
+               add_itemset(g, first, Non, 0, type);
                 for (again = 0, is = g->items;
                      is;
                      is = is->next ?: again ? (again = 0, g->items) : NULL) {
                 for (again = 0, is = g->items;
                      is;
                      is = is->next ?: again ? (again = 0, g->items) : NULL) {
@@ -1296,6 +1477,7 @@ with `TK_eof` as the LA set.
                         if (is->completed)
                                 continue;
                         is->completed = 1;
                         if (is->completed)
                                 continue;
                         is->completed = 1;
+                       again = 1;
                         ## complete itemset
                         ## derive itemsets
                         symset_free(done);
                         ## complete itemset
                         ## derive itemsets
                         symset_free(done);
@@ -1345,10 +1527,11 @@ changeover point in `first_nonterm`.
                 for (s = g->syms; s; s = s->next)
                         g->symtab[s->num] = s;
  
                 for (s = g->syms; s; s = s->next)
                         g->symtab[s->num] = s;
  
-               if (type >= SLR) {
-                       set_nullable(g);
+               set_nullable(g);
+               set_line_like(g);
+               if (type >= SLR)
                         build_first(g);
                         build_first(g);
-               }
+
                 if (type == SLR)
                         build_follow(g);
  
                 if (type == SLR)
                         build_follow(g);
  
@@ -1363,7 +1546,7 @@ changeover point in `first_nonterm`.
  
  The purpose of the report is to give the grammar developer insight into
  how the grammar parser will work.  It is basically a structured dump of
  
  The purpose of the report is to give the grammar developer insight into
  how the grammar parser will work.  It is basically a structured dump of
-all the tables that have been generated, plus an description of any conflicts.
+all the tables that have been generated, plus a description of any conflicts.
  
  ###### grammar_report
         static int grammar_report(struct grammar *g, enum grammar_type type)
  
  ###### grammar_report
         static int grammar_report(struct grammar *g, enum grammar_type type)
@@ -1375,8 +1558,10 @@ all the tables that have been generated, plus an description of any conflicts.
                 return report_conflicts(g, type);
         }
  
                 return report_conflicts(g, type);
         }
  
-Firstly we have the complete list of symbols, together with the "FIRST"
-set if that was generated.
+Firstly we have the complete list of symbols, together with the
+"FIRST" set if that was generated.  We add a mark to each symbol to
+show if it can end in a newline (`>`), if it is considered to be
+"line-like" (`<`), or if it is nullable (`.`).
  
  ###### functions
  
  
  ###### functions
  
@@ -1393,8 +1578,9 @@ set if that was generated.
                         if (!s)
                                 continue;
  
                         if (!s)
                                 continue;
  
-                       printf(" %c%3d%c: ",
-                              s->nullable ? '*':' ',
+                       printf(" %c%c%3d%c: ",
+                              s->nullable ? '.':' ',
+                              s->line_like ? '<':' ',
                                s->num, symtypes[s->type]);
                         prtxt(s->name);
                         if (s->precedence)
                                s->num, symtypes[s->type]);
                         prtxt(s->name);
                         if (s->precedence)
@@ -1414,7 +1600,7 @@ set if that was generated.
                 }
         }
  
                 }
         }
  
-Then we have to follow sets if they were computed.
+Then we have the follow sets if they were computed.
  
         static void report_follow(struct grammar *g)
         {
  
         static void report_follow(struct grammar *g)
         {
@@ -1456,9 +1642,17 @@ it up a bit.  First the items, with production number and associativity.
                 if (dot == pr->body_size)
                         printf(" .");
                 printf(" [%d]", p);
                 if (dot == pr->body_size)
                         printf(" .");
                 printf(" [%d]", p);
-               if (pr->precedence)
+               if (pr->precedence && dot == pr->body_size)
                         printf(" (%d%s)", pr->precedence,
                                assoc_names[pr->assoc]);
                         printf(" (%d%s)", pr->precedence,
                                assoc_names[pr->assoc]);
+               if (dot < pr->body_size &&
+                   pr->body[dot]->precedence) {
+                       struct symbol *s = pr->body[dot];
+                       printf(" [%d%s]", s->precedence,
+                              assoc_names[s->assoc]);
+               }
+               if (pr->line_like)
+                       printf(" $$NEWLINE");
                 printf("\n");
         }
  
                 printf("\n");
         }
  
@@ -1481,7 +1675,6 @@ The LA sets which are (possibly) reported with each item:
  
  Then the go to sets:
  
  
  Then the go to sets:
  
-
         static void report_goto(struct grammar *g, struct symset gt)
         {
                 int i;
         static void report_goto(struct grammar *g, struct symset gt)
         {
                 int i;
@@ -1503,7 +1696,11 @@ Now we can report all the item sets complete with items, LA sets, and GO TO.
                 for (s = 0; s < g->states; s++) {
                         int j;
                         struct itemset *is = g->statetab[s];
                 for (s = 0; s < g->states; s++) {
                         int j;
                         struct itemset *is = g->statetab[s];
-                       printf("  Itemset %d:\n", s);
+                       printf("  Itemset %d:%s min prefix=%d",
+                              s, is->starts_line?" (startsline)":"", is->min_prefix);
+                       if (is->precedence)
+                               printf(" %d%s", is->precedence, assoc_names[is->assoc]);
+                       printf("\n");
                         for (j = 0; j < is->items.cnt; j++) {
                                 report_item(g, is->items.syms[j]);
                                 if (is->items.data != NO_DATA)
                         for (j = 0; j < is->items.cnt; j++) {
                                 report_item(g, is->items.syms[j]);
                                 if (is->items.data != NO_DATA)
@@ -1537,9 +1734,9 @@ LR1 are also similar as they have FOLLOW or LA sets.
         }
  
  LR0 conflicts are any state which have both a reducible item and
         }
  
  LR0 conflicts are any state which have both a reducible item and
-a shiftable item.
+a shiftable item, or two reducible items.
  
  
-LR05 conflicts only occurs if two possibly reductions exist,
+LR05 conflicts only occur if two possible reductions exist,
  as shifts always over-ride reductions.
  
  ###### conflict functions
  as shifts always over-ride reductions.
  
  ###### conflict functions
@@ -1586,12 +1783,18 @@ as shifts always over-ride reductions.
         }
  
  SLR, LALR, and LR1 conflicts happen if two reducible items have over-lapping
         }
  
  SLR, LALR, and LR1 conflicts happen if two reducible items have over-lapping
-look ahead, or if a symbol in a look-ahead can be shifted.  The differ only
+look ahead, or if a symbol in a look-ahead can be shifted.  They differ only
  in the source of the look ahead set.
  
  in the source of the look ahead set.
  
-We build a dataset mapping terminal to item for possible SHIFTs and then
-another for possible REDUCE operations. We report when we get conflicts
-between the two.
+We build two datasets to reflect the "action" table: one which maps
+terminals to items where that terminal could be shifted and another
+which maps terminals to items that could be reduced when the terminal
+is in look-ahead.  We report when we get conflicts between the two.
+
+As a special case, if we find a SHIFT/REDUCE conflict, on the NEWLINE
+terminal, we ignore it.  NEWLINES are handled specially with its own
+rules for when to shift and when to reduce.  Conflicts are expected,
+but handled internally.
  
         static int conflicts_slr(struct grammar *g, enum grammar_type type)
         {
  
         static int conflicts_slr(struct grammar *g, enum grammar_type type)
         {
@@ -1607,22 +1810,28 @@ between the two.
                                 continue;
                         /* First collect the shifts */
                         for (j = 0; j < is->items.cnt; j++) {
                                 continue;
                         /* First collect the shifts */
                         for (j = 0; j < is->items.cnt; j++) {
-                               int itm = is->items.syms[j];
+                               unsigned short itm = is->items.syms[j];
                                 int p = item_prod(itm);
                                 int bp = item_index(itm);
                                 struct production *pr = g->productions[p];
                                 int p = item_prod(itm);
                                 int bp = item_index(itm);
                                 struct production *pr = g->productions[p];
+                               struct symbol *s;
  
  
-                               if (bp < pr->body_size &&
-                                   pr->body[bp]->type == Terminal) {
-                                       /* shiftable */
-                                       int sym = pr->body[bp]->num;
-                                       if (symset_find(&shifts, sym) < 0)
-                                               symset_add(&shifts, sym, itm);
-                               }
+                               if (bp >= pr->body_size ||
+                                   pr->body[bp]->type != Terminal)
+                                       /* not shiftable */
+                                       continue;
+
+                               s = pr->body[bp];
+                               if (s->precedence && is->precedence)
+                                       /* Precedence resolves this, so no conflict */
+                                       continue;
+
+                               if (symset_find(&shifts, s->num) < 0)
+                                       symset_add(&shifts, s->num, itm);
                         }
                         }
-                       /* Now look for reduction and conflicts */
+                       /* Now look for reductions and conflicts */
                         for (j = 0; j < is->items.cnt; j++) {
                         for (j = 0; j < is->items.cnt; j++) {
-                               int itm = is->items.syms[j];
+                               unsigned short itm = is->items.syms[j];
                                 int p = item_prod(itm);
                                 int bp = item_index(itm);
                                 struct production *pr = g->productions[p];
                                 int p = item_prod(itm);
                                 int bp = item_index(itm);
                                 struct production *pr = g->productions[p];
@@ -1638,13 +1847,13 @@ between the two.
                                 int k;
                                 for (k = 0; k < la.cnt; k++) {
                                         int pos = symset_find(&shifts, la.syms[k]);
                                 int k;
                                 for (k = 0; k < la.cnt; k++) {
                                         int pos = symset_find(&shifts, la.syms[k]);
-                                       if (pos >= 0) {
+                                       if (pos >= 0 && la.syms[k] != TK_newline) {
                                                 printf("  State %d has SHIFT/REDUCE conflict on ", i);
                                                 printf("  State %d has SHIFT/REDUCE conflict on ", i);
-                                               prtxt(g->symtab[la.syms[k]]->name);
+                                               cnt++;
+                                                       prtxt(g->symtab[la.syms[k]]->name);
                                                 printf(":\n");
                                                 report_item(g, shifts.data[pos]);
                                                 report_item(g, itm);
                                                 printf(":\n");
                                                 report_item(g, shifts.data[pos]);
                                                 report_item(g, itm);
-                                               cnt++;
                                         }
                                         pos = symset_find(&reduce, la.syms[k]);
                                         if (pos < 0) {
                                         }
                                         pos = symset_find(&reduce, la.syms[k]);
                                         if (pos < 0) {
@@ -1665,10 +1874,9 @@ between the two.
                 return cnt;
         }
  
                 return cnt;
         }
  
-
  ## Generating the parser
  
  ## Generating the parser
  
-The export part of the parser is the `parse_XX` function, where the name
+The exported part of the parser is the `parse_XX` function, where the name
  `XX` is based on the name of the parser files.
  
  This takes a `code_node`, a partially initialized `token_config`, and an
  `XX` is based on the name of the parser files.
  
  This takes a `code_node`, a partially initialized `token_config`, and an
@@ -1676,18 +1884,20 @@ optional `FILE` to send tracing to.  The `token_config` gets the list of
  known words added and then is used with the `code_node` to initialize the
  scanner.
  
  known words added and then is used with the `code_node` to initialize the
  scanner.
  
-`parse_XX` then call the library function `parser_run` to actually complete
-the parse,  This needs the `states` table and function to call the various
+`parse_XX` then calls the library function `parser_run` to actually complete
+the parse.  This needs the `states` table and function to call the various
  pieces of code provided in the grammar file, so they are generated first.
  
  ###### parser_generate
  
  pieces of code provided in the grammar file, so they are generated first.
  
  ###### parser_generate
  
-       static void gen_parser(FILE *f, struct grammar *g, char *file, char *name)
+       static void gen_parser(FILE *f, struct grammar *g, char *file, char *name,
+                              struct code_node *pre_reduce)
         {
                 gen_known(f, g);
         {
                 gen_known(f, g);
+               gen_non_term(f, g);
                 gen_goto(f, g);
                 gen_states(f, g);
                 gen_goto(f, g);
                 gen_states(f, g);
-               gen_reduce(f, g, file);
+               gen_reduce(f, g, file, pre_reduce);
                 gen_free(f, g);
  
                 fprintf(f, "#line 0 \"gen_parser\"\n");
                 gen_free(f, g);
  
                 fprintf(f, "#line 0 \"gen_parser\"\n");
@@ -1699,15 +1909,18 @@ pieces of code provided in the grammar file, so they are generated first.
                 fprintf(f, "\tconfig->known_count = sizeof(known)/sizeof(known[0]);\n");
                 fprintf(f, "\tconfig->ignored |= (1 << TK_line_comment) | (1 << TK_block_comment);\n");
                 fprintf(f, "\ttokens = token_open(code, config);\n");
                 fprintf(f, "\tconfig->known_count = sizeof(known)/sizeof(known[0]);\n");
                 fprintf(f, "\tconfig->ignored |= (1 << TK_line_comment) | (1 << TK_block_comment);\n");
                 fprintf(f, "\ttokens = token_open(code, config);\n");
-               fprintf(f, "\tvoid *rv = parser_run(tokens, states, do_reduce, do_free, trace);\n");
+               fprintf(f, "\tvoid *rv = parser_run(tokens, states, do_reduce, do_free, trace, non_term, config);\n");
                 fprintf(f, "\ttoken_close(tokens);\n");
                 fprintf(f, "\treturn rv;\n");
                 fprintf(f, "}\n\n");
         }
  
                 fprintf(f, "\ttoken_close(tokens);\n");
                 fprintf(f, "\treturn rv;\n");
                 fprintf(f, "}\n\n");
         }
  
-### Table words table
+### Known words table
  
  
-The know words is simply an array of terminal symbols.
+The known words table is simply an array of terminal symbols.
+The table of nonterminals used for tracing is a similar array.  We
+include virtual symbols in the table of non_terminals to keep the
+numbers right.
  
  ###### functions
  
  
  ###### functions
  
@@ -1724,10 +1937,24 @@ The know words is simply an array of terminal symbols.
                 fprintf(f, "};\n\n");
         }
  
                 fprintf(f, "};\n\n");
         }
  
+       static void gen_non_term(FILE *f, struct grammar *g)
+       {
+               int i;
+               fprintf(f, "#line 0 \"gen_non_term\"\n");
+               fprintf(f, "static const char *non_term[] = {\n");
+               for (i = TK_reserved;
+                    i < g->num_syms;
+                    i++)
+                       if (g->symtab[i]->type != Terminal)
+                               fprintf(f, "\t\"%.*s\",\n", g->symtab[i]->name.len,
+                                       g->symtab[i]->name.txt);
+               fprintf(f, "};\n\n");
+       }
+
  ### States and the goto tables.
  
  ### States and the goto tables.
  
-For each state we record the goto table and the reducible production if
-there is one.
+For each state we record the goto table, the reducible production if
+there is one, or a symbol to shift for error recovery.
  Some of the details of the reducible production are stored in the
  `do_reduce` function to come later.  Here we store the production number,
  the body size (useful for stack management) and the resulting symbol (useful
  Some of the details of the reducible production are stored in the
  `do_reduce` function to come later.  Here we store the production number,
  the body size (useful for stack management) and the resulting symbol (useful
@@ -1748,9 +1975,11 @@ The go to table is stored in a simple array of `sym` and corresponding
                 short reduce_prod;
                 short reduce_size;
                 short reduce_sym;
                 short reduce_prod;
                 short reduce_size;
                 short reduce_sym;
+               char starts_line;
+               char newline_only;
+               short min_prefix;
         };
  
         };
  
-
  ###### functions
  
         static void gen_goto(FILE *f, struct grammar *g)
  ###### functions
  
         static void gen_goto(FILE *f, struct grammar *g)
@@ -1778,7 +2007,8 @@ The go to table is stored in a simple array of `sym` and corresponding
                 fprintf(f, "static const struct state states[] = {\n");
                 for (i = 0; i < g->states; i++) {
                         struct itemset *is = g->statetab[i];
                 fprintf(f, "static const struct state states[] = {\n");
                 for (i = 0; i < g->states; i++) {
                         struct itemset *is = g->statetab[i];
-                       int j, prod = -1;
+                       int j, prod = -1, prod_len;
+
                         for (j = 0; j < is->items.cnt; j++) {
                                 int itm = is->items.syms[j];
                                 int p = item_prod(itm);
                         for (j = 0; j < is->items.cnt; j++) {
                                 int itm = is->items.syms[j];
                                 int p = item_prod(itm);
@@ -1788,14 +2018,24 @@ The go to table is stored in a simple array of `sym` and corresponding
                                 if (bp < pr->body_size)
                                         continue;
                                 /* This is what we reduce */
                                 if (bp < pr->body_size)
                                         continue;
                                 /* This is what we reduce */
-                               prod = p;
-                               break;
+                               if (prod < 0 || prod_len < pr->body_size) {
+                                       prod = p;
+                                       prod_len = pr->body_size;
+                               }
                         }
  
                         }
  
-                       fprintf(f, "\t[%d] = { %d, goto_%d, %d, %d, %d },\n",
-                               i, is->go_to.cnt, i, prod,
-                               prod < 0 ? -1 : g->productions[prod]->body_size,
-                               prod < 0 ? -1 : g->productions[prod]->head->num);
+                       if (prod >= 0)
+                               fprintf(f, "\t[%d] = { %d, goto_%d, %d, %d, %d, %d, %d, %d },\n",
+                                       i, is->go_to.cnt, i, prod,
+                                       g->productions[prod]->body_size,
+                                       g->productions[prod]->head->num,
+                                       is->starts_line,
+                                       g->productions[prod]->line_like,
+                                       is->min_prefix);
+                       else
+                               fprintf(f, "\t[%d] = { %d, goto_%d, -1, -1, -1, %d, 0, %d },\n",
+                                       i, is->go_to.cnt, i,
+                                       is->starts_line, is->min_prefix);
                 }
                 fprintf(f, "};\n\n");
         }
                 }
                 fprintf(f, "};\n\n");
         }
@@ -1813,21 +2053,34 @@ This code needs to be able to store data somewhere.  Rather than requiring
  `do_reduce` to `malloc` that "somewhere", we pass in a large buffer and have
  `do_reduce` return the size to be saved.
  
  `do_reduce` to `malloc` that "somewhere", we pass in a large buffer and have
  `do_reduce` return the size to be saved.
  
+In order for the code to access "global" context, we pass in the
+"config" pointer that was passed to parser function.  If the `struct
+token_config` is embedded in some larger structure, the reducing code
+can access the larger structure using pointer manipulation.
+
  The code fragment requires translation when written out.  Any `$N` needs to
  be converted to a reference either to that buffer (if `$0`) or to the
  The code fragment requires translation when written out.  Any `$N` needs to
  be converted to a reference either to that buffer (if `$0`) or to the
-structure returned by a previous reduction.  These pointer need to be cast
-to the appropriate type for each access.  All this is handling in
+structure returned by a previous reduction.  These pointers need to be cast
+to the appropriate type for each access.  All this is handled in
  `gen_code`.
  
  `gen_code`.
  
+`gen_code` also allows symbol references to contain a '`<`' as in '`$<2`'.
+This applied only to symbols with references (or pointers), not those with structures.
+The `<` implies that the reference it being moved out, so the object will not be
+automatically freed.  This is equivalent to assigning `NULL` to the pointer.
  
  ###### functions
  
         static void gen_code(struct production *p, FILE *f, struct grammar *g)
         {
                 char *c;
  
  ###### functions
  
         static void gen_code(struct production *p, FILE *f, struct grammar *g)
         {
                 char *c;
+               char *used = calloc(1, p->body_size);
+               int i;
+
                 fprintf(f, "\t\t\t");
                 for (c = p->code.txt; c < p->code.txt + p->code.len; c++) {
                         int n;
                 fprintf(f, "\t\t\t");
                 for (c = p->code.txt; c < p->code.txt + p->code.len; c++) {
                         int n;
+                       int use = 0;
                         if (*c != '$') {
                                 fputc(*c, f);
                                 if (*c == '\n')
                         if (*c != '$') {
                                 fputc(*c, f);
                                 if (*c == '\n')
@@ -1835,7 +2088,13 @@ to the appropriate type for each access.  All this is handling in
                                 continue;
                         }
                         c++;
                                 continue;
                         }
                         c++;
+                       if (*c == '<') {
+                               use = 1;
+                               c++;
+                       }
                         if (*c < '0' || *c > '9') {
                         if (*c < '0' || *c > '9') {
+                               if (use)
+                                       fputc('<', f);
                                 fputc(*c, f);
                                 continue;
                         }
                                 fputc(*c, f);
                                 continue;
                         }
@@ -1845,9 +2104,10 @@ to the appropriate type for each access.  All this is handling in
                                 n = n * 10 + *c - '0';
                         }
                         if (n == 0)
                                 n = n * 10 + *c - '0';
                         }
                         if (n == 0)
-                               fprintf(f, "(*(struct %.*s*)ret)",
+                               fprintf(f, "(*(struct %.*s*%s)ret)",
                                         p->head->struct_name.len,
                                         p->head->struct_name.len,
-                                       p->head->struct_name.txt);
+                                       p->head->struct_name.txt,
+                                       p->head->isref ? "*":"");
                         else if (n > p->body_size)
                                 fprintf(f, "$%d", n);
                         else if (p->body[n-1]->type == Terminal)
                         else if (n > p->body_size)
                                 fprintf(f, "$%d", n);
                         else if (p->body[n-1]->type == Terminal)
@@ -1855,52 +2115,57 @@ to the appropriate type for each access.  All this is handling in
                                         n-1);
                         else if (p->body[n-1]->struct_name.txt == NULL)
                                 fprintf(f, "$%d", n);
                                         n-1);
                         else if (p->body[n-1]->struct_name.txt == NULL)
                                 fprintf(f, "$%d", n);
-                       else
-                               fprintf(f, "(*(struct %.*s*)body[%d])",
+                       else {
+                               fprintf(f, "(*(struct %.*s*%s)body[%d])",
                                         p->body[n-1]->struct_name.len,
                                         p->body[n-1]->struct_name.len,
-                                       p->body[n-1]->struct_name.txt, n-1);
+                                       p->body[n-1]->struct_name.txt,
+                                       p->body[n-1]->isref ? "*":"", n-1);
+                               used[n-1] = use;
+                       }
                 }
                 fputs("\n", f);
                 }
                 fputs("\n", f);
+               for (i = 0; i < p->body_size; i++) {
+                       if (p->body[i]->struct_name.txt &&
+                           used[i]) {
+                               // assume this has been copied out
+                               if (p->body[i]->isref)
+                                       fprintf(f, "\t\t*(void**)body[%d] = NULL;\n", i);
+                               else
+                                       fprintf(f, "\t\tmemset(body[%d], 0, sizeof(struct %.*s));\n", i, p->body[i]->struct_name.len, p->body[i]->struct_name.txt);
+                       }
+               }
+               free(used);
         }
  
  ###### functions
  
         }
  
  ###### functions
  
-       static void gen_reduce(FILE *f, struct grammar *g, char *file)
+       static void gen_reduce(FILE *f, struct grammar *g, char *file,
+                              struct code_node *code)
         {
         {
-               int i, j;
-               fprintf(f, "#line 0 \"gen_reduce\"\n");
-               fprintf(f, "static int do_reduce(int prod, int depth, void **body,\n");
-               fprintf(f, "                      void *ret, FILE *trace)\n");
+               int i;
+               fprintf(f, "#line 1 \"gen_reduce\"\n");
+               fprintf(f, "static int do_reduce(int prod, void **body, struct token_config *config, void *ret)\n");
                 fprintf(f, "{\n");
                 fprintf(f, "\tint ret_size = 0;\n");
                 fprintf(f, "{\n");
                 fprintf(f, "\tint ret_size = 0;\n");
+               if (code)
+                       code_node_print(f, code, file);
  
  
+               fprintf(f, "#line 4 \"gen_reduce\"\n");
                 fprintf(f, "\tswitch(prod) {\n");
                 for (i = 0; i < g->production_count; i++) {
                         struct production *p = g->productions[i];
                         fprintf(f, "\tcase %d:\n", i);
  
                 fprintf(f, "\tswitch(prod) {\n");
                 for (i = 0; i < g->production_count; i++) {
                         struct production *p = g->productions[i];
                         fprintf(f, "\tcase %d:\n", i);
  
-                       if (p->code.txt)
+                       if (p->code.txt) {
+                               fprintf(f, "#line %d \"%s\"\n", p->code_line, file);
                                 gen_code(p, f, g);
                                 gen_code(p, f, g);
-
-                       fprintf(f, "\t\tif (trace) {\n");
-                       fprintf(f, "\t\t\tfprintf(trace, \"[%%2d]%.*s ->\", depth);\n",
-                               p->head->name.len, p->head->name.txt);
-                       for (j = 0; j < p->body_size; j++) {
-                               if (p->body[j]->type == Terminal) {
-                                       fputs("\t\t\tfputs(\" \", trace);\n", f);
-                                       fprintf(f, "\t\t\ttext_dump(trace, (*(struct token*)body[%d]).txt, 20);\n", j);
-                               } else {
-                                       fprintf(f, "\t\t\tfprintf(trace, \" %.*s\");\n",
-                                               p->body[j]->name.len,
-                                               p->body[j]->name.txt);
-                               }
                         }
                         }
-                       fprintf(f, "\t\t}\n");
  
                         if (p->head->struct_name.txt)
  
                         if (p->head->struct_name.txt)
-                               fprintf(f, "\t\tret_size = sizeof(struct %.*s);\n",
+                               fprintf(f, "\t\tret_size = sizeof(struct %.*s%s);\n",
                                         p->head->struct_name.len,
                                         p->head->struct_name.len,
-                                       p->head->struct_name.txt);
+                                       p->head->struct_name.txt,
+                                       p->head->isref ? "*":"");
  
                         fprintf(f, "\t\tbreak;\n");
                 }
  
                         fprintf(f, "\t\tbreak;\n");
                 }
@@ -1916,10 +2181,10 @@ done.
  It is particularly important to have fine control over freeing during error
  recovery where individual stack frames might need to be freed.
  
  It is particularly important to have fine control over freeing during error
  recovery where individual stack frames might need to be freed.
  
-For this, the grammar author required to defined a `free_XX` function for
-each structure that is used by a non-terminal.  `do_free` all call whichever
+For this, the grammar author is required to defined a `free_XX` function for
+each structure that is used by a non-terminal.  `do_free` will call whichever
  is appropriate given a symbol number, and will call `free` (as is
  is appropriate given a symbol number, and will call `free` (as is
-appropriate for tokens` on any terminal symbol.
+appropriate for tokens) on any terminal symbol.
  
  ###### functions
  
  
  ###### functions
  
@@ -1943,9 +2208,15 @@ appropriate for tokens` on any terminal symbol.
                                 continue;
  
                         fprintf(f, "\tcase %d:\n", s->num);
                                 continue;
  
                         fprintf(f, "\tcase %d:\n", s->num);
-                       fprintf(f, "\t\tfree_%.*s(asn);\n",
-                               s->struct_name.len,
-                               s->struct_name.txt);
+                       if (s->isref) {
+                               fprintf(f, "\t\tfree_%.*s(*(void**)asn);\n",
+                                       s->struct_name.len,
+                                       s->struct_name.txt);
+                               fprintf(f, "\t\tfree(asn);\n");
+                       } else
+                               fprintf(f, "\t\tfree_%.*s(asn);\n",
+                                       s->struct_name.len,
+                                       s->struct_name.txt);
                         fprintf(f, "\t\tbreak;\n");
                 }
                 fprintf(f, "\t}\n}\n\n");
                         fprintf(f, "\t\tbreak;\n");
                 }
                 fprintf(f, "\t}\n}\n\n");
@@ -1977,17 +2248,19 @@ grammar file).
                 { "SLR",        0, NULL, 'S' },
                 { "LALR",       0, NULL, 'L' },
                 { "LR1",        0, NULL, '1' },
                 { "SLR",        0, NULL, 'S' },
                 { "LALR",       0, NULL, 'L' },
                 { "LR1",        0, NULL, '1' },
+               { "tag",        1, NULL, 't' },
                 { "report",     0, NULL, 'R' },
                 { "output",     1, NULL, 'o' },
                 { NULL,         0, NULL, 0   }
         };
                 { "report",     0, NULL, 'R' },
                 { "output",     1, NULL, 'o' },
                 { NULL,         0, NULL, 0   }
         };
-       const char *options = "05SL1Ro:";
+       const char *options = "05SL1t:Ro:";
  
  ###### process arguments
         int opt;
         char *outfile = NULL;
         char *infile;
         char *name;
  
  ###### process arguments
         int opt;
         char *outfile = NULL;
         char *infile;
         char *name;
+       char *tag = NULL;
         int report = 1;
         enum grammar_type type = LR05;
         while ((opt = getopt_long(argc, argv, options,
         int report = 1;
         enum grammar_type type = LR05;
         while ((opt = getopt_long(argc, argv, options,
@@ -2007,6 +2280,8 @@ grammar file).
                         report = 2; break;
                 case 'o':
                         outfile = optarg; break;
                         report = 2; break;
                 case 'o':
                         outfile = optarg; break;
+               case 't':
+                       tag = optarg; break;
                 default:
                         fprintf(stderr, "Usage: parsergen ...\n");
                         exit(1);
                 default:
                         fprintf(stderr, "Usage: parsergen ...\n");
                         exit(1);
@@ -2034,8 +2309,9 @@ grammar file).
  To be able to run `mdcode` and `scanner` on the grammar we need to memory
  map it.
  
  To be able to run `mdcode` and `scanner` on the grammar we need to memory
  map it.
  
-One we have extracted the code (with `mdcode`) we expect to file three
-sections: header, code, and grammar.  Anything else is an error.
+Once we have extracted the code (with `mdcode`) we expect to find three
+sections: header, code, and grammar.  Anything else that is not
+excluded by the `--tag` option is an error.
  
  "header" and "code" are optional, though it is hard to build a working
  parser with neither. "grammar" must be provided.
  
  "header" and "code" are optional, though it is hard to build a working
  parser with neither. "grammar" must be provided.
@@ -2071,13 +2347,19 @@ parser with neither. "grammar" must be provided.
         struct code_node *hdr = NULL;
         struct code_node *code = NULL;
         struct code_node *gram = NULL;
         struct code_node *hdr = NULL;
         struct code_node *code = NULL;
         struct code_node *gram = NULL;
+       struct code_node *pre_reduce = NULL;
         for (s = table; s; s = s->next) {
         for (s = table; s; s = s->next) {
-               if (text_is(s->section, "header"))
+               struct text sec = s->section;
+               if (tag && !strip_tag(&sec, tag))
+                       continue;
+               if (text_is(sec, "header"))
                         hdr = s->code;
                         hdr = s->code;
-               else if (text_is(s->section, "code"))
+               else if (text_is(sec, "code"))
                         code = s->code;
                         code = s->code;
-               else if (text_is(s->section, "grammar"))
+               else if (text_is(sec, "grammar"))
                         gram = s->code;
                         gram = s->code;
+               else if (text_is(sec, "reduce"))
+                       pre_reduce = s->code;
                 else {
                         fprintf(stderr, "Unknown content section: %.*s\n",
                                 s->section.len, s->section.txt);
                 else {
                         fprintf(stderr, "Unknown content section: %.*s\n",
                                 s->section.len, s->section.txt);
@@ -2124,7 +2406,7 @@ the report finds conflicts we will exit with an error status.
                                 rv |= 1;
         }
  
                                 rv |= 1;
         }
  
-If a headers section is defined, we write it out and include a declaration
+If a "headers" section is defined, we write it out and include a declaration
  for the `parse_XX` function so it can be used from separate code.
  
         if (rv == 0 && hdr && outfile) {
  for the `parse_XX` function so it can be used from separate code.
  
         if (rv == 0 && hdr && outfile) {
@@ -2149,7 +2431,7 @@ file with the code section (if any) and the parser tables and function.
                 if (f) {
                         if (code)
                                 code_node_print(f, code, infile);
                 if (f) {
                         if (code)
                                 code_node_print(f, code, infile);
-                       gen_parser(f, g, infile, name);
+                       gen_parser(f, g, infile, name, pre_reduce);
                         fclose(f);
                 } else {
                         fprintf(stderr, "Cannot create %s.c\n",
                         fclose(f);
                 } else {
                         fprintf(stderr, "Cannot create %s.c\n",
@@ -2159,7 +2441,7 @@ file with the code section (if any) and the parser tables and function.
         }
  
  And that about wraps it up.  We need to set the locale so that UTF-8 is
         }
  
  And that about wraps it up.  We need to set the locale so that UTF-8 is
-recognised properly, and link with `libicuuc` is `libmdcode` requires that.
+recognised properly, and link with `libicuuc` as `libmdcode` requires that.
  
  ###### File: parsergen.mk
         parsergen : parsergen.o libscanner.o libmdcode.o
  
  ###### File: parsergen.mk
         parsergen : parsergen.o libscanner.o libmdcode.o
@@ -2186,7 +2468,7 @@ recognised properly, and link with `libicuuc` is `libmdcode` requires that.
  
  ## The SHIFT/REDUCE parser
  
  
  ## The SHIFT/REDUCE parser
  
-Having analysed the grammar and generated all the table, we only need the
+Having analysed the grammar and generated all the tables, we only need the
  shift/reduce engine to bring it all together.
  
  ### Goto table lookup
  shift/reduce engine to bring it all together.
  
  ### Goto table lookup
@@ -2195,7 +2477,7 @@ The parser generator has nicely provided us with goto tables sorted by
  symbol number.  We need a binary search function to find a symbol in the
  table.
  
  symbol number.  We need a binary search function to find a symbol in the
  table.
  
-###### parser
+###### parser functions
  
         static int search(const struct state *l, int sym)
         {
  
         static int search(const struct state *l, int sym)
         {
@@ -2219,7 +2501,7 @@ table.
  
  ### The state stack.
  
  
  ### The state stack.
  
-The core data structure for the parser is the stack.  This track all the
+The core data structure for the parser is the stack.  This tracks all the
  symbols that have been recognised or partially recognised.
  
  The stack usually won't grow very large - maybe a few tens of entries.  So
  symbols that have been recognised or partially recognised.
  
  The stack usually won't grow very large - maybe a few tens of entries.  So
@@ -2232,20 +2514,43 @@ We keep the stack as two separate allocations.  One, `asn_stack` stores the
  production, and by keeping a separate `asn` stack, we can just pass a
  pointer into this stack.
  
  production, and by keeping a separate `asn` stack, we can just pass a
  pointer into this stack.
  
-The other allocation store all other stack fields of which there are two.
+The other allocation stores all other stack fields of which there are six.
  The `state` is the most important one and guides the parsing process.  The
  `sym` is nearly unnecessary.  However when we want to free entries from the
  `asn_stack`, it helps to know what type they are so we can call the right
  freeing function.  The symbol leads us to the right free function through
  `do_free`.
  
  The `state` is the most important one and guides the parsing process.  The
  `sym` is nearly unnecessary.  However when we want to free entries from the
  `asn_stack`, it helps to know what type they are so we can call the right
  freeing function.  The symbol leads us to the right free function through
  `do_free`.
  
-###### parser
+The `indents` count tracks the line indents with-in the symbol or
+immediately follow it.  These are used to allow indent information to
+guide parsing and error recovery.
+
+`since_newline` tracks how many stack frames since the last
+start-of-line (whether indented or not).  So if `since_newline` is
+zero, then this symbol is at the start of a line.  Similarly
+`since_indent` counts the number of states since an indent, it is zero
+precisely when `indents` is not zero.
+
+`newline_permitted` keeps track of whether newlines should be ignored
+or not.
+
+The stack is most properly seen as alternating states and symbols -
+states, like the 'DOT' in items, are between symbols.  Each frame in
+our stack holds a state and the symbol that was before it.  The
+bottom of stack holds the start state but no symbol, as nothing came
+before the beginning.
+
+###### parser functions
  
         struct parser {
  
         struct parser {
-               int state;
                 struct frame {
                         short state;
                 struct frame {
                         short state;
+                       short newline_permitted;
+
                         short sym;
                         short sym;
+                       short indents;
+                       short since_newline;
+                       short since_indent;
                 } *stack;
                 void **asn_stack;
                 int stack_size;
                 } *stack;
                 void **asn_stack;
                 int stack_size;
@@ -2254,30 +2559,47 @@ freeing function.  The symbol leads us to the right free function through
  
  #### Shift and pop
  
  
  #### Shift and pop
  
-The operations are needed on the stack - shift (which is like push) and pop.
+Two operations are needed on the stack - shift (which is like push) and pop.
  
  
-Shift applies no only to terminals but also to non-terminals.  When we
-reduce a production we will pop off entries corresponding to the body
-symbols, then push on an item for the head of the production.  This last is
-exactly the same process as shifting in a terminal so we use the same
-function for both.
+Shift applies not only to terminals but also to non-terminals.  When
+we reduce a production we will pop off entries corresponding to the
+body symbols, then push on an item for the head of the production.
+This last is exactly the same process as shifting in a terminal so we
+use the same function for both.  In both cases we provide the symbol,
+the number of indents the symbol contains (which will be zero for a
+terminal symbol) and a flag indicating the the symbol was at (or was
+reduced from a symbol which was at) the start of a line.  The state is
+deduced from the current top-of-stack state and the new symbol.
  
  To simplify other code we arrange for `shift` to fail if there is no `goto`
  state for the symbol.  This is useful in basic parsing due to our design
  that we shift when we can, and reduce when we cannot.  So the `shift`
  function reports if it could.
  
  
  To simplify other code we arrange for `shift` to fail if there is no `goto`
  state for the symbol.  This is useful in basic parsing due to our design
  that we shift when we can, and reduce when we cannot.  So the `shift`
  function reports if it could.
  
-So `shift` finds the next state.  If that succeed it extends the allocations
-if needed and pushed all the information onto the stacks.
+`shift` is also used to push state zero onto the stack, so if the
+stack is empty, it always chooses zero as the next state.
  
  
-###### parser
+So `shift` finds the next state.  If that succeeds it extends the
+allocations if needed and pushes all the information onto the stacks.
+
+Newlines are permitted after a `starts_line` state until an internal
+indent.  If the new frame has neither a `starts_line` state nor an
+indent, newlines are permitted if the previous stack frame permitted
+them.
+
+###### parser functions
  
         static int shift(struct parser *p,
  
         static int shift(struct parser *p,
-                        int sym, void *asn,
+                        short sym, short indents, short start_of_line,
+                        void *asn,
                          const struct state states[])
         {
                 // Push an entry onto the stack
                          const struct state states[])
         {
                 // Push an entry onto the stack
-               int newstate = search(&states[p->state], sym);
+               struct frame next = {0};
+               int newstate = p->tos
+                       ? search(&states[p->stack[p->tos-1].state],
+                                sym)
+                       : 0;
                 if (newstate < 0)
                         return 0;
                 if (p->tos >= p->stack_size) {
                 if (newstate < 0)
                         return 0;
                 if (p->tos >= p->stack_size) {
@@ -2287,43 +2609,77 @@ if needed and pushed all the information onto the stacks.
                         p->asn_stack = realloc(p->asn_stack, p->stack_size
                                            * sizeof(p->asn_stack[0]));
                 }
                         p->asn_stack = realloc(p->asn_stack, p->stack_size
                                            * sizeof(p->asn_stack[0]));
                 }
-               p->stack[p->tos].state = p->state;
-               p->stack[p->tos].sym = sym;
+               next.sym = sym;
+               next.indents = indents;
+               next.state = newstate;
+               if (states[newstate].starts_line)
+                       next.newline_permitted = 1;
+               else if (indents)
+                       next.newline_permitted = 0;
+               else if (p->tos)
+                       next.newline_permitted =
+                               p->stack[p->tos-1].newline_permitted;
+               else
+                       next.newline_permitted = 0;
+
+               if (!start_of_line) {
+                       if (p->tos)
+                               next.since_newline = p->stack[p->tos-1].since_newline + 1;
+                       else
+                               next.since_newline = 1;
+               }
+               if (indents)
+                       next.since_indent = 0;
+               else if (p->tos)
+                       next.since_indent = p->stack[p->tos-1].since_indent + 1;
+               else
+                       next.since_indent = 1;
+
+               p->stack[p->tos] = next;
                 p->asn_stack[p->tos] = asn;
                 p->tos++;
                 p->asn_stack[p->tos] = asn;
                 p->tos++;
-               p->state = newstate;
                 return 1;
         }
  
                 return 1;
         }
  
-`pop` simply moves the top of stack (`tos`) back down the required amount
-and frees and `asn` entries that need to be freed.  It is called _after_ we
-reduce a production, just before we `shift` the nonterminal in.
+`pop` primarily moves the top of stack (`tos`) back down the required
+amount and frees any `asn` entries that need to be freed.  It also
+collects a summary of the indents and line starts in the symbols that
+are being removed. It is called _after_ we reduce a production, just
+before we `shift` the nonterminal in.
  
  
-###### parser
+###### parser functions
  
  
-       static void pop(struct parser *p, int num,
-                       void(*do_free)(short sym, void *asn))
+       static int pop(struct parser *p, int num,
+                      short *start_of_line,
+                      void(*do_free)(short sym, void *asn))
         {
                 int i;
         {
                 int i;
+               short indents = 0;
+               int sol = 0;
+
                 p->tos -= num;
                 p->tos -= num;
-               for (i = 0; i < num; i++)
+               for (i = 0; i < num; i++) {
+                       sol |= !p->stack[p->tos+i].since_newline;
+                       indents += p->stack[p->tos+i].indents;
                         do_free(p->stack[p->tos+i].sym,
                                 p->asn_stack[p->tos+i]);
                         do_free(p->stack[p->tos+i].sym,
                                 p->asn_stack[p->tos+i]);
-
-               p->state = p->stack[p->tos].state;
+               }
+               if (start_of_line)
+                       *start_of_line = sol;
+               return indents;
         }
  
  ### Memory allocation
  
  The `scanner` returns tokens in a local variable - we want them in allocated
         }
  
  ### Memory allocation
  
  The `scanner` returns tokens in a local variable - we want them in allocated
-memory so they can live in the `asn_stack`.  Similarly the `asn` produce by
+memory so they can live in the `asn_stack`.  Similarly the `asn` produced by
  a reduce is in a large buffer.  Both of these require some allocation and
  copying, hence `memdup` and `tokcopy`.
  
  ###### parser includes
         #include <memory.h>
  
  a reduce is in a large buffer.  Both of these require some allocation and
  copying, hence `memdup` and `tokcopy`.
  
  ###### parser includes
         #include <memory.h>
  
-###### parser
+###### parser functions
  
         void *memdup(void *m, int len)
         {
  
         void *memdup(void *m, int len)
         {
@@ -2344,88 +2700,244 @@ copying, hence `memdup` and `tokcopy`.
  
  ### The heart of the parser.
  
  
  ### The heart of the parser.
  
-Now we have the parser.  If we can shift, we do.  If not and we can reduce
-we do.  If the production we reduced was production zero, then we have
+Now we have the parser.  If we can shift we do, though newlines and
+reducing indenting may block that.  If not and we can reduce we do
+that.  If the production we reduced was production zero, then we have
  accepted the input and can finish.
  
  accepted the input and can finish.
  
+We return whatever `asn` was returned by reducing production zero.
+
  If we can neither shift nor reduce we have an error to handle.  We pop
  If we can neither shift nor reduce we have an error to handle.  We pop
-single entries off the stack until we can shift the `TK_error` symbol, the
+single entries off the stack until we can shift the `TK_error` symbol, then
  drop input tokens until we find one we can shift into the new error state.
  
  drop input tokens until we find one we can shift into the new error state.
  
-We return whatever `asn` was returned by reducing production zero.
+When we find `TK_in` and `TK_out` tokens which report indents we need
+to handle them directly as the grammar cannot express what we want to
+do with them.
+
+`TK_in` tokens are easy: we simply update indent count in the top stack frame to
+record how many indents there are following the previous token.
+
+`TK_out` tokens must be canceled against an indent count
+within the stack.  If we can reduce some symbols that are all since
+the most recent indent, then we do that first.  If the minimum prefix
+of the current state then extends back before the most recent indent,
+that indent can be cancelled.  If the minimum prefix is shorter then
+the indent had ended prematurely and we must start error handling, which
+is still a work-in-progress.
+
+`TK_newline` tokens are ignored unless the top stack frame records
+that they are permitted.  In that case they will not be considered for
+shifting if it is possible to reduce some symbols that are all since
+the most recent start of line.  This is how a newline forcibly
+terminates any line-like structure - we try to reduce down to at most
+one symbol for each line where newlines are allowed.
+A consequence of this is that a rule like
+
+###### Example: newlines - broken
+
+       Newlines ->
+               | NEWLINE Newlines
+       IfStatement -> Newlines if ....
+
+cannot work, as the NEWLINE will never be shifted as the empty string
+will be reduced first.  Optional sets of newlines need to be include
+in the thing that preceed:
+
+###### Example: newlines - works
+
+       If -> if
+               | NEWLINE If
+       IfStatement -> If ....
+
+Here the NEWLINE will be shifted because nothing can be reduced until
+the `if` is seen.
+
+When, during error handling, we discard token read in, we want to keep
+discarding until we see one that is recognised.  If we had a full set
+of LR(1) grammar states, this will mean looking in the look-ahead set,
+but we don't keep a full look-ahead set.  We only record the subset
+that leads to SHIFT.  We can, however, deduce the look-ahead set but
+looking at the SHIFT subsets for all states that we can get to by
+reducing zero or more times.  So we need a little function which
+checks if a given token is in any of these look-ahead sets.
  
  ###### parser includes
         #include "parser.h"
  
  ###### parser includes
         #include "parser.h"
-###### parser
+
+###### parser_run
+
+       static int in_lookahead(struct token *tk, const struct state *states, int state)
+       {
+               while (state >= 0) {
+                       if (search(&states[state], tk->num) >= 0)
+                               return 1;
+                       if (states[state].reduce_prod < 0)
+                               return 0;
+                       state = search(&states[state], states[state].reduce_sym);
+               }
+               return 0;
+       }
+
         void *parser_run(struct token_state *tokens,
                          const struct state states[],
         void *parser_run(struct token_state *tokens,
                          const struct state states[],
-                        int (*do_reduce)(int, int, void**, void*, FILE*),
+                        int (*do_reduce)(int, void**, struct token_config*, void*),
                          void (*do_free)(short, void*),
                          void (*do_free)(short, void*),
-                        FILE *trace)
+                        FILE *trace, const char *non_term[],
+                        struct token_config *config)
         {
                 struct parser p = { 0 };
         {
                 struct parser p = { 0 };
-               struct token *tk;
+               struct token *tk = NULL;
                 int accepted = 0;
                 int accepted = 0;
-               void *ret;
+               void *ret = NULL;
  
  
-               tk = tok_copy(token_next(tokens));
+               shift(&p, TK_eof, 0, 1, NULL, states);
                 while (!accepted) {
                 while (!accepted) {
-                       if (shift(&p, tk->num, tk, states)) {
-                               if (trace) {
-                                       fputs("Shift ", trace);
-                                       text_dump(trace, tk->txt, 20);
-                                       fputs("\n", trace);
-                               }
+                       struct token *err_tk;
+                       struct frame *tos = &p.stack[p.tos-1];
+                       if (!tk)
                                 tk = tok_copy(token_next(tokens));
                                 tk = tok_copy(token_next(tokens));
+                       parser_trace(trace, &p,
+                                    tk, states, non_term, config->known_count);
+
+                       if (tk->num == TK_in) {
+                               tos->indents += 1;
+                               tos->since_newline = 0;
+                               tos->since_indent = 0;
+                               if (!states[tos->state].starts_line)
+                                       tos->newline_permitted = 0;
+                               free(tk);
+                               tk = NULL;
+                               parser_trace_action(trace, "Record");
+                               continue;
+                       }
+                       if (tk->num == TK_out) {
+                               if (states[tos->state].reduce_size >= 0 &&
+                                   states[tos->state].reduce_size <= tos->since_indent)
+                                       goto force_reduce;
+                               if (states[tos->state].min_prefix >= tos->since_indent) {
+                                       // OK to cancel
+                                       struct frame *in = tos - tos->since_indent;
+                                       in->indents -= 1;
+                                       if (in->indents == 0) {
+                                               /* Reassess since_indent and newline_permitted */
+                                               if (in > p.stack) {
+                                                       in->since_indent = in[-1].since_indent + 1;
+                                                       in->newline_permitted = in[-1].newline_permitted;
+                                               } else {
+                                                       in->since_indent = 0;
+                                                       in->newline_permitted = 0;
+                                               }
+                                               if (states[in->state].starts_line)
+                                                       in->newline_permitted = 1;
+                                               while (in < tos) {
+                                                       in += 1;
+                                                       in->since_indent = in[-1].since_indent + 1;
+                                                       if (states[in->state].starts_line)
+                                                               in->newline_permitted = 1;
+                                                       else
+                                                               in->newline_permitted = in[-1].newline_permitted;
+                                               }
+                                       }
+                                       free(tk);
+                                       tk = NULL;
+                                       parser_trace_action(trace, "Cancel");
+                                       continue;
+                               }
+                               // fall through to error handling as both SHIFT and REDUCE
+                               // will fail.
+                       }
+                       if (tk->num == TK_newline) {
+                               if (!tos->newline_permitted) {
+                                       free(tk);
+                                       tk = NULL;
+                                       parser_trace_action(trace, "Discard");
+                                       continue;
+                               }
+                               if (tos->since_newline > 1 &&
+                                   states[tos->state].reduce_size >= 0 &&
+                                   states[tos->state].reduce_size <= tos->since_newline)
+                                       goto force_reduce;
+                       }
+                       if (shift(&p, tk->num, 0, tk->num == TK_newline, tk, states)) {
+                               tk = NULL;
+                               parser_trace_action(trace, "Shift");
                                 continue;
                         }
                                 continue;
                         }
-                       if (states[p.state].reduce_prod >= 0) {
+               force_reduce:
+                       if (states[tos->state].reduce_prod >= 0 &&
+                           states[tos->state].newline_only &&
+                           tk->num != TK_newline && tk->num != TK_eof && tk->num != TK_out) {
+                               /* Anything other than newline in an error as this
+                                * production must end at EOL
+                                */
+                       } else if (states[tos->state].reduce_prod >= 0) {
                                 void **body;
                                 void **body;
-                               int prod = states[p.state].reduce_prod;
-                               int size = states[p.state].reduce_size;
-                               int sym = states[p.state].reduce_sym;
+                               void *res;
+                               const struct state *nextstate = &states[tos->state];
+                               int prod = nextstate->reduce_prod;
+                               int size = nextstate->reduce_size;
                                 int bufsize;
                                 static char buf[16*1024];
                                 int bufsize;
                                 static char buf[16*1024];
+                               short indents, start_of_line;
  
  
-                               body = p.asn_stack +
-                                       (p.tos - states[p.state].reduce_size);
+                               body = p.asn_stack + (p.tos - size);
  
  
-                               bufsize = do_reduce(prod, p.tos, body,
-                                                   buf, trace);
-                               if (trace)
-                                       fputs("\n", trace);
+                               bufsize = do_reduce(prod, body, config, buf);
  
  
-                               pop(&p, size, do_free);
-                               shift(&p, sym, memdup(buf, bufsize), states);
-                               if (prod == 0)
+                               indents = pop(&p, size, &start_of_line,
+                                             do_free);
+                               res = memdup(buf, bufsize);
+                               memset(buf, 0, bufsize);
+                               if (!shift(&p, nextstate->reduce_sym,
+                                          indents, start_of_line,
+                                          res, states)) {
+                                       if (prod != 0) abort();
                                         accepted = 1;
                                         accepted = 1;
+                                       ret = res;
+                               }
+                               parser_trace_action(trace, "Reduce");
                                 continue;
                         }
                                 continue;
                         }
-                       /* Error. we walk up the stack until we
+                       /* Error. We walk up the stack until we
                          * find a state which will accept TK_error.
                          * We then shift in TK_error and see what state
                          * that takes us too.
                          * Then we discard input tokens until
                          * we find one that is acceptable.
                          */
                          * find a state which will accept TK_error.
                          * We then shift in TK_error and see what state
                          * that takes us too.
                          * Then we discard input tokens until
                          * we find one that is acceptable.
                          */
-                       while (shift(&p, TK_error, tk, states) < 0
-                              && p.tos > 0)
+                       parser_trace_action(trace, "ERROR");
+                       short indents = 0, start_of_line;
+
+                       err_tk = tok_copy(*tk);
+                       while (p.tos > 0 &&
+                              shift(&p, TK_error, 0, 0,
+                                    err_tk, states) == 0)
                                 // discard this state
                                 // discard this state
-                               pop(&p, 1, do_free);
-                       tk = tok_copy(*tk);
-                       while (search(&states[p.state], tk->num) < 0 &&
+                               indents += pop(&p, 1, &start_of_line, do_free);
+                       if (p.tos == 0) {
+                               free(err_tk);
+                               // no state accepted TK_error
+                               break;
+                       }
+                       tos = &p.stack[p.tos-1];
+                       while (!in_lookahead(tk, states, tos->state) &&
                                tk->num != TK_eof) {
                                 free(tk);
                                 tk = tok_copy(token_next(tokens));
                                tk->num != TK_eof) {
                                 free(tk);
                                 tk = tok_copy(token_next(tokens));
+                               if (tk->num == TK_in)
+                                       indents += 1;
+                               if (tk->num == TK_out) {
+                                       if (indents == 0)
+                                               break;
+                                       indents -= 1;
+                                       // FIXME update since_indent here
+                               }
                         }
                         }
-                       if (p.tos == 0 && tk->num == TK_eof)
-                               break;
+                       tos->indents += indents;
                 }
                 free(tk);
                 }
                 free(tk);
-               if (accepted)
-                       ret = p.asn_stack[0];
-               else
-                       pop(&p, p.tos, do_free);
+               pop(&p, p.tos, NULL, do_free);
                 free(p.asn_stack);
                 free(p.stack);
                 return ret;
                 free(p.asn_stack);
                 free(p.stack);
                 return ret;
@@ -2434,10 +2946,89 @@ We return whatever `asn` was returned by reducing production zero.
  ###### exported functions
         void *parser_run(struct token_state *tokens,
                          const struct state states[],
  ###### exported functions
         void *parser_run(struct token_state *tokens,
                          const struct state states[],
-                        int (*do_reduce)(int, int, void**, void*, FILE*),
+                        int (*do_reduce)(int, void**, struct token_config*, void*),
                          void (*do_free)(short, void*),
                          void (*do_free)(short, void*),
-                        FILE *trace);
+                        FILE *trace, const char *non_term[],
+                        struct token_config *config);
+
+### Tracing
+
+Being able to visualize the parser in action can be invaluable when
+debugging the parser code, or trying to understand how the parse of a
+particular grammar progresses.  The stack contains all the important
+state, so just printing out the stack every time around the parse loop
+can make it possible to see exactly what is happening.
+
+This doesn't explicitly show each SHIFT and REDUCE action.  However they
+are easily deduced from the change between consecutive lines, and the
+details of each state can be found by cross referencing the states list
+in the stack with the "report" that parsergen can generate.
+
+For terminal symbols, we just dump the token.  For non-terminals we
+print the name of the symbol.  The look ahead token is reported at the
+end inside square brackets.
+
+###### parser functions
+
+       static char *reserved_words[] = {
+               [TK_error]        = "ERROR",
+               [TK_in]           = "IN",
+               [TK_out]          = "OUT",
+               [TK_newline]      = "NEWLINE",
+               [TK_eof]          = "$eof",
+       };
+       static void parser_trace_state(FILE *trace, struct frame *f, const struct state states[])
+       {
+               fprintf(trace, "(%d", f->state);
+               if (states[f->state].starts_line)
+                       fprintf(trace, "s");
+               if (f->newline_permitted)
+                       fprintf(trace, "n%d", f->since_newline);
+               fprintf(trace, ") ");
+       }
  
  
+       void parser_trace(FILE *trace, struct parser *p,
+                         struct token *tk, const struct state states[],
+                         const char *non_term[], int knowns)
+       {
+               int i;
+               if (!trace)
+                       return;
+               for (i = 0; i < p->tos; i++) {
+                       struct frame *f = &p->stack[i];
+                       if (i) {
+                               int sym = f->sym;
+                               if (sym < TK_reserved &&
+                                   reserved_words[sym] != NULL)
+                                       fputs(reserved_words[sym], trace);
+                               else if (sym < TK_reserved + knowns) {
+                                       struct token *t = p->asn_stack[i];
+                                       text_dump(trace, t->txt, 20);
+                               } else
+                                       fputs(non_term[sym - TK_reserved - knowns],
+                                             trace);
+                               if (f->indents)
+                                       fprintf(trace, ".%d", f->indents);
+                               if (f->since_newline == 0)
+                                       fputs("/", trace);
+                               fputs(" ", trace);
+                       }
+                       parser_trace_state(trace, f, states);
+               }
+               fprintf(trace, "[");
+               if (tk->num < TK_reserved &&
+                   reserved_words[tk->num] != NULL)
+                       fputs(reserved_words[tk->num], trace);
+               else
+                       text_dump(trace, tk->txt, 20);
+               fputs("]", trace);
+       }
+
+       void parser_trace_action(FILE *trace, char *action)
+       {
+               if (trace)
+                       fprintf(trace, " - %s\n", action);
+       }
  
  # A Worked Example
  
  
  # A Worked Example
  
@@ -2446,24 +3037,20 @@ The obvious example for a parser is a calculator.
  As `scanner` provides number parsing function using `libgmp` is it not much
  work to perform arbitrary rational number calculations.
  
  As `scanner` provides number parsing function using `libgmp` is it not much
  work to perform arbitrary rational number calculations.
  
-This calculator takes one expression, or an equality test per line.  The
-results are printed and in any equality test fails, the program exits with
+This calculator takes one expression, or an equality test, per line.  The
+results are printed and if any equality test fails, the program exits with
  an error.
  
  an error.
  
-Embedding mdcode inside mdcode is rather horrible.  I'd like to find a
-better approach, but as the grammar file must have 3 components I need
-something like this.
-
  ###### File: parsergen.mk
  ###### File: parsergen.mk
-       calc.c : parsergen calc.cgm
-               ./parsergen -o calc calc.cgm
+       calc.c calc.h : parsergen parsergen.mdc
+               ./parsergen --tag calc -o calc parsergen.mdc
         calc : calc.o libparser.o libscanner.o libmdcode.o libnumber.o
                 $(CC) $(CFLAGS) -o calc calc.o libparser.o libscanner.o libmdcode.o libnumber.o -licuuc -lgmp
         calc : calc.o libparser.o libscanner.o libmdcode.o libnumber.o
                 $(CC) $(CFLAGS) -o calc calc.o libparser.o libscanner.o libmdcode.o libnumber.o -licuuc -lgmp
+       calctest : calc
+               ./calc parsergen.mdc
+       demos :: calctest
  
  
-###### demo grammar
-
-       # header
-       ~~~~~
+# calc: header
  
         #include "number.h"
         // what do we use for a demo-grammar?  A calculator of course.
  
         #include "number.h"
         // what do we use for a demo-grammar?  A calculator of course.
@@ -2473,9 +3060,7 @@ something like this.
                 int err;
         };
  
                 int err;
         };
  
-       ~~~~~
-       # code
-       ~~~~~
+# calc: code
  
         #include <stdlib.h>
         #include <unistd.h>
  
         #include <stdlib.h>
         #include <unistd.h>
@@ -2484,6 +3069,7 @@ something like this.
         #include <stdio.h>
         #include <malloc.h>
         #include <gmp.h>
         #include <stdio.h>
         #include <malloc.h>
         #include <gmp.h>
+       #include <string.h>
         #include "mdcode.h"
         #include "scanner.h"
         #include "number.h"
         #include "mdcode.h"
         #include "scanner.h"
         #include "number.h"
@@ -2497,28 +3083,44 @@ something like this.
                 free(n);
         }
  
                 free(n);
         }
  
+       static int text_is(struct text t, char *s)
+       {
+               return (strlen(s) == t.len &&
+                       strncmp(s, t.txt, t.len) == 0);
+       }
+
         int main(int argc, char *argv[])
         {
                 int fd = open(argv[1], O_RDONLY);
                 int len = lseek(fd, 0, 2);
                 char *file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
         int main(int argc, char *argv[])
         {
                 int fd = open(argv[1], O_RDONLY);
                 int len = lseek(fd, 0, 2);
                 char *file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
-               struct section *s = code_extract(file, file+len, NULL);
+               struct section *table = code_extract(file, file+len, NULL);
+               struct section *s;
                 struct token_config config = {
                         .ignored = (1 << TK_line_comment)
                                  | (1 << TK_block_comment)
                 struct token_config config = {
                         .ignored = (1 << TK_line_comment)
                                  | (1 << TK_block_comment)
-                                | (1 << TK_indent)
-                                | (1 << TK_undent),
+                                | (1 << TK_in)
+                                | (1 << TK_out),
                         .number_chars = ".,_+-",
                         .word_start = "",
                         .word_cont = "",
                 };
                         .number_chars = ".,_+-",
                         .word_start = "",
                         .word_cont = "",
                 };
-               parse_calc(s->code, &config, argc > 2 ? stderr : NULL);
+               for (s = table; s; s = s->next)
+                       if (text_is(s->section, "example: input"))
+                               parse_calc(s->code, &config, argc > 2 ? stderr : NULL);
+               while (table) {
+                       struct section *t = table->next;
+                       code_free(table->code);
+                       free(table);
+                       table = t;
+               }
                 exit(0);
         }
  
                 exit(0);
         }
  
-       ~~~~~
-       # grammar
-       ~~~~~
+# calc: grammar
+
+       $LEFT + -
+       $LEFT * /
  
         Session -> Session Line
                 | Line
  
         Session -> Session Line
                 | Line
@@ -2538,17 +3140,24 @@ something like this.
                                 exit(1);
                         }
                 }$
                                 exit(1);
                         }
                 }$
-               |  NEWLINE ${ printf("Blank line\n"); }$
+               | NEWLINE ${ printf("Blank line\n"); }$
                 | ERROR NEWLINE ${ printf("Skipped a bad line\n"); }$
  
         $number
                 | ERROR NEWLINE ${ printf("Skipped a bad line\n"); }$
  
         $number
-       Expression -> Expression +  Term ${ mpq_init($0.val); mpq_add($0.val, $1.val, $3.val); }$
-               | Expression -  Term ${ mpq_init($0.val); mpq_sub($0.val, $1.val, $3.val); }$
-               | Term ${ mpq_init($0.val); mpq_set($0.val, $1.val); }$
-
-       Term -> Term * Factor ${ mpq_init($0.val); mpq_mul($0.val, $1.val, $3.val); }$
-               | Term / Factor ${ mpq_init($0.val); mpq_div($0.val, $1.val, $3.val); }$
-               | Factor ${ mpq_init($0.val); mpq_set($0.val, $1.val); }$
-
-       Factor -> NUMBER ${ if (number_parse($0.val, $0.tail, $1.txt) == 0) mpq_init($0.val); }$
-               | ( Expression  ) ${ mpq_init($0.val); mpq_set($0.val, $2.val); }$
+       Expression -> Expression + Expression ${ mpq_init($0.val); mpq_add($0.val, $1.val, $3.val); }$
+               | Expression - Expression ${ mpq_init($0.val); mpq_sub($0.val, $1.val, $3.val); }$
+               | Expression * Expression ${ mpq_init($0.val); mpq_mul($0.val, $1.val, $3.val); }$
+               | Expression / Expression ${ mpq_init($0.val); mpq_div($0.val, $1.val, $3.val); }$
+               | NUMBER ${ if (number_parse($0.val, $0.tail, $1.txt) == 0) mpq_init($0.val); }$
+               | ( Expression ) ${ mpq_init($0.val); mpq_set($0.val, $2.val); }$
+
+# example: input
+
+       355/113
+       3.1415926535 - 355/113
+       2 + 4 * 5
+       1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9
+       10 * 9 / 2
+       1 * 1000 + 2 * 100 + 3 * 10 + 4 * 1
+
+       error