oceani: move all array size calculation to prepare_type

[ocean] / csrc / parsergen.mdc
diff --git a/csrc/parsergen.mdc b/csrc/parsergen.mdc

index 59da0ec326a05f6c2d489086ca6eb96f4bf13234..02abed32520c18dcaa1d677b81529f25ec040e03 100644 (file)
--- a/csrc/parsergen.mdc
+++ b/csrc/parsergen.mdc
@@ -393,9 +393,10 @@ here is told which was found in the `isref` argument.
                         found += 1;
                         t = token_next(ts);
                 }
                         found += 1;
                         t = token_next(ts);
                 }
-               if (found == 0)
+               if (found == 0) {
                         err = "No symbols given on precedence/TERM line";
                         goto abort;
                         err = "No symbols given on precedence/TERM line";
                         goto abort;
+               }
                 return NULL;
         abort:
                 while (t.num != TK_newline && t.num != TK_eof)
                 return NULL;
         abort:
                 while (t.num != TK_newline && t.num != TK_eof)
@@ -514,10 +515,6 @@ Now we have all the bits we need to parse a full production.
                 tk = token_next(state);
                 while (tk.num == TK_ident || tk.num == TK_mark) {
                         struct symbol *bs = sym_find(g, tk.txt);
                 tk = token_next(state);
                 while (tk.num == TK_ident || tk.num == TK_mark) {
                         struct symbol *bs = sym_find(g, tk.txt);
-                       if (bs->type == Unknown) {
-                               if (!g->terminals_declared)
-                                       bs->type = Terminal;
-                       }
                         if (bs->type == Virtual) {
                                 err = "Virtual symbol not permitted in production";
                                 goto abort;
                         if (bs->type == Virtual) {
                                 err = "Virtual symbol not permitted in production";
                                 goto abort;
@@ -691,20 +688,22 @@ used as a terminal anywhere that a terminal is expected.
                                 goto abort;
                 }
                 token_close(state);
                                 goto abort;
                 }
                 token_close(state);
-               if (g->terminals_declared) {
-                       struct symbol *s;
-                       int errs = 0;
-                       for (s = g->syms; s; s = s->next) {
-                               if (s->type != Unknown)
-                                       continue;
-                               errs += 1;
-                               fprintf(stderr, "Token %.*s not declared\n",
-                                       s->name.len, s->name.txt);
-                       }
-                       if (errs) {
-                               free(g); // FIXME free content
-                               g = NULL;
+
+               struct symbol *s;
+               for (s = g->syms; s; s = s->next) {
+                       if (s->type != Unknown)
+                               continue;
+                       if (!g->terminals_declared) {
+                               s->type = Terminal;
+                               continue;
                         }
                         }
+                       err = "not declared";
+                       fprintf(stderr, "Token %.*s not declared\n",
+                               s->name.len, s->name.txt);
+               }
+               if (err) {
+                       free(g); // FIXME free content
+                       g = NULL;
                 }
                 return g;
         abort:
                 }
                 return g;
         abort:
@@ -1192,10 +1191,27 @@ And now we can build the list of itemsets.  The lookup routine returns
  both a success flag and a pointer to where in the list an insert should
  happen, so we don't need to search a second time.
  
  both a success flag and a pointer to where in the list an insert should
  happen, so we don't need to search a second time.
  
+Each state will often have at most one reducible production, but
+occasionally will have more.  If there are more we will need a full
+action table (merged in the go to table).  If there is one, we record it
+in the state.  If none, we record that too.  These special values will
+be needed both in the generator and in the parser, so they need to be
+declared twice.
+
+###### exported types
+       enum {
+               NONE_REDUCIBLE = -1,
+               MANY_REDUCIBLE = -2,
+       };
  ###### declarations
  ###### declarations
+       enum {
+               NONE_REDUCIBLE = -1,
+               MANY_REDUCIBLE = -2,
+       };
         struct itemset {
                 struct itemset *next;
                 short state;
         struct itemset {
                 struct itemset *next;
                 short state;
+               short reducible_prod; /* or NONE_REDUCIBLE or MANY_REDUCIBLE */
                 struct symset items;
                 struct symset go_to;
                 enum assoc assoc;
                 struct symset items;
                 struct symset go_to;
                 enum assoc assoc;
@@ -1319,8 +1335,15 @@ so the item is ineffective.
                 struct symset LA = INIT_SYMSET;
                 unsigned short sn = 0;
  
                 struct symset LA = INIT_SYMSET;
                 unsigned short sn = 0;
  
-               if (bs == pr->body_size)
+               if (i == 0)
+                       is->reducible_prod = NONE_REDUCIBLE;
+               if (bs == pr->body_size) {
+                       if (is->reducible_prod == NONE_REDUCIBLE)
+                               is->reducible_prod = p;
+                       else
+                               is->reducible_prod = MANY_REDUCIBLE;
                         continue;
                         continue;
+               }
                 s = pr->body[bs];
                 if (s->precedence && is->precedence &&
                     is->precedence > s->precedence)
                 s = pr->body[bs];
                 if (s->precedence && is->precedence &&
                     is->precedence > s->precedence)
@@ -1877,9 +1900,10 @@ optional `FILE` to send tracing to.  The `token_config` gets the list of
  known words added and then is used with the `code_node` to initialize the
  scanner.
  
  known words added and then is used with the `code_node` to initialize the
  scanner.
  
-`parse_XX` then calls the library function `parser_run` to actually complete
-the parse.  This needs the `states` table and functions to call the various
-pieces of code provided in the grammar file, so they are generated first.
+`parse_XX` then calls the library function `parser_run` to actually
+complete the parse.  This needs the `states` table, the `reductions`
+table and functions to call the various pieces of code provided in the
+grammar file, so they are generated first.
  
  ###### parser_generate
  
  
  ###### parser_generate
  
@@ -1888,8 +1912,10 @@ pieces of code provided in the grammar file, so they are generated first.
         {
                 gen_known(f, g);
                 gen_non_term(f, g);
         {
                 gen_known(f, g);
                 gen_non_term(f, g);
+               gen_action(f, g);
                 gen_goto(f, g);
                 gen_states(f, g);
                 gen_goto(f, g);
                 gen_states(f, g);
+               gen_reductions(f, g);
                 gen_reduce(f, g, file, pre_reduce);
                 gen_free(f, g);
  
                 gen_reduce(f, g, file, pre_reduce);
                 gen_free(f, g);
  
@@ -1901,7 +1927,7 @@ pieces of code provided in the grammar file, so they are generated first.
                 fprintf(f, "\tconfig->words_marks = known;\n");
                 fprintf(f, "\tconfig->known_count = sizeof(known)/sizeof(known[0]);\n");
                 fprintf(f, "\ttokens = token_open(code, config);\n");
                 fprintf(f, "\tconfig->words_marks = known;\n");
                 fprintf(f, "\tconfig->known_count = sizeof(known)/sizeof(known[0]);\n");
                 fprintf(f, "\ttokens = token_open(code, config);\n");
-               fprintf(f, "\tvoid *rv = parser_run(tokens, states, do_reduce, do_free, trace, non_term, config);\n");
+               fprintf(f, "\tvoid *rv = parser_run(tokens, states, reductions, do_reduce, do_free, trace, non_term, config);\n");
                 fprintf(f, "\ttoken_close(tokens);\n");
                 fprintf(f, "\treturn rv;\n");
                 fprintf(f, "}\n\n");
                 fprintf(f, "\ttoken_close(tokens);\n");
                 fprintf(f, "\treturn rv;\n");
                 fprintf(f, "}\n\n");
@@ -1942,14 +1968,63 @@ The table of nonterminals used for tracing is a similar array.
                 fprintf(f, "};\n\n");
         }
  
                 fprintf(f, "};\n\n");
         }
  
-### States and the goto tables.
+### The Action table
  
  
-For each state we record the goto table and details of the reducible
-production if there is one.
-Some of the details of the reducible production are stored in the
-`do_reduce` function to come later.  Here we store the production
-number, the body size (useful for stack management), and the resulting
-symbol (useful for knowing how to free data later).
+For this parser we do not have a separate action table.  The action
+table normally maps terminals to one of the actions SHIFT, REDUCE(p), or
+ACCEPT.  We can find which terminals to SHIFT via a lookup in the go to
+table, and most states have at most one reducible production and in that
+case to effect that reduction for any non-shiftable terminal in the
+look-ahead.
+
+However, with SLR, LALR, and canonical-LR grammars, there may
+occasionally be multiple possible reductions which we choose between
+based on the look-ahead.  To handle this possibility we allow reductions
+to be stored in the go to table.  An extra flag indicates if an entry is
+a state to go to, or a production to reduce.  We add the entries with
+that flag here, if necessary.
+
+###### functions
+       #define IS_REDUCTION (0x8000)
+
+       static void gen_action(FILE *f, struct grammar *g)
+       {
+               int i, j, k;
+               for (i = 0; i < g->states; i++) {
+                       struct itemset *is = g->statetab[i];
+                       struct symset *gt = &is->go_to;
+
+                       if (is->reducible_prod != MANY_REDUCIBLE)
+                               continue;
+                       for (j = 0; j < is->items.cnt; j++) {
+                               int itm = is->items.syms[j];
+                               int p = item_prod(itm);
+                               int bp = item_dot(itm);
+                               struct production *pr = g->productions[p];
+                               struct symset la;
+                               if (bp != pr->body_size)
+                                       continue;
+                               /* This is a reducible item */
+                               if (g->follow)
+                                       la = g->follow[pr->head->num];
+                               else
+                                       la = set_find(g, is->items.data[j]);
+                               for (k = 0 ; k < la.cnt; k++)
+                                       symset_add(gt, la.syms[k], p | IS_REDUCTION);
+                       }
+               }
+       }
+
+### States, reductions, and the go to tables.
+
+For each state we record the go to table and the reducible production if
+there is one, the details of which are in a separate table of
+reductions.  Some of the details of the reducible production are stored
+in the `do_reduce` function to come later.  In the go to table we store
+the production number and in the reductions table: the body size (useful
+for stack management), the resulting symbol (useful for knowing how to
+free data later), and the size of the resulting asn object (useful for
+preallocation space.
  
  The go to table is stored in a simple array of `sym` and corresponding
  `state`.
  
  The go to table is stored in a simple array of `sym` and corresponding
  `state`.
@@ -1958,15 +2033,18 @@ The go to table is stored in a simple array of `sym` and corresponding
  
         struct lookup {
                 short sym;
  
         struct lookup {
                 short sym;
-               short state;
+               short state_or_reduction:15;
+               unsigned short is_reduction:1;
+       };
+       struct reduction {
+               short size;
+               short sym;
+               short result_size;
         };
         struct state {
         };
         struct state {
+               short reduce_prod;      // or NONE_REDUCIBLE or MANY_REDUCIBLE
                 short go_to_cnt;
                 short go_to_cnt;
-               const struct lookup * go_to;
-               short reduce_prod;
-               short reduce_size;
-               short reduce_sym;
-               short result_size;
+               const struct lookup * go_to; // includes reductions
         };
  
  ###### functions
         };
  
  ###### functions
@@ -1983,13 +2061,38 @@ The go to table is stored in a simple array of `sym` and corresponding
                                 continue;
                         fprintf(f, "static const struct lookup goto_%d[] = {\n",
                                 i);
                                 continue;
                         fprintf(f, "static const struct lookup goto_%d[] = {\n",
                                 i);
-                       for (j = 0; j < gt.cnt; j++)
-                               fprintf(f, "\t{ %d, %d },\n",
-                                       gt.syms[j], gt.data[j]);
+                       for (j = 0; j < gt.cnt; j++) {
+                               if (gt.data[j] & IS_REDUCTION)
+                                       fprintf(f, "\t{ %d, %d, 1 },\n",
+                                               gt.syms[j], gt.data[j]&~IS_REDUCTION);
+                               else
+                                       fprintf(f, "\t{ %d, %d, 0 },\n",
+                                               gt.syms[j], gt.data[j]);
+                       }
                         fprintf(f, "};\n");
                 }
         }
  
                         fprintf(f, "};\n");
                 }
         }
  
+       static void gen_reductions(FILE *f, struct grammar *g)
+       {
+               int i;
+               fprintf(f, "#line 0 \"gen_reductions\"\n");
+               fprintf(f, "static const struct reduction reductions[] = {\n");
+               for (i = 0; i < g->production_count; i++) {
+                       struct production *pr = g->productions[i];
+                       struct symbol *hd = pr->head;
+                       fprintf(f, "\t{%d, %d, ", pr->body_size, hd->num);
+                       if (hd->struct_name.txt == NULL)
+                               fprintf(f, "0 },\n");
+                       else
+                               fprintf(f, "sizeof(struct %.*s%s) },\n",
+                                       hd->struct_name.len,
+                                       hd->struct_name.txt,
+                                       hd->isref ? "*" : "");
+               }
+               fprintf(f, "};\n\n");
+       }
+
         static void gen_states(FILE *f, struct grammar *g)
         {
                 int i;
         static void gen_states(FILE *f, struct grammar *g)
         {
                 int i;
@@ -1997,41 +2100,13 @@ The go to table is stored in a simple array of `sym` and corresponding
                 fprintf(f, "static const struct state states[] = {\n");
                 for (i = 0; i < g->states; i++) {
                         struct itemset *is = g->statetab[i];
                 fprintf(f, "static const struct state states[] = {\n");
                 for (i = 0; i < g->states; i++) {
                         struct itemset *is = g->statetab[i];
-                       int j, prod = -1, prod_len;
-
-                       for (j = 0; j < is->items.cnt; j++) {
-                               int itm = is->items.syms[j];
-                               int p = item_prod(itm);
-                               int bp = item_dot(itm);
-                               struct production *pr = g->productions[p];
+                       int prod = is->reducible_prod;
  
  
-                               if (bp < pr->body_size)
-                                       continue;
-                               /* This is what we reduce - choose longest */
-                               if (prod < 0 || prod_len < pr->body_size) {
-                                       prod = p;
-                                       prod_len = pr->body_size;
-                               }
-                       }
                         if (is->go_to.cnt)
                         if (is->go_to.cnt)
-                               fprintf(f, "\t[%d] = { %d, goto_%d, ",
-                                       i, is->go_to.cnt, i);
+                               fprintf(f, "\t[%d] = { %d, %d, goto_%d },\n",
+                                       i, prod, is->go_to.cnt, i);
                         else
                         else
-                               fprintf(f, "\t[%d] = { 0, NULL, ", i);
-                       if (prod >= 0) {
-                               struct production *pr = g->productions[prod];
-                               struct symbol *hd = pr->head;
-                               fprintf(f, "%d, %d, %d, ", 
-                                       prod, pr->body_size, pr->head->num);
-                               if (hd->struct_name.txt == NULL)
-                                       fprintf(f, "0 },\n");
-                               else
-                                       fprintf(f, "sizeof(struct %.*s%s) },\n",
-                                               hd->struct_name.len,
-                                               hd->struct_name.txt,
-                                               hd->isref ? "*" : "");
-                       } else
-                               fprintf(f, "-1, -1, -1, -1 },\n");
+                               fprintf(f, "\t[%d] = { %d, 0, NULL },\n", i, prod);
                 }
                 fprintf(f, "};\n\n");
         }
                 }
                 fprintf(f, "};\n\n");
         }
@@ -2209,17 +2284,17 @@ transformed, and will cause an error when the code is compiled.
                         }
                         c -= 1;
                 }
                         }
                         c -= 1;
                 }
-               fputs("\n", f);
                 for (i = 0; i < p->body_size; i++) {
                         if (p->body[i]->struct_name.txt &&
                             used[i]) {
                                 // assume this has been copied out
                                 if (p->body[i]->isref)
                 for (i = 0; i < p->body_size; i++) {
                         if (p->body[i]->struct_name.txt &&
                             used[i]) {
                                 // assume this has been copied out
                                 if (p->body[i]->isref)
-                                       fprintf(f, "\t\t*(void**)body[%d] = NULL;\n", i);
+                                       fprintf(f, "\t\t*(void**)body[%d] = NULL;", i);
                                 else
                                 else
-                                       fprintf(f, "\t\tmemset(body[%d], 0, sizeof(struct %.*s));\n", i, p->body[i]->struct_name.len, p->body[i]->struct_name.txt);
+                                       fprintf(f, "\t\tmemset(body[%d], 0, sizeof(struct %.*s));", i, p->body[i]->struct_name.len, p->body[i]->struct_name.txt);
                         }
                 }
                         }
                 }
+               fputs("\n", f);
                 free(used);
         }
  
                 free(used);
         }
  
@@ -2245,6 +2320,7 @@ transformed, and will cause an error when the code is compiled.
                         if (p->code.txt) {
                                 fprintf(f, "#line %d \"%s\"\n", p->code_line, file);
                                 gen_code(p, f, g);
                         if (p->code.txt) {
                                 fprintf(f, "#line %d \"%s\"\n", p->code_line, file);
                                 gen_code(p, f, g);
+                               fprintf(f, "#line 7 \"gen_reduce\"\n");
                         }
  
                         if (p->head->struct_name.txt)
                         }
  
                         if (p->head->struct_name.txt)
@@ -2565,15 +2641,15 @@ recognised properly, and link with `libicuuc` as `libmdcode` requires that.
  Having analysed the grammar and generated all the tables, we only need
  the shift/reduce engine to bring it all together.
  
  Having analysed the grammar and generated all the tables, we only need
  the shift/reduce engine to bring it all together.
  
-### Goto table lookup
+### Go to table lookup
  
  
-The parser generator has nicely provided us with goto tables sorted by
+The parser generator has nicely provided us with go to tables sorted by
  symbol number.  We need a binary search function to find a symbol in the
  table.
  
  ###### parser functions
  
  symbol number.  We need a binary search function to find a symbol in the
  table.
  
  ###### parser functions
  
-       static int search(const struct state *l, int sym)
+       static int search(const struct state *l, int sym, int reduction)
         {
                 int lo = 0;
                 int hi = l->go_to_cnt;
         {
                 int lo = 0;
                 int hi = l->go_to_cnt;
@@ -2587,10 +2663,11 @@ table.
                         else
                                 hi = mid;
                 }
                         else
                                 hi = mid;
                 }
-               if (l->go_to[lo].sym == sym)
-                       return l->go_to[lo].state;
-               else
+               if (l->go_to[lo].sym != sym)
                         return -1;
                         return -1;
+               if (!reduction == !l->go_to[lo].is_reduction)
+                       return l->go_to[lo].state_or_reduction;
+               return -1;
         }
  
  ### Memory allocation
         }
  
  ### Memory allocation
@@ -2666,7 +2743,7 @@ is exactly the same process as shifting in a terminal so we use the same
  function for both.  In both cases we provide the symbol.  The state is
  deduced from the current top-of-stack state and the new symbol.
  
  function for both.  In both cases we provide the symbol.  The state is
  deduced from the current top-of-stack state and the new symbol.
  
-To simplify other code we arrange for `shift` to fail if there is no `goto`
+To simplify other code we arrange for `shift` to fail if there is no `go to`
  state for the symbol.  This is useful in basic parsing due to our design
  that we shift when we can, and reduce when we cannot.  So the `shift`
  function reports if it could.
  state for the symbol.  This is useful in basic parsing due to our design
  that we shift when we can, and reduce when we cannot.  So the `shift`
  function reports if it could.
@@ -2677,25 +2754,6 @@ stack is empty, it always chooses zero as the next state.
  So `shift` finds the next state.  If that succeeds it extends the
  allocations if needed and pushes all the information onto the stacks.
  
  So `shift` finds the next state.  If that succeeds it extends the
  allocations if needed and pushes all the information onto the stacks.
  
-An extra complication is added to `shift` by the `EOL` token.  This
-token must be generated when a `NEWLINE` is seen, but an `EOL` is
-expected.  When this happens, the `NEWLINE` is NOT consumed, so multiple
-EOL can appear before a NEWLINE.  To indicate that the token was shifted
-by not consumed, `shift` can return the special value `2`.  The token
-number for `EOL` cannot be statically declared, so when the parser
-starts we need to look through the array of non-terminals to find the
-EOL.
-
-###### parser state
-       int tk_eol;
-
-###### find eol
-       p.tk_eol = 0;
-       while (strcmp(non_term[p.tk_eol], "EOL") != 0)
-               p.tk_eol += 1;
-       p.tk_eol += TK_reserved + config->known_count;
-
-
  ###### parser functions
  
         static int shift(struct parser *p,
  ###### parser functions
  
         static int shift(struct parser *p,
@@ -2703,27 +2761,12 @@ EOL.
                          const struct state states[])
         {
                 struct frame next = {0};
                          const struct state states[])
         {
                 struct frame next = {0};
-               int ret;
                 int newstate = p->tos
                         ? search(&states[p->stack[p->tos-1].state],
                 int newstate = p->tos
                         ? search(&states[p->stack[p->tos-1].state],
-                                sym)
+                                sym, 0)
                         : 0;
                         : 0;
-               if (newstate >= 0)
-                       ret = 1;
-               else if (sym != TK_newline)
+               if (newstate < 0)
                         return 0;
                         return 0;
-               else {
-                       // have a NEWLINE, might need an EOL
-                       sym = p->tk_eol;
-                       newstate = p->tos
-                               ? search(&states[p->stack[p->tos-1].state],
-                                        sym)
-                               : 0;
-                       if (newstate < 0)
-                               return 0;
-                       ret = 2;
-                       asn = tok_copy(*(struct token*)asn);
-               }
  
                 if (p->tos >= p->stack_size) {
                         p->stack_size += 10;
  
                 if (p->tos >= p->stack_size) {
                         p->stack_size += 10;
@@ -2738,7 +2781,7 @@ EOL.
                 p->stack[p->tos] = next;
                 p->asn_stack[p->tos] = asn;
                 p->tos++;
                 p->stack[p->tos] = next;
                 p->asn_stack[p->tos] = asn;
                 p->tos++;
-               return ret;
+               return 1;
         }
  
  `pop` primarily moves the top of stack (`tos`) back down the required
         }
  
  `pop` primarily moves the top of stack (`tos`) back down the required
@@ -2786,18 +2829,37 @@ might also be ignored.  Ignoring tokens is combined with shifting.
         }
  
  Indents are ignored unless they can be shifted onto the stack
         }
  
  Indents are ignored unless they can be shifted onto the stack
-immediately.  The end of an indented section - the OUT token - is
-ignored precisely when the indent was ignored.  To keep track of this we
-need a small stack of flags, which is easily stored as bits in an
-`unsigned long`.  This will never overflow and the scanner only allows
-20 levels of indentation.
+immediately or nothing can be shifted (in which case we reduce, and try
+again).  The end of an indented section - the OUT token - is ignored
+precisely when the indent was ignored.  To keep track of this we need a
+small stack of flags, which is easily stored as bits in an `unsigned
+long`.  This will never overflow and the scanner only allows 20 levels
+of indentation.
  
  ###### parser state
         unsigned long ignored_indents;
  
  
  ###### parser state
         unsigned long ignored_indents;
  
-NEWLINE/EOL is ignored when in an indented section of text which was not
+NEWLINE is ignored when in an indented section of text which was not
  explicitly expected by the grammar.  So if the most recent indent is
  explicitly expected by the grammar.  So if the most recent indent is
-ignored, so is any EOL token.
+ignored, so is any NEWLINE token.
+
+If a NEWLINE is seen but it cannot be shifted, we try to shift an EOL
+token instead.  If that succeeds, we make a new copy of the NEWLINE
+token and continue.  This allows a NEWLINE to appear to be preceded by
+an indefinite number of EOL tokens.
+
+The token number for `EOL` cannot be statically declared, so when the
+parser starts we need to look through the array of non-terminals to find
+the EOL.
+
+###### parser state
+       int tk_eol;
+
+###### find eol
+       p.tk_eol = 0;
+       while (strcmp(non_term[p.tk_eol], "EOL") != 0)
+               p.tk_eol += 1;
+       p.tk_eol += TK_reserved + config->known_count;
  
  For other tokens, we shift the next token if that is possible, otherwise
  we try to reduce a production.
  
  For other tokens, we shift the next token if that is possible, otherwise
  we try to reduce a production.
@@ -2815,29 +2877,42 @@ we try to reduce a production.
                 continue;
         }
  
                 continue;
         }
  
-       switch (shift(&p, tk->num, tk, states)) {
-       case 1:
+       if (shift(&p, tk->num, tk, states)) {
                 if (tk->num == TK_out)
                         p.ignored_indents >>= 1;
                 if (tk->num == TK_in)
                         p.ignored_indents <<= 1;
  
                 if (tk->num == TK_out)
                         p.ignored_indents >>= 1;
                 if (tk->num == TK_in)
                         p.ignored_indents <<= 1;
  
+               parser_trace_action(trace, "Shift");
                 tk = NULL;
                 tk = NULL;
-               /* fallthrough */
-       case 2:
-               parser_trace_action(trace, tk ? "ShiftEOL" : "Shift");
                 ## did shift
                 continue;
                 ## did shift
                 continue;
+       } else if (tk->num == TK_newline &&
+                  shift(&p, p.tk_eol, tk, states)) {
+               tk = tok_copy(*tk);
+               parser_trace_action(trace, "ShiftEOL");
+               continue;
         }
  
         if (tk->num == TK_in) {
         }
  
         if (tk->num == TK_in) {
-               /* No indent expected here, so ignore IN */
-               free(tk);
-               tk = NULL;
-               p.ignored_indents <<= 1;
-               p.ignored_indents |= 1;
-               parser_trace_action(trace, "Ignore");
-               continue;
+               int should_reduce;
+               if (states[tos->state].reduce_prod == MANY_REDUCIBLE)
+                       /* Only reduce if TK_in in lookahead */
+                       should_reduce = (search(&states[tos->state], TK_in, 1) >= 0);
+               else
+                       /* Only reduce if we cannot shift anything */
+                       should_reduce = (states[tos->state].go_to_cnt == 0);
+               if (!should_reduce) {
+                       /* No indent expected here and reduce is not indicated,
+                        * so ignore IN
+                        */
+                       free(tk);
+                       tk = NULL;
+                       p.ignored_indents <<= 1;
+                       p.ignored_indents |= 1;
+                       parser_trace_action(trace, "Ignore");
+                       continue;
+               }
         }
  
  We have already discussed the bulk of the handling of a "reduce" action,
         }
  
  We have already discussed the bulk of the handling of a "reduce" action,
@@ -2853,24 +2928,26 @@ report that that input has been accepted.
  ###### parser vars
  
         void *ret = NULL;
  ###### parser vars
  
         void *ret = NULL;
+       int reduction;
  
  ###### try reduce
  
  
  ###### try reduce
  
-       if (states[tos->state].reduce_prod >= 0) {
+       reduction = states[tos->state].reduce_prod;
+       if (reduction == MANY_REDUCIBLE)
+               reduction = search(&states[tos->state], tk->num, 1);
+       if (reduction >= 0) {
                 void **body;
                 void *res;
                 void **body;
                 void *res;
-               const struct state *nextstate = &states[tos->state];
-               int prod = nextstate->reduce_prod;
-               int size = nextstate->reduce_size;
-               int res_size = nextstate->result_size;
+               int size = reductions[reduction].size;
+               int res_size = reductions[reduction].result_size;
  
                 body = p.asn_stack + (p.tos - size);
                 res = res_size ? calloc(1, res_size) : NULL;
  
                 body = p.asn_stack + (p.tos - size);
                 res = res_size ? calloc(1, res_size) : NULL;
-               res_size = do_reduce(prod, body, config, res);
-               if (res_size != nextstate->result_size)
+               res_size = do_reduce(reduction, body, config, res);
+               if (res_size != reductions[reduction].result_size)
                         abort();
                 pop(&p, size, do_free);
                         abort();
                 pop(&p, size, do_free);
-               if (!shift(&p, nextstate->reduce_sym, res, states)) {
+               if (!shift(&p, reductions[reduction].sym, res, states)) {
                         accepted = 1;
                         ret = res;
                         parser_trace_action(trace, "Accept");
                         accepted = 1;
                         ret = res;
                         parser_trace_action(trace, "Accept");
@@ -2929,6 +3006,7 @@ dropping tokens until either we manage to shift one, or reach end-of-file.
  
         void *parser_run(struct token_state *tokens,
                          const struct state states[],
  
         void *parser_run(struct token_state *tokens,
                          const struct state states[],
+                        const struct reduction reductions[],
                          int (*do_reduce)(int, void**, struct token_config*, void*),
                          void (*do_free)(short, void*),
                          FILE *trace, const char *non_term[],
                          int (*do_reduce)(int, void**, struct token_config*, void*),
                          void (*do_free)(short, void*),
                          FILE *trace, const char *non_term[],
@@ -2950,6 +3028,7 @@ dropping tokens until either we manage to shift one, or reach end-of-file.
  ###### exported functions
         void *parser_run(struct token_state *tokens,
                          const struct state states[],
  ###### exported functions
         void *parser_run(struct token_state *tokens,
                          const struct state states[],
+                        const struct reduction reductions[],
                          int (*do_reduce)(int, void**, struct token_config*, void*),
                          void (*do_free)(short, void*),
                          FILE *trace, const char *non_term[],
                          int (*do_reduce)(int, void**, struct token_config*, void*),
                          void (*do_free)(short, void*),
                          FILE *trace, const char *non_term[],
@@ -3162,3 +3241,73 @@ exits with an error.
         355//113
  
         error
         355//113
  
         error
+
+## A second example
+
+I had originally thought that the LR05 style grammar would suit me there
+should never be a sequence of token that means either of two different
+things depending on what comes follows it.  Without that ambiguity, the
+extra strength of LALR, or even SLR, would not be needed.  However I
+discovered this was too simplistic.  There are times when it is quite
+reasonable for the empty string, or maybe a NEWLINE, to have different
+meanings depending on the next token.  For this, LR05 is not sufficient,
+so I added full action-table support.
+
+This example tests the selection of REDUCE actions based on the
+look-ahead symbol, using a trivial grammar where the empty string can
+reasonably mean different things depending on what it precedes.
+
+
+####### File: parsergen.mk
+       demos :: do_lalr_demo
+       lalr_demo.c lalr_demo.h : parsergen parsergen.mdc
+               ./parsergen --LALR --tag demo -o lalr_demo parsergen.mdc
+       lalr_demo: lalr_demo.o libparser.o libscanner.o libmdcode.o
+               $(CC) -o lalr_demo lalr_demo.c libparser.o libscanner.o libmdcode.o -licuuc
+       do_lalr_demo: lalr_demo
+               NOTRACE=1 ./lalr_demo
+               @echo 'Should produce "start of line, empty sign, empty sigl"'
+
+####### demo: grammar
+
+       $TERM $ @ + -
+       line -> ${ printf("start of line"); }$
+               | line term
+       term -> sigl IDENTIFIER
+               | sign NUMBER
+       sigl ->   $
+               | @
+               | ${ printf(", empty sigl"); }$
+       sign ->   +
+               | -
+               | ${ printf(", empty sign"); }$
+
+####### demo: header
+
+       //
+
+####### demo: code
+
+       #include <stdlib.h>
+       #include <stdio.h>
+       #include "mdcode.h"
+       #include "scanner.h"
+       #include "parser.h"
+       #include "lalr_demo.h"
+       int main(int argc, char *argv[])
+       {
+               char test[] = "````\n$a -1 5 b\n";
+               struct section *table = code_extract(test, test+sizeof(test)-1, NULL);
+               struct token_config config = {
+                       .ignored = (1 << TK_line_comment)
+                                | (1 << TK_newline)
+                                | (1 << TK_in)
+                                | (1 << TK_out),
+                       .number_chars = "",
+                       .word_start = "",
+                       .word_cont = "",
+               };
+               parse_lalr_demo(table->code, &config, getenv("NOTRACE") ? NULL : stderr);
+               printf("\n");
+               exit(0);
+       }