parsergen: improve symbol-discard in error handling.

author NeilBrown <neil@brown.name>

Mon, 19 Feb 2018 05:38:12 +0000 (16:38 +1100)

committer NeilBrown <neil@brown.name>

Mon, 19 Feb 2018 05:38:12 +0000 (16:38 +1100)
author NeilBrown <neil@brown.name>
Mon, 19 Feb 2018 05:38:12 +0000 (16:38 +1100)
committer NeilBrown <neil@brown.name>
Mon, 19 Feb 2018 05:38:12 +0000 (16:38 +1100)
diff --git a/csrc/parsergen.mdc b/csrc/parsergen.mdc

index c25a87e1f621fd3ffa95d4a809c2276a235b2476..aacc59810be2e13aad50d1494171609797dcd71d 100644 (file)
--- a/csrc/parsergen.mdc
+++ b/csrc/parsergen.mdc
@@ -2713,9 +2713,32 @@ the most recent start of line.  This is how a newline forcible
  terminates any line-like structure - we try to reduce down to at most
  one symbol for each line where newlines are allowed.
  
+When, during error handling, we discard tokens that have been read in,
+we want to keep discarding until we see one that is recognised.  If we
+had a full set of LR(1) grammar states, this would mean looking in the
+look-ahead set, but we don't keep a full look-ahead set.  We only record
+the subset that leads to SHIFT.  We can, however, deduce the look-ahead
+set by looking at the SHIFT subsets for all states that we can get to by
+reducing zero or more times.  So we need a little function which
+checks if a given token is in any of these look-ahead sets.
+
  ###### parser includes
         #include "parser.h"
+
  ###### parser_run
+
+       static int in_lookahead(struct token *tk, const struct state *states, int state) /* Returns 1 if tk->num is found in 'state' or in any state reached by following reduce_sym entries; otherwise 0. */
+       {
+               while (state >= 0) { /* a negative state (failed lookup below) ends the walk */
+                       if (search(&states[state], tk->num) >= 0) /* token has an entry in this state's table */
+                               return 1;
+                       if (states[state].reduce_prod < 0) /* presumably: no reduction possible from this state - confirm */
+                               return 0;
+                       state = search(&states[state], states[state].reduce_sym); /* NOTE(review): reduce_sym is looked up in the current state itself, with no stack pop - confirm intended */
+               }
+               return 0; /* walk ended without finding the token */
+       }
+
         void *parser_run(struct token_state *tokens,
                          const struct state states[],
                          int (*do_reduce)(int, void**, struct token_config*, void*),
@@ -2852,7 +2875,7 @@ one symbol for each line where newlines are allowed.
                                 break;
                         }
                         tos = &p.stack[p.tos-1];
-                       while (search(&states[tos->state], tk->num) < 0 &&
+                       while (!in_lookahead(tk, states, tos->state) &&
                                tk->num != TK_eof) {
                                 free(tk);
                                 tk = tok_copy(token_next(tokens));
author	NeilBrown <neil@brown.name>
	Mon, 19 Feb 2018 05:38:12 +0000 (16:38 +1100)
committer	NeilBrown <neil@brown.name>
	Mon, 19 Feb 2018 05:38:12 +0000 (16:38 +1100)