]> ocean-lang.org Git - ocean/commitdiff
parsergen: improve symbol-discard in error handling.
authorNeilBrown <neil@brown.name>
Mon, 19 Feb 2018 05:38:12 +0000 (16:38 +1100)
committerNeilBrown <neil@brown.name>
Mon, 19 Feb 2018 05:38:12 +0000 (16:38 +1100)
As we don't keep the full look-ahead set, we need to pay a
bit more attention when discarding input symbols, looking
for one we recognize.  We need to consider anything
that can be shifted in any state we can reach by simple
shifting.

Signed-off-by: NeilBrown <neil@brown.name>
csrc/parsergen.mdc

index c25a87e1f621fd3ffa95d4a809c2276a235b2476..aacc59810be2e13aad50d1494171609797dcd71d 100644 (file)
@@ -2713,9 +2713,32 @@ the most recent start of line.  This is how a newline forcible
 terminates any line-like structure - we try to reduce down to at most
 one symbol for each line where newlines are allowed.
 
+When, during error handling, we discard tokens that have been read in,
+we want to keep discarding until we see one that is recognised.  If we
+had a full set of LR(1) grammar states, this would mean looking in the
+look-ahead set, but we don't keep a full look-ahead set.  We only record
+the subset that leads to SHIFT.  We can, however, deduce the look-ahead
+set by looking at the SHIFT subsets for all states that we can get to by
+reducing zero or more times.  So we need a little function which
+checks if a given token is in any of these look-ahead sets.
+
 ###### parser includes
        #include "parser.h"
+
 ###### parser_run
+
+       static int in_lookahead(struct token *tk, const struct state *states, int state)
+       {       /* Return 1 if search() finds tk->num in `state`, or in any state
+                * reached from it by repeatedly following that state's
+                * reduce_sym entry; return 0 otherwise.
+                *
+                * tk:     token whose symbol number (tk->num) is looked up.
+                * states: state table, indexed by state number.
+                * state:  state to start from; a negative value yields 0.
+                *
+                * NOTE(review): presumably search() returns the target state
+                * for a symbol, or a negative value when the symbol has no
+                * entry - confirm against its definition.
+                */
+               while (state >= 0) {
+                       if (search(&states[state], tk->num) >= 0)       /* token has an entry in this state */
+                               return 1;
+                       if (states[state].reduce_prod < 0)      /* negative reduce_prod: nothing further to follow */
+                               return 0;
+                       state = search(&states[state], states[state].reduce_sym);       /* a negative result ends the loop */
+               }
+               return 0;
+       }
+
        void *parser_run(struct token_state *tokens,
                         const struct state states[],
                         int (*do_reduce)(int, void**, struct token_config*, void*),
@@ -2852,7 +2875,7 @@ one symbol for each line where newlines are allowed.
                                break;
                        }
                        tos = &p.stack[p.tos-1];
-                       while (search(&states[tos->state], tk->num) < 0 &&
+                       while (!in_lookahead(tk, states, tos->state) &&
                               tk->num != TK_eof) {
                                free(tk);
                                tk = tok_copy(token_next(tokens));