oceani: := is no longer a token.

[ocean] / csrc / parsergen.mdc
diff --git a/csrc/parsergen.mdc b/csrc/parsergen.mdc

index 905e8f8d125ca7432338059d84dae693ae3597fc..bc1a604d3a8d696bb666d4262c735740b3648d92 100644 (file)
--- a/csrc/parsergen.mdc
+++ b/csrc/parsergen.mdc
@@ -2713,9 +2713,32 @@ the most recent start of line.  This is how a newline forcible
  terminates any line-like structure - we try to reduce down to at most
  one symbol for each line where newlines are allowed.
  
+When, during error handling, we discard tokens that have been read in,
+we want to keep discarding until we see one that is recognised.  If we
+had a full set of LR(1) grammar states, this would mean looking in the
+look-ahead set, but we don't keep a full look-ahead set.  We only record
+the subset that leads to SHIFT.  We can, however, deduce the look-ahead
+set by looking at the SHIFT subsets for all states that we can get to by
+reducing zero or more times.  So we need a little function which
+checks if a given token is in any of these look-ahead sets.
+
  ###### parser includes
         #include "parser.h"
+
  ###### parser_run
+
+       static int in_lookahead(struct token *tk, const struct state *states, int state)
+       {       // Returns 1 if search() finds tk->num in `state` or in a state reached by following reduce_sym entries; otherwise 0.
+               while (state >= 0) {    // a negative result from the search() at the bottom of the loop ends the walk
+                       if (search(&states[state], tk->num) >= 0)       // the token has an entry in this state
+                               return 1;
+                       if (states[state].reduce_prod < 0)      // presumably: this state has no reduction, so there is nowhere further to look
+                               return 0;
+                       state = search(&states[state], states[state].reduce_sym);       // step to the entry for reduce_sym (presumably the symbol the reduction produces), looked up from this same state - NOTE(review): no stack pop is modelled here; confirm that is intended
+               }
+               return 0;       // the walk ended without finding the token
+       }
+
         void *parser_run(struct token_state *tokens,
                          const struct state states[],
                          int (*do_reduce)(int, void**, struct token_config*, void*),
@@ -2841,9 +2864,9 @@ one symbol for each line where newlines are allowed.
                         short indents = 0, start_of_line;
  
                         err_tk = tok_copy(*tk);
-                       while (shift(&p, TK_error, 0, 0,
-                                    err_tk, states) == 0
-                              && p.tos > 0)
+                       while (p.tos > 0 &&
+                              shift(&p, TK_error, 0, 0,
+                                    err_tk, states) == 0)
                                 // discard this state
                                 indents += pop(&p, 1, &start_of_line, do_free);
                         if (p.tos == 0) {
@@ -2852,7 +2875,7 @@ one symbol for each line where newlines are allowed.
                                 break;
                         }
                         tos = &p.stack[p.tos-1];
-                       while (search(&states[tos->state], tk->num) < 0 &&
+                       while (!in_lookahead(tk, states, tos->state) &&
                                tk->num != TK_eof) {
                                 free(tk);
                                 tk = tok_copy(token_next(tokens));
@@ -2865,11 +2888,7 @@ one symbol for each line where newlines are allowed.
                                         // FIXME update since_indent here
                                 }
                         }
-                       if (p.tos == 0 && tk->num == TK_eof)
-                               break;
-                       tos = &p.stack[p.tos-1];
                         tos->indents += indents;
-                       exit(1);
                 }
                 free(tk);
                 pop(&p, p.tos, NULL, do_free);