parsergen: move EOL handling out of shift()

author NeilBrown <neil@brown.name>

Sat, 13 Nov 2021 22:47:21 +0000 (09:47 +1100)

committer NeilBrown <neil@brown.name>

Sat, 13 Nov 2021 22:50:59 +0000 (09:50 +1100)
author NeilBrown <neil@brown.name>
Sat, 13 Nov 2021 22:47:21 +0000 (09:47 +1100)
committer NeilBrown <neil@brown.name>
Sat, 13 Nov 2021 22:50:59 +0000 (09:50 +1100)
diff --git a/csrc/parsergen.mdc b/csrc/parsergen.mdc

index 709a9acb962e8be130030d7d7d183c2fb3222ed2..d9467442eeab1dfec4720f7a2b4f5c6666d43867 100644 (file)
--- a/csrc/parsergen.mdc
+++ b/csrc/parsergen.mdc
@@ -2688,25 +2688,6 @@ stack is empty, it always chooses zero as the next state.
  So `shift` finds the next state.  If that succeeds it extends the
  allocations if needed and pushes all the information onto the stacks.
  
  So `shift` finds the next state.  If that succeeds it extends the
  allocations if needed and pushes all the information onto the stacks.
  
-An extra complication is added to `shift` by the `EOL` token.  This
-token must be generated when a `NEWLINE` is seen, but an `EOL` is
-expected.  When this happens, the `NEWLINE` is NOT consumed, so multiple
-EOL can appear before a NEWLINE.  To indicate that the token was shifted
-but not consumed, `shift` can return the special value `2`.  The token
-number for `EOL` cannot be statically declared, so when the parser
-starts we need to look through the array of non-terminals to find the
-EOL.
-
-###### parser state
-       int tk_eol;
-
-###### find eol
-       p.tk_eol = 0;
-       while (strcmp(non_term[p.tk_eol], "EOL") != 0)
-               p.tk_eol += 1;
-       p.tk_eol += TK_reserved + config->known_count;
-
-
  ###### parser functions
  
         static int shift(struct parser *p,
  ###### parser functions
  
         static int shift(struct parser *p,
@@ -2714,27 +2695,12 @@ EOL.
                          const struct state states[])
         {
                 struct frame next = {0};
                          const struct state states[])
         {
                 struct frame next = {0};
-               int ret;
                 int newstate = p->tos
                         ? search(&states[p->stack[p->tos-1].state],
                                  sym)
                         : 0;
                 int newstate = p->tos
                         ? search(&states[p->stack[p->tos-1].state],
                                  sym)
                         : 0;
-               if (newstate >= 0)
-                       ret = 1;
-               else if (sym != TK_newline)
+               if (newstate < 0)
                         return 0;
                         return 0;
-               else {
-                       // have a NEWLINE, might need an EOL
-                       sym = p->tk_eol;
-                       newstate = p->tos
-                               ? search(&states[p->stack[p->tos-1].state],
-                                        sym)
-                               : 0;
-                       if (newstate < 0)
-                               return 0;
-                       ret = 2;
-                       asn = tok_copy(*(struct token*)asn);
-               }
  
                 if (p->tos >= p->stack_size) {
                         p->stack_size += 10;
  
                 if (p->tos >= p->stack_size) {
                         p->stack_size += 10;
@@ -2749,7 +2715,7 @@ EOL.
                 p->stack[p->tos] = next;
                 p->asn_stack[p->tos] = asn;
                 p->tos++;
                 p->stack[p->tos] = next;
                 p->asn_stack[p->tos] = asn;
                 p->tos++;
-               return ret;
+               return 1;
         }
  
  `pop` primarily moves the top of stack (`tos`) back down the required
         }
  
  `pop` primarily moves the top of stack (`tos`) back down the required
@@ -2807,9 +2773,27 @@ of indentation.
  ###### parser state
         unsigned long ignored_indents;
  
  ###### parser state
         unsigned long ignored_indents;
  
-NEWLINE/EOL is ignored when in an indented section of text which was not
+NEWLINE is ignored when in an indented section of text which was not
  explicitly expected by the grammar.  So if the most recent indent is
  explicitly expected by the grammar.  So if the most recent indent is
-ignored, so is any EOL token.
+ignored, so is any NEWLINE token.
+
+If a NEWLINE is seen but it cannot be shifted, we try to shift an EOL
+token instead.  If that succeeds, we make a new copy of the NEWLINE
+token and continue.  This allows a NEWLINE to appear to be preceded by
+an indefinite number of EOL tokens.
+
+The token number for `EOL` cannot be statically declared, so when the
+parser starts we need to look through the array of non-terminals to find
+the EOL.
+
+###### parser state
+       int tk_eol;
+
+###### find eol
+       p.tk_eol = 0;
+       while (strcmp(non_term[p.tk_eol], "EOL") != 0)
+               p.tk_eol += 1;
+       p.tk_eol += TK_reserved + config->known_count;
  
  For other tokens, we shift the next token if that is possible, otherwise
  we try to reduce a production.
  
  For other tokens, we shift the next token if that is possible, otherwise
  we try to reduce a production.
@@ -2827,19 +2811,21 @@ we try to reduce a production.
                 continue;
         }
  
                 continue;
         }
  
-       switch (shift(&p, tk->num, tk, states)) {
-       case 1:
+       if (shift(&p, tk->num, tk, states)) {
                 if (tk->num == TK_out)
                         p.ignored_indents >>= 1;
                 if (tk->num == TK_in)
                         p.ignored_indents <<= 1;
  
                 if (tk->num == TK_out)
                         p.ignored_indents >>= 1;
                 if (tk->num == TK_in)
                         p.ignored_indents <<= 1;
  
+               parser_trace_action(trace, "Shift");
                 tk = NULL;
                 tk = NULL;
-               /* fallthrough */
-       case 2:
-               parser_trace_action(trace, tk ? "ShiftEOL" : "Shift");
                 ## did shift
                 continue;
                 ## did shift
                 continue;
+       } else if (tk->num == TK_newline &&
+                  shift(&p, p.tk_eol, tk, states)) {
+               tk = tok_copy(*tk);
+               parser_trace_action(trace, "ShiftEOL");
+               continue;
         }
  
         if (tk->num == TK_in && states[p.stack[p.tos-1].state].go_to_cnt > 0) {
         }
  
         if (tk->num == TK_in && states[p.stack[p.tos-1].state].go_to_cnt > 0) {
author	NeilBrown <neil@brown.name>
	Sat, 13 Nov 2021 22:47:21 +0000 (09:47 +1100)
committer	NeilBrown <neil@brown.name>
	Sat, 13 Nov 2021 22:50:59 +0000 (09:50 +1100)