parsergen: add support for EOL token

author NeilBrown <neil@brown.name>

Wed, 10 Mar 2021 00:49:24 +0000 (11:49 +1100)

committer NeilBrown <neil@brown.name>

Wed, 10 Mar 2021 01:01:11 +0000 (12:01 +1100)
author NeilBrown <neil@brown.name>
Wed, 10 Mar 2021 00:49:24 +0000 (11:49 +1100)
committer NeilBrown <neil@brown.name>
Wed, 10 Mar 2021 01:01:11 +0000 (12:01 +1100)
diff --git a/csrc/parsergen.mdc b/csrc/parsergen.mdc

index b78a00d3511e7825bcbe9ef8cbc7340b8ed66fae..0bef7934d3ec112b77eff4ecd4a007240ff92f34 100644 (file)
--- a/csrc/parsergen.mdc
+++ b/csrc/parsergen.mdc
@@ -2684,6 +2684,25 @@ stack is empty, it always chooses zero as the next state.
  So `shift` finds the next state.  If that succeeds it extends the
  allocations if needed and pushes all the information onto the stacks.
  
+An extra complication is added to `shift` by the `EOL` token.  This
+token must be generated when a `NEWLINE` is seen, but an `EOL` is
+expected.  When this happens, the `NEWLINE` is NOT consumed, so multiple
+EOL can appear before a NEWLINE.  To indicate that the token was shifted
+but not consumed, `shift` can return the special value `2`.  The token
+number for `EOL` cannot be statically declared, so when the parser
+starts we need to look through the array of non-terminals to find the
+EOL.
+
+###### parser state
+       int tk_eol;
+
+###### find eol
+       p.tk_eol = 0;
+       while (strcmp(non_term[p.tk_eol], "EOL") != 0)
+               p.tk_eol += 1;
+       p.tk_eol += TK_reserved + config->known_count;
+
+
  ###### parser functions
  
         static int shift(struct parser *p,
@@ -2691,12 +2710,28 @@ allocations if needed and pushes all the information onto the stacks.
                          const struct state states[])
         {
                 struct frame next = {0};
+               int ret;
                 int newstate = p->tos
                         ? search(&states[p->stack[p->tos-1].state],
                                  sym)
                         : 0;
-               if (newstate < 0)
+               if (newstate >= 0)
+                       ret = 1;
+               else if (sym != TK_newline)
                         return 0;
+               else {
+                       // have a NEWLINE, might need an EOL
+                       sym = p->tk_eol;
+                       newstate = p->tos
+                               ? search(&states[p->stack[p->tos-1].state],
+                                        sym)
+                               : 0;
+                       if (newstate < 0)
+                               return 0;
+                       ret = 2;
+                       asn = tok_copy(*(struct token*)asn);
+               }
+
                 if (p->tos >= p->stack_size) {
                         p->stack_size += 10;
                         p->stack = realloc(p->stack, p->stack_size
@@ -2710,7 +2745,7 @@ allocations if needed and pushes all the information onto the stacks.
                 p->stack[p->tos] = next;
                 p->asn_stack[p->tos] = asn;
                 p->tos++;
-               return 1;
+               return ret;
         }
  
  `pop` primarily moves the top of stack (`tos`) back down the required
@@ -2733,8 +2768,8 @@ in.
  ### The heart of the parser.
  
  Now we have the parser.  For each token we might shift it, trigger a
-reduction, or start error handling.  2D tokens (IN, OUT, NEWLINE) might
-also be ignored.  Ignoring tokens is combined with shifting.
+reduction, or start error handling.  2D tokens (IN, OUT, NEWLINE, EOL)
+might also be ignored.  Ignoring tokens is combined with shifting.
  
  ###### parser vars
  
@@ -2768,7 +2803,7 @@ need a small stack of flags, which is easily stored as bits in an
         unsigned long ignored_indents;
         int indent_depth;
  
-NEWLINE is ignored when in an indented section of text which was not
+NEWLINE/EOL is ignored when in an indented section of text which was not
  explicitly expected by the grammar.  So if the most recent indent is
  ignored, so is any EOL token.
  
@@ -2788,15 +2823,8 @@ we try to reduce a production.
                 continue;
         }
  
-       if (tk->num == TK_newline) {
-               if (1) {
-                       free(tk);
-                       tk = NULL;
-                       parser_trace_action(trace, "Discard");
-                       continue;
-               }
-       }
-       if (shift(&p, tk->num, tk, states)) {
+       switch (shift(&p, tk->num, tk, states)) {
+       case 1:
                 if (tk->num == TK_out)
                         p.indent_depth -= 1;
                 if (tk->num == TK_in) {
@@ -2804,7 +2832,9 @@ we try to reduce a production.
                         p.ignored_indents &= ~(1 << p.indent_depth);
                 }
                 tk = NULL;
-               parser_trace_action(trace, "Shift");
+               /* fallthrough */
+       case 2:
+               parser_trace_action(trace, tk ? "ShiftEOL" : "Shift");
                 ## did shift
                 continue;
         }
@@ -2915,6 +2945,8 @@ dropping tokens until either we manage to shift one, or reach end-of-file.
         {
                 ## parser vars
  
+               ## find eol
+
                 ## heart of parser
  
                 free(tk);
author	NeilBrown <neil@brown.name>
	Wed, 10 Mar 2021 00:49:24 +0000 (11:49 +1100)
committer	NeilBrown <neil@brown.name>
	Wed, 10 Mar 2021 01:01:11 +0000 (12:01 +1100)