From 77165c59bca010dba0bdb8775552d6af944046a3 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 10 Mar 2021 11:49:24 +1100 Subject: [PATCH] parsergen: add support for EOL token An EOL token is generated when a NEWLINE is found and an EOL can be shifted. This allows a production to declare that it must finish at the end of a line, without consuming the NEWLINE. Signed-off-by: NeilBrown --- csrc/parsergen.mdc | 62 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 47 insertions(+), 15 deletions(-) diff --git a/csrc/parsergen.mdc b/csrc/parsergen.mdc index b78a00d..0bef793 100644 --- a/csrc/parsergen.mdc +++ b/csrc/parsergen.mdc @@ -2684,6 +2684,25 @@ stack is empty, it always chooses zero as the next state. So `shift` finds the next state. If that succeeds it extends the allocations if needed and pushes all the information onto the stacks. +An extra complication is added to `shift` by the `EOL` token. This +token must be generated when a `NEWLINE` is seen, but an `EOL` is +expected. When this happens, the `NEWLINE` is NOT consumed, so multiple +EOL can appear before a NEWLINE. To indicate that the token was shifted +but not consumed, `shift` can return the special value `2`. The token +number for `EOL` cannot be statically declared, so when the parser +starts we need to look through the array of non-terminals to find the +EOL. + +###### parser state + int tk_eol; + +###### find eol + p.tk_eol = 0; + while (strcmp(non_term[p.tk_eol], "EOL") != 0) + p.tk_eol += 1; + p.tk_eol += TK_reserved + config->known_count; + + ###### parser functions static int shift(struct parser *p, @@ -2691,12 +2710,28 @@ allocations if needed and pushes all the information onto the stacks. const struct state states[]) { struct frame next = {0}; + int ret; int newstate = p->tos ? 
search(&states[p->stack[p->tos-1].state], sym) : 0; - if (newstate < 0) + if (newstate >= 0) + ret = 1; + else if (sym != TK_newline) return 0; + else { + // have a NEWLINE, might need an EOL + sym = p->tk_eol; + newstate = p->tos + ? search(&states[p->stack[p->tos-1].state], + sym) + : 0; + if (newstate < 0) + return 0; + ret = 2; + asn = tok_copy(*(struct token*)asn); + } + if (p->tos >= p->stack_size) { p->stack_size += 10; p->stack = realloc(p->stack, p->stack_size @@ -2710,7 +2745,7 @@ allocations if needed and pushes all the information onto the stacks. p->stack[p->tos] = next; p->asn_stack[p->tos] = asn; p->tos++; - return 1; + return ret; } `pop` primarily moves the top of stack (`tos`) back down the required @@ -2733,8 +2768,8 @@ in. ### The heart of the parser. Now we have the parser. For each token we might shift it, trigger a -reduction, or start error handling. 2D tokens (IN, OUT, NEWLINE) might -also be ignored. Ignoring tokens is combined with shifting. +reduction, or start error handling. 2D tokens (IN, OUT, NEWLINE, EOL) +might also be ignored. Ignoring tokens is combined with shifting. ###### parser vars @@ -2768,7 +2803,7 @@ need a small stack of flags, which is easily stored as bits in an unsigned long ignored_indents; int indent_depth; -NEWLINE is ignored when in an indented section of text which was not +NEWLINE/EOL is ignored when in an indented section of text which was not explicitly expected by the grammar. So if the most recent indent is ignored, so is any EOL token. @@ -2788,15 +2823,8 @@ we try to reduce a production. continue; } - if (tk->num == TK_newline) { - if (1) { - free(tk); - tk = NULL; - parser_trace_action(trace, "Discard"); - continue; - } - } - if (shift(&p, tk->num, tk, states)) { + switch (shift(&p, tk->num, tk, states)) { + case 1: if (tk->num == TK_out) p.indent_depth -= 1; if (tk->num == TK_in) { @@ -2804,7 +2832,9 @@ we try to reduce a production. 
p.ignored_indents &= ~(1 << p.indent_depth); } tk = NULL; - parser_trace_action(trace, "Shift"); + /* fallthrough */ + case 2: + parser_trace_action(trace, tk ? "ShiftEOL" : "Shift"); ## did shift continue; } @@ -2915,6 +2945,8 @@ dropping tokens until either we manage to shift one, or reach end-of-file. { ## parser vars + ## find eol + ## heart of parser free(tk); -- 2.43.0