From 10db06aed6af588a0ccd05e80a0f50286949d56c Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sun, 14 Nov 2021 09:47:21 +1100 Subject: [PATCH] parsergen: move EOL handling out of shift() shift() is called in several places, and in only one of those is there a need to map NEWLINE to EOL. Move the code out of shift() and instead make a second call to shift() if shifting NEWLINE failed. I think this makes the code clearer. Signed-off-by: NeilBrown --- csrc/parsergen.mdc | 72 +++++++++++++++++++--------------------------- 1 file changed, 29 insertions(+), 43 deletions(-) diff --git a/csrc/parsergen.mdc b/csrc/parsergen.mdc index 709a9ac..d946744 100644 --- a/csrc/parsergen.mdc +++ b/csrc/parsergen.mdc @@ -2688,25 +2688,6 @@ stack is empty, it always chooses zero as the next state. So `shift` finds the next state. If that succeeds it extends the allocations if needed and pushes all the information onto the stacks. -An extra complication is added to `shift` by the `EOL` token. This -token must be generated when a `NEWLINE` is seen, but an `EOL` is -expected. When this happens, the `NEWLINE` is NOT consumed, so multiple -EOL can appear before a NEWLINE. To indicate that the token was shifted -by not consumed, `shift` can return the special value `2`. The token -number for `EOL` cannot be statically declared, so when the parser -starts we need to look through the array of non-terminals to find the -EOL. - -###### parser state - int tk_eol; - -###### find eol - p.tk_eol = 0; - while (strcmp(non_term[p.tk_eol], "EOL") != 0) - p.tk_eol += 1; - p.tk_eol += TK_reserved + config->known_count; - - ###### parser functions static int shift(struct parser *p, @@ -2714,27 +2695,12 @@ EOL. const struct state states[]) { struct frame next = {0}; - int ret; int newstate = p->tos ? 
search(&states[p->stack[p->tos-1].state], sym) : 0; - if (newstate >= 0) - ret = 1; - else if (sym != TK_newline) + if (newstate < 0) return 0; - else { - // have a NEWLINE, might need an EOL - sym = p->tk_eol; - newstate = p->tos - ? search(&states[p->stack[p->tos-1].state], - sym) - : 0; - if (newstate < 0) - return 0; - ret = 2; - asn = tok_copy(*(struct token*)asn); - } if (p->tos >= p->stack_size) { p->stack_size += 10; @@ -2749,7 +2715,7 @@ EOL. p->stack[p->tos] = next; p->asn_stack[p->tos] = asn; p->tos++; - return ret; + return 1; } `pop` primarily moves the top of stack (`tos`) back down the required @@ -2807,9 +2773,27 @@ of indentation. ###### parser state unsigned long ignored_indents; -NEWLINE/EOL is ignored when in an indented section of text which was not +NEWLINE is ignored when in an indented section of text which was not explicitly expected by the grammar. So if the most recent indent is -ignored, so is any EOL token. +ignored, so is any NEWLINE token. + +If a NEWLINE is seen but it cannot be shifted, we try to shift an EOL +token instead. If that succeeds, we make a new copy of the NEWLINE +token and continue. This allows a NEWLINE to appear to be preceded by +an indefinite number of EOL tokens. + +The token number for `EOL` cannot be statically declared, so when the +parser starts we need to look through the array of non-terminals to find +the EOL. + +###### parser state + int tk_eol; + +###### find eol + p.tk_eol = 0; + while (strcmp(non_term[p.tk_eol], "EOL") != 0) + p.tk_eol += 1; + p.tk_eol += TK_reserved + config->known_count; For other tokens, we shift the next token if that is possible, otherwise we try to reduce a production. @@ -2827,19 +2811,21 @@ we try to reduce a production. 
continue; } - switch (shift(&p, tk->num, tk, states)) { - case 1: + if (shift(&p, tk->num, tk, states)) { if (tk->num == TK_out) p.ignored_indents >>= 1; if (tk->num == TK_in) p.ignored_indents <<= 1; + parser_trace_action(trace, "Shift"); tk = NULL; - /* fallthrough */ - case 2: - parser_trace_action(trace, tk ? "ShiftEOL" : "Shift"); ## did shift continue; + } else if (tk->num == TK_newline && + shift(&p, p.tk_eol, tk, states)) { + tk = tok_copy(*tk); + parser_trace_action(trace, "ShiftEOL"); + continue; } if (tk->num == TK_in && states[p.stack[p.tos-1].state].go_to_cnt > 0) { -- 2.43.0