X-Git-Url: https://ocean-lang.org/code/?p=ocean;a=blobdiff_plain;f=csrc%2Fparsergen.mdc;h=0bef7934d3ec112b77eff4ecd4a007240ff92f34;hp=b78a00d3511e7825bcbe9ef8cbc7340b8ed66fae;hb=77165c59bca010dba0bdb8775552d6af944046a3;hpb=e9af634f13a0957cd510052483b6f9e35a986596 diff --git a/csrc/parsergen.mdc b/csrc/parsergen.mdc index b78a00d..0bef793 100644 --- a/csrc/parsergen.mdc +++ b/csrc/parsergen.mdc @@ -2684,6 +2684,25 @@ stack is empty, it always chooses zero as the next state. So `shift` finds the next state. If that succeeds it extends the allocations if needed and pushes all the information onto the stacks. +An extra complication is added to `shift` by the `EOL` token. This +token must be generated when a `NEWLINE` is seen, but an `EOL` is +expected. When this happens, the `NEWLINE` is NOT consumed, so multiple +EOL can appear before a NEWLINE. To indicate that the token was shifted +but not consumed, `shift` can return the special value `2`. The token +number for `EOL` cannot be statically declared, so when the parser +starts we need to look through the array of non-terminals to find the +EOL. + +###### parser state + int tk_eol; + +###### find eol + p.tk_eol = 0; + while (strcmp(non_term[p.tk_eol], "EOL") != 0) + p.tk_eol += 1; + p.tk_eol += TK_reserved + config->known_count; + + ###### parser functions static int shift(struct parser *p, @@ -2691,12 +2710,28 @@ allocations if needed and pushes all the information onto the stacks. const struct state states[]) { struct frame next = {0}; + int ret; int newstate = p->tos ? search(&states[p->stack[p->tos-1].state], sym) : 0; - if (newstate < 0) + if (newstate >= 0) + ret = 1; + else if (sym != TK_newline) return 0; + else { + // have a NEWLINE, might need an EOL + sym = p->tk_eol; + newstate = p->tos + ?
search(&states[p->stack[p->tos-1].state], + sym) + : 0; + if (newstate < 0) + return 0; + ret = 2; + asn = tok_copy(*(struct token*)asn); + } + if (p->tos >= p->stack_size) { p->stack_size += 10; p->stack = realloc(p->stack, p->stack_size @@ -2710,7 +2745,7 @@ allocations if needed and pushes all the information onto the stacks. p->stack[p->tos] = next; p->asn_stack[p->tos] = asn; p->tos++; - return 1; + return ret; } `pop` primarily moves the top of stack (`tos`) back down the required @@ -2733,8 +2768,8 @@ in. ### The heart of the parser. Now we have the parser. For each token we might shift it, trigger a -reduction, or start error handling. 2D tokens (IN, OUT, NEWLINE) might -also be ignored. Ignoring tokens is combined with shifting. +reduction, or start error handling. 2D tokens (IN, OUT, NEWLINE, EOL) +might also be ignored. Ignoring tokens is combined with shifting. ###### parser vars @@ -2768,7 +2803,7 @@ need a small stack of flags, which is easily stored as bits in an unsigned long ignored_indents; int indent_depth; -NEWLINE is ignored when in an indented section of text which was not +NEWLINE/EOL is ignored when in an indented section of text which was not explicitly expected by the grammar. So if the most recent indent is ignored, so is any EOL token. @@ -2788,15 +2823,8 @@ we try to reduce a production. continue; } - if (tk->num == TK_newline) { - if (1) { - free(tk); - tk = NULL; - parser_trace_action(trace, "Discard"); - continue; - } - } - if (shift(&p, tk->num, tk, states)) { + switch (shift(&p, tk->num, tk, states)) { + case 1: if (tk->num == TK_out) p.indent_depth -= 1; if (tk->num == TK_in) { @@ -2804,7 +2832,9 @@ we try to reduce a production. p.ignored_indents &= ~(1 << p.indent_depth); } tk = NULL; - parser_trace_action(trace, "Shift"); + /* fallthrough */ + case 2: + parser_trace_action(trace, tk ? "ShiftEOL" : "Shift"); ## did shift continue; } @@ -2915,6 +2945,8 @@ dropping tokens until either we manage to shift one, or reach end-of-file. 
{ ## parser vars + ## find eol + ## heart of parser free(tk);