From 1bfa4fc8cfc5fafa5b2b3ae6bdd9b77a4242e74a Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sun, 16 Jun 2019 08:29:16 +1000 Subject: [PATCH] parsergen: introduce $$NEWLINE pseudo-precedence. Sometimes we need a production to be terminated by a newline, but we don't want to consume the newline with a "shift". Case in point is: Block -> : StatementList Which can be used with Statement -> if Expression Block StatementList -> Statement I want this to parse: if something: if otherthing: action which might seem a little odd, but is syntactically sensible. The NEWLINE at the end is required, and must close both nested Statements. The NEWLINE will already cause a REDUCE, but if we don't have Block -> : StatementList NEWLINE then something else could force a reduce, and we don't want that. So introduce a marking "$$NEWLINE" which is similar to imposing a precedence on a production. Now Block -> : StatementList $$NEWLINE means that a NEWLINE is required to end a Block, but it isn't shifted. If anything else is found here, it is an error. We also allow $eof and OUT to reduce this production. Signed-off-by: NeilBrown --- csrc/parsergen.mdc | 41 ++++++++++++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/csrc/parsergen.mdc b/csrc/parsergen.mdc index d30275e..82d09b3 100644 --- a/csrc/parsergen.mdc +++ b/csrc/parsergen.mdc @@ -100,6 +100,7 @@ symbol. struct production { unsigned short precedence; enum assoc assoc; + char line_like; ## production fields }; struct grammar { @@ -496,12 +497,15 @@ Now we have all the bits we need to parse a full production. 
goto abort; } vs = sym_find(g, tk.txt); - if (vs->type != Virtual) { + if (vs->num == TK_newline) + p.line_like = 1; + else if (vs->type != Virtual) { err = "symbol after $$ must be virtual"; goto abort; + } else { + p.precedence = vs->precedence; + p.assoc = vs->assoc; } - p.precedence = vs->precedence; - p.assoc = vs->assoc; tk = token_next(state); } if (tk.num == TK_open) { @@ -1337,8 +1341,12 @@ into the go to set, so the item is ineffective. int to_end; add_first(pr, bs+1, &LA, g, &to_end); if (to_end) { - struct symset ss = set_find(g, is->items.data[i]); - symset_union(&LA, &ss); + if (pr->line_like) + symset_add(&LA, TK_newline, 0); + else { + struct symset ss = set_find(g, is->items.data[i]); + symset_union(&LA, &ss); + } } sn = save_set(g, LA); LA = set_find(g, sn); @@ -1635,6 +1643,8 @@ it up a bit. First the items, with production number and associativity. printf(" [%d%s]", s->precedence, assoc_names[s->assoc]); } + if (pr->line_like) + printf(" $$NEWLINE"); printf("\n"); } @@ -1833,7 +1843,7 @@ counted, and are reported as non-critical. 
This will not affect a int k; for (k = 0; k < la.cnt; k++) { int pos = symset_find(&shifts, la.syms[k]); - if (pos >= 0) { + if (pos >= 0 && la.syms[k] != TK_newline) { if (symset_find(&la, TK_newline) < 0) { printf(" State %d has SHIFT/REDUCE conflict on ", i); cnt++; @@ -1964,7 +1974,8 @@ The go to table is stored in a simple array of `sym` and corresponding short reduce_prod; short reduce_size; short reduce_sym; - short starts_line; + char starts_line; + char newline_only; short min_prefix; }; @@ -2013,13 +2024,15 @@ The go to table is stored in a simple array of `sym` and corresponding } if (prod >= 0) - fprintf(f, "\t[%d] = { %d, goto_%d, %d, %d, %d, %d, %d },\n", + fprintf(f, "\t[%d] = { %d, goto_%d, %d, %d, %d, %d, %d, %d },\n", i, is->go_to.cnt, i, prod, g->productions[prod]->body_size, g->productions[prod]->head->num, - is->starts_line, is->min_prefix); + is->starts_line, + g->productions[prod]->line_like, + is->min_prefix); else - fprintf(f, "\t[%d] = { %d, goto_%d, -1, -1, -1, %d, %d },\n", + fprintf(f, "\t[%d] = { %d, goto_%d, -1, -1, -1, %d, 0, %d },\n", i, is->go_to.cnt, i, is->starts_line, is->min_prefix); } @@ -2851,7 +2864,13 @@ checks if a given token is in any of these look-ahead sets. continue; } force_reduce: - if (states[tos->state].reduce_prod >= 0) { + if (states[tos->state].reduce_prod >= 0 && + states[tos->state].newline_only && + tk->num != TK_newline && tk->num != TK_eof && tk->num != TK_out) { + /* Anything other than newline in an error as this + * production must end at EOL + */ + } else if (states[tos->state].reduce_prod >= 0) { void **body; void *res; const struct state *nextstate = &states[tos->state]; -- 2.43.0