From 9729d02ec39220036d9e137e45aee7615f484ceb Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sun, 4 May 2014 20:21:00 +1000 Subject: [PATCH] parsergen: compute starts_line for each state. Using the per-symbol "can_eol" we can deduce for each state whether it is expected to (sometime) start a line-oriented syntax element. The flag is "starts_line" and is made available to the parser. Signed-off-by: NeilBrown --- csrc/parsergen.mdc | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/csrc/parsergen.mdc b/csrc/parsergen.mdc index fc14b91..2cd4f5c 100644 --- a/csrc/parsergen.mdc +++ b/csrc/parsergen.mdc @@ -1128,6 +1128,7 @@ should happen, so we don't need to search a second time. struct symset items; struct symset go_to; char completed; + char starts_line; }; ###### grammar fields @@ -1165,7 +1166,7 @@ recalculated and their LA sets updated. them to a data structure, of freeing them. static int add_itemset(struct grammar *g, struct symset ss, - enum grammar_type type) + enum grammar_type type, int starts_line) { struct itemset **where, *is; int i; @@ -1177,6 +1178,7 @@ them to a data structure, of freeing them. is->items = ss; is->next = *where; is->go_to = INIT_DATASET; + is->starts_line = starts_line; *where = is; return is->state; } @@ -1251,6 +1253,7 @@ though. sn = save_set(g, LA); LA = set_find(g, sn); } + /* Add productions for this symbol */ for (p2 = s->first_production; p2 < g->production_count && @@ -1284,15 +1287,20 @@ with a pre-existing itemset). ###### derive itemsets // Now we have a completed itemset, so we need to - // create all the 'next' itemset and create them + // compute all the 'next' itemsets and create them // if they don't exist. for (i = 0; i < done.cnt; i++) { int j; unsigned short state; + int starts_line = 0; + struct symbol *sym = g->symtab[done.syms[i]]; struct symset newitemset = INIT_SYMSET; if (type >= LALR) newitemset = INIT_DATASET; + if (sym->can_eol || + (sym->nullable && is->starts_line)) + starts_line = 1; for (j = 0; j < is->items.cnt; j++) { int itm = is->items.syms[j]; int p = item_prod(itm); @@ -1303,7 +1311,7 @@ with a pre-existing itemset). if (bp == pr->body_size) continue; - if (pr->body[bp]->num != done.syms[i]) + if (pr->body[bp] != sym) continue; if (type >= LALR) la = is->items.data[j]; @@ -1325,7 +1333,7 @@ with a pre-existing itemset). } } } - state = add_itemset(g, newitemset, type); + state = add_itemset(g, newitemset, type, starts_line); if (symset_find(&is->go_to, done.syms[i]) < 0) symset_add(&is->go_to, done.syms[i], state); } @@ -1349,7 +1357,7 @@ with `TK_eof` as the LA set. } // production 0, offset 0 (with no data) symset_add(&first, item_num(0, 0), la); - add_itemset(g, first, type); + add_itemset(g, first, type, g->productions[0]->body[0]->can_eol); for (again = 0, is = g->items; is; is = is->next ?: again ? (again = 0, g->items) : NULL) { @@ -1568,7 +1576,7 @@ Now we can report all the item sets complete with items, LA sets, and GO TO. for (s = 0; s < g->states; s++) { int j; struct itemset *is = g->statetab[s]; - printf(" Itemset %d:\n", s); + printf(" Itemset %d:%s\n", s, is->starts_line?" (startsline)":""); for (j = 0; j < is->items.cnt; j++) { report_item(g, is->items.syms[j]); if (is->items.data != NO_DATA) @@ -1830,6 +1838,7 @@ The go to table is stored in a simple array of `sym` and corresponding short reduce_size; short reduce_sym; short shift_sym; + short starts_line; }; @@ -1886,13 +1895,15 @@ The go to table is stored in a simple array of `sym` and corresponding } if (prod >= 0) - fprintf(f, "\t[%d] = { %d, goto_%d, %d, %d, %d, 0 },\n", + fprintf(f, "\t[%d] = { %d, goto_%d, %d, %d, %d, 0, %d },\n", i, is->go_to.cnt, i, prod, g->productions[prod]->body_size, - g->productions[prod]->head->num); + g->productions[prod]->head->num, + is->starts_line); else - fprintf(f, "\t[%d] = { %d, goto_%d, -1, -1, -1, %d },\n", - i, is->go_to.cnt, i, shift_sym); + fprintf(f, "\t[%d] = { %d, goto_%d, -1, -1, -1, %d, %d },\n", + i, is->go_to.cnt, i, shift_sym, + is->starts_line); } fprintf(f, "};\n\n"); } -- 2.43.0