From 9729d02ec39220036d9e137e45aee7615f484ceb Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Sun, 4 May 2014 20:21:00 +1000
Subject: [PATCH] parsergen: compute starts_line for each state.

Using the per-symbol "can_eol" we can deduce for each state whether
it is expected to (sometime) start a line-oriented syntax element.

The flag is "starts_line" and is made available to the parser.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 csrc/parsergen.mdc | 31 +++++++++++++++++++++----------
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/csrc/parsergen.mdc b/csrc/parsergen.mdc
index fc14b91..2cd4f5c 100644
--- a/csrc/parsergen.mdc
+++ b/csrc/parsergen.mdc
@@ -1128,6 +1128,7 @@ should happen, so we don't need to search a second time.
 		struct symset items;
 		struct symset go_to;
 		char completed;
+		char starts_line;
 	};
 
 ###### grammar fields
@@ -1165,7 +1166,7 @@ recalculated and their LA sets updated.
 them to a data structure, of freeing them.
 
 	static int add_itemset(struct grammar *g, struct symset ss,
-	                       enum grammar_type type)
+	                       enum grammar_type type, int starts_line)
 	{
 		struct itemset **where, *is;
 		int i;
@@ -1177,6 +1178,7 @@ them to a data structure, of freeing them.
 			is->items = ss;
 			is->next = *where;
 			is->go_to = INIT_DATASET;
+			is->starts_line = starts_line;
 			*where = is;
 			return is->state;
 		}
@@ -1251,6 +1253,7 @@ though.
 			sn = save_set(g, LA);
 			LA = set_find(g, sn);
 		}
+
 		/* Add productions for this symbol */
 		for (p2 = s->first_production;
 		     p2 < g->production_count &&
@@ -1284,15 +1287,20 @@ with a pre-existing itemset).
 
 ###### derive itemsets
 	// Now we have a completed itemset, so we need to
-	// create all the 'next' itemset and create them
+	// compute all the 'next' itemsets and create them
 	// if they don't exist.
 	for (i = 0; i < done.cnt; i++) {
 		int j;
 		unsigned short state;
+		int starts_line = 0;
+		struct symbol *sym = g->symtab[done.syms[i]];
 		struct symset newitemset = INIT_SYMSET;
 		if (type >= LALR)
 			newitemset = INIT_DATASET;
 
+		if (sym->can_eol ||
+		    (sym->nullable && is->starts_line))
+			starts_line = 1;
 		for (j = 0; j < is->items.cnt; j++) {
 			int itm = is->items.syms[j];
 			int p = item_prod(itm);
@@ -1303,7 +1311,7 @@ with a pre-existing itemset).
 
 			if (bp == pr->body_size)
 				continue;
-			if (pr->body[bp]->num != done.syms[i])
+			if (pr->body[bp] != sym)
 				continue;
 			if (type >= LALR)
 				la = is->items.data[j];
@@ -1325,7 +1333,7 @@ with a pre-existing itemset).
 				}
 			}
 		}
-		state = add_itemset(g, newitemset, type);
+		state = add_itemset(g, newitemset, type, starts_line);
 		if (symset_find(&is->go_to, done.syms[i]) < 0)
 			symset_add(&is->go_to, done.syms[i], state);
 	}
@@ -1349,7 +1357,7 @@ with `TK_eof` as the LA set.
 		}
 		// production 0, offset 0 (with no data)
 		symset_add(&first, item_num(0, 0), la);
-		add_itemset(g, first, type);
+		add_itemset(g, first, type, g->productions[0]->body[0]->can_eol);
 		for (again = 0, is = g->items;
 		     is;
 		     is = is->next ?: again ? (again = 0, g->items) : NULL) {
@@ -1568,7 +1576,7 @@ Now we can report all the item sets complete with items, LA sets, and GO TO.
 		for (s = 0; s < g->states; s++) {
 			int j;
 			struct itemset *is = g->statetab[s];
-			printf("  Itemset %d:\n", s);
+			printf("  Itemset %d:%s\n", s, is->starts_line?" (startsline)":"");
 			for (j = 0; j < is->items.cnt; j++) {
 				report_item(g, is->items.syms[j]);
 				if (is->items.data != NO_DATA)
@@ -1830,6 +1838,7 @@ The go to table is stored in a simple array of `sym` and corresponding
 		short reduce_size;
 		short reduce_sym;
 		short shift_sym;
+		short starts_line;
 	};
 
 
@@ -1886,13 +1895,15 @@ The go to table is stored in a simple array of `sym` and corresponding
 			}
 
 			if (prod >= 0)
-				fprintf(f, "\t[%d] = { %d, goto_%d, %d, %d, %d, 0 },\n",
+				fprintf(f, "\t[%d] = { %d, goto_%d, %d, %d, %d, 0, %d },\n",
 					i, is->go_to.cnt, i, prod,
 					g->productions[prod]->body_size,
-					g->productions[prod]->head->num);
+					g->productions[prod]->head->num,
+					is->starts_line);
 			else
-				fprintf(f, "\t[%d] = { %d, goto_%d, -1, -1, -1, %d },\n",
-					i, is->go_to.cnt, i, shift_sym);
+				fprintf(f, "\t[%d] = { %d, goto_%d, -1, -1, -1, %d, %d },\n",
+					i, is->go_to.cnt, i, shift_sym,
+					is->starts_line);
 		}
 		fprintf(f, "};\n\n");
 	}
-- 
2.43.0