From: NeilBrown <neil@brown.name>
Date: Mon, 29 Jan 2018 05:20:01 +0000 (+1100)
Subject: parsergen.mdc: add precedence handling
X-Git-Tag: StoneyCreek~19
X-Git-Url: https://ocean-lang.org/code/?p=ocean;a=commitdiff_plain;h=fe605fdc4f7d109ac9041d477f546fdb9fdae86c

parsergen.mdc: add precedence handling

This hasn't been documented properly in the text yet, but
the example has been changed to work and it seems good.

There is no support for precedence to select between two
reductions because I don't believe that ever happens :-) and
I haven't done anything special for non-associative because
I don't know what I would do.

Signed-off-by: NeilBrown <neil@brown.name>
---

diff --git a/csrc/parsergen.mdc b/csrc/parsergen.mdc
index 97bba42..0d78324 100644
--- a/csrc/parsergen.mdc
+++ b/csrc/parsergen.mdc
@@ -351,6 +351,7 @@ production inherits from the last symbol which has a precedence.
 			s->precedence = g->prec_levels;
 			s->assoc = assoc;
 			found += 1;
+			t = token_next(ts);
 		}
 		if (found == 0)
 			err = "No symbols given on precedence line";
@@ -1185,6 +1186,8 @@ FIXME: document min_prefix
 		short state;
 		struct symset items;
 		struct symset go_to;
+		enum assoc assoc;
+		unsigned short precedence;
 		char completed;
 		char starts_line;
 		int min_prefix;
@@ -1225,6 +1228,7 @@ recalculated and their LA sets updated.
 them to a data structure, of freeing them.
 
 	static int add_itemset(struct grammar *g, struct symset ss,
+	                       enum assoc assoc, unsigned short precedence,
 	                       enum grammar_type type)
 	{
 		struct itemset **where, *is;
@@ -1235,6 +1239,8 @@ them to a data structure, of freeing them.
 			is->state = g->states;
 			g->states += 1;
 			is->items = ss;
+			is->assoc = assoc;
+			is->precedence = precedence;
 			is->next = *where;
 			is->go_to = INIT_DATASET;
 			*where = is;
@@ -1281,8 +1287,16 @@ is used in the next stage.
 If any of these symbols are flagged as starting a line, then this
 state must be a `starts_line` state so now is a good time to record that.
 
-NOTE: precedence handling should happen here - I haven't written this yet
-though.
+When itemsets are created we assign a precedence to the itemset from
+the complete item, if there is one.  We ignore the possibility of
+there being two and don't (currently) handle precedence in such
+grammars.  When completing a grammar we ignore any item where DOT is
+followed by a terminal with a precedence lower (numerically higher)
+than that for the itemset.  Unless the terminal has right
+associativity, we also ignore items where the terminal has the same
+precedence.  The result is that unwanted items are still in the
+itemset, but the terminal doesn't get into the go to set, so the item
+is ineffective.
 
 ###### complete itemset
 	for (i = 0; i < is->items.cnt; i++) {
@@ -1300,6 +1314,18 @@ though.
 		if (bs == pr->body_size)
 			continue;
 		s = pr->body[bs];
+		if (s->precedence && is->precedence &&
+		    is->precedence < s->precedence)
+			/* This terminal has a low precedence and
+			 * shouldn't be shifted
+			 */
+			continue;
+		if (s->precedence && is->precedence &&
+		    is->precedence == s->precedence && s->assoc != Right)
+			/* This terminal has a matching precedence and is
+			 * not Right-associative, so we mustn't shift it.
+			 */
+			continue;
 		if (symset_find(&done, s->num) < 0) {
 			symset_add(&done, s->num, 0);
 			if (s->line_like)
@@ -1359,6 +1385,8 @@ with a pre-existing itemset).
 		int j;
 		unsigned short state;
 		struct symbol *sym = g->symtab[done.syms[i]];
+		enum assoc assoc = Non;
+		unsigned short precedence = 0;
 		struct symset newitemset = INIT_SYMSET;
 		if (type >= LALR)
 			newitemset = INIT_DATASET;
@@ -1378,6 +1406,14 @@ with a pre-existing itemset).
 			if (type >= LALR)
 				la = is->items.data[j];
 			pos = symset_find(&newitemset, pr->head->num);
+			if (bp + 1 == pr->body_size &&
+			    pr->precedence > 0 &&
+			    (precedence == 0 ||
+			     pr->precedence < precedence)) {
+				// new itemset is reducible and has a precedence.
+				precedence = pr->precedence;
+				assoc = pr->assoc;
+			}
 			if (pos < 0)
 				symset_add(&newitemset, item_num(p, bp+1), la);
 			else if (type >= LALR) {
@@ -1395,12 +1431,12 @@ with a pre-existing itemset).
 				}
 			}
 		}
-		state = add_itemset(g, newitemset, type);
+		state = add_itemset(g, newitemset, assoc, precedence, type);
 		if (symset_find(&is->go_to, done.syms[i]) < 0)
 			symset_add(&is->go_to, done.syms[i], state);
 	}
 
-All that is left is to crate the initial itemset from production zero, and
+All that is left is to create the initial itemset from production zero, and
 with `TK_eof` as the LA set.
 
 ###### functions
@@ -1419,7 +1455,7 @@ with `TK_eof` as the LA set.
 		}
 		// production 0, offset 0 (with no data)
 		symset_add(&first, item_num(0, 0), la);
-		add_itemset(g, first, type);
+		add_itemset(g, first, Non, 0, type);
 		for (again = 0, is = g->items;
 		     is;
 		     is = is->next ?: again ? (again = 0, g->items) : NULL) {
@@ -1595,9 +1631,15 @@ it up a bit.  First the items, with production number and associativity.
 		if (dot == pr->body_size)
 			printf(" .");
 		printf(" [%d]", p);
-		if (pr->precedence)
+		if (pr->precedence && dot == pr->body_size)
 			printf(" (%d%s)", pr->precedence,
 			       assoc_names[pr->assoc]);
+		if (dot < pr->body_size &&
+		    pr->body[dot]->precedence) {
+			struct symbol *s = pr->body[dot];
+			printf(" [%d%s]", s->precedence,
+			       assoc_names[s->assoc]);
+		}
 		printf("\n");
 	}
 
@@ -1642,8 +1684,11 @@ Now we can report all the item sets complete with items, LA sets, and GO TO.
 		for (s = 0; s < g->states; s++) {
 			int j;
 			struct itemset *is = g->statetab[s];
-			printf("  Itemset %d:%s min prefix=%d\n",
+			printf("  Itemset %d:%s min prefix=%d",
 			       s, is->starts_line?" (startsline)":"", is->min_prefix);
+			if (is->precedence)
+				printf(" %d%s", is->precedence, assoc_names[is->assoc]);
+			printf("\n");
 			for (j = 0; j < is->items.cnt; j++) {
 				report_item(g, is->items.syms[j]);
 				if (is->items.data != NO_DATA)
@@ -2998,6 +3043,9 @@ an error.
 
 # calc: grammar
 
+	$LEFT * /
+	$LEFT + -
+
 	Session -> Session Line
 		| Line
 
@@ -3020,13 +3068,9 @@ an error.
 		| ERROR NEWLINE ${ printf("Skipped a bad line\n"); }$
 
 	$number
-	Expression -> Expression + Term ${ mpq_init($0.val); mpq_add($0.val, $1.val, $3.val); }$
-		| Expression - Term ${ mpq_init($0.val); mpq_sub($0.val, $1.val, $3.val); }$
-		| Term ${ mpq_init($0.val); mpq_set($0.val, $1.val); }$
-
-	Term -> Term * Factor ${ mpq_init($0.val); mpq_mul($0.val, $1.val, $3.val); }$
-		| Term / Factor ${ mpq_init($0.val); mpq_div($0.val, $1.val, $3.val); }$
-		| Factor ${ mpq_init($0.val); mpq_set($0.val, $1.val); }$
-
-	Factor -> NUMBER ${ if (number_parse($0.val, $0.tail, $1.txt) == 0) mpq_init($0.val); }$
+	Expression -> Expression + Expression ${ mpq_init($0.val); mpq_add($0.val, $1.val, $3.val); }$
+		| Expression - Expression ${ mpq_init($0.val); mpq_sub($0.val, $1.val, $3.val); }$
+		| Expression * Expression ${ mpq_init($0.val); mpq_mul($0.val, $1.val, $3.val); }$
+		| Expression / Expression ${ mpq_init($0.val); mpq_div($0.val, $1.val, $3.val); }$
+		| NUMBER ${ if (number_parse($0.val, $0.tail, $1.txt) == 0) mpq_init($0.val); }$
 		| ( Expression ) ${ mpq_init($0.val); mpq_set($0.val, $2.val); }$