parsergen.mdc: add precedence handling

[ocean] / csrc / parsergen.mdc
diff --git a/csrc/parsergen.mdc b/csrc/parsergen.mdc

index 97bba421519d13d35c9230ace1722c99e9536c2b..0d7832491444975e85ef8b28c50df7e29246f5ab 100644 (file)
--- a/csrc/parsergen.mdc
+++ b/csrc/parsergen.mdc
@@ -351,6 +351,7 @@ production inherits from the last symbol which has a precedence.
                         s->precedence = g->prec_levels;
                         s->assoc = assoc;
                         found += 1;
+                       t = token_next(ts);
                 }
                 if (found == 0)
                         err = "No symbols given on precedence line";
@@ -1185,6 +1186,8 @@ FIXME: document min_prefix
                 short state;
                 struct symset items;
                 struct symset go_to;
+               enum assoc assoc;
+               unsigned short precedence;
                 char completed;
                 char starts_line;
                 int min_prefix;
@@ -1225,6 +1228,7 @@ recalculated and their LA sets updated.
  them to a data structure, of freeing them.
  
         static int add_itemset(struct grammar *g, struct symset ss,
+                              enum assoc assoc, unsigned short precedence,
                                enum grammar_type type)
         {
                 struct itemset **where, *is;
@@ -1235,6 +1239,8 @@ them to a data structure, of freeing them.
                         is->state = g->states;
                         g->states += 1;
                         is->items = ss;
+                       is->assoc = assoc;
+                       is->precedence = precedence;
                         is->next = *where;
                         is->go_to = INIT_DATASET;
                         *where = is;
@@ -1281,8 +1287,16 @@ is used in the next stage.
  If any of these symbols are flagged as starting a line, then this
  state must be a `starts_line` state so now is a good time to record that.
  
-NOTE: precedence handling should happen here - I haven't written this yet
-though.
+When itemsets are created we assign a precedence to the itemset from
+the complete item, if there is one.  We ignore the possibility of
+there being two and don't (currently) handle precedence in such
+grammars.  When completing a grammar we ignore any item where DOT is
+followed by a terminal with a precedence lower (numerically higher)
+than that for the itemset.  Unless the terminal has right
+associativity, we also ignore items where the terminal has the same
+precedence.  The result is that unwanted items are still in the
+itemset, but the terminal doesn't get into the go to set, so the item
+is ineffective.
  
  ###### complete itemset
         for (i = 0; i < is->items.cnt; i++) {
@@ -1300,6 +1314,18 @@ though.
                 if (bs == pr->body_size)
                         continue;
                 s = pr->body[bs];
+               if (s->precedence && is->precedence &&
+                   is->precedence < s->precedence)
+                       /* This terminal has a low precedence and
+                        * shouldn't be shifted
+                        */
+                       continue;
+               if (s->precedence && is->precedence &&
+                   is->precedence == s->precedence && s->assoc != Right)
+                       /* This terminal has a matching precedence and is
+                        * not Right-associative, so we mustn't shift it.
+                        */
+                       continue;
                 if (symset_find(&done, s->num) < 0) {
                         symset_add(&done, s->num, 0);
                         if (s->line_like)
@@ -1359,6 +1385,8 @@ with a pre-existing itemset).
                 int j;
                 unsigned short state;
                 struct symbol *sym = g->symtab[done.syms[i]];
+               enum assoc assoc = Non;
+               unsigned short precedence = 0;
                 struct symset newitemset = INIT_SYMSET;
                 if (type >= LALR)
                         newitemset = INIT_DATASET;
@@ -1378,6 +1406,14 @@ with a pre-existing itemset).
                         if (type >= LALR)
                                 la = is->items.data[j];
                         pos = symset_find(&newitemset, pr->head->num);
+                       if (bp + 1 == pr->body_size &&
+                           pr->precedence > 0 &&
+                           (precedence == 0 ||
+                            pr->precedence < precedence)) {
+                               // new itemset is reducible and has a precedence.
+                               precedence = pr->precedence;
+                               assoc = pr->assoc;
+                       }
                         if (pos < 0)
                                 symset_add(&newitemset, item_num(p, bp+1), la);
                         else if (type >= LALR) {
@@ -1395,12 +1431,12 @@ with a pre-existing itemset).
                                 }
                         }
                 }
-               state = add_itemset(g, newitemset, type);
+               state = add_itemset(g, newitemset, assoc, precedence, type);
                 if (symset_find(&is->go_to, done.syms[i]) < 0)
                         symset_add(&is->go_to, done.syms[i], state);
         }
  
-All that is left is to crate the initial itemset from production zero, and
+All that is left is to create the initial itemset from production zero, and
  with `TK_eof` as the LA set.
  
  ###### functions
@@ -1419,7 +1455,7 @@ with `TK_eof` as the LA set.
                 }
                 // production 0, offset 0 (with no data)
                 symset_add(&first, item_num(0, 0), la);
-               add_itemset(g, first, type);
+               add_itemset(g, first, Non, 0, type);
                 for (again = 0, is = g->items;
                      is;
                      is = is->next ?: again ? (again = 0, g->items) : NULL) {
@@ -1595,9 +1631,15 @@ it up a bit.  First the items, with production number and associativity.
                 if (dot == pr->body_size)
                         printf(" .");
                 printf(" [%d]", p);
-               if (pr->precedence)
+               if (pr->precedence && dot == pr->body_size)
                         printf(" (%d%s)", pr->precedence,
                                assoc_names[pr->assoc]);
+               if (dot < pr->body_size &&
+                   pr->body[dot]->precedence) {
+                       struct symbol *s = pr->body[dot];
+                       printf(" [%d%s]", s->precedence,
+                              assoc_names[s->assoc]);
+               }
                 printf("\n");
         }
  
@@ -1642,8 +1684,11 @@ Now we can report all the item sets complete with items, LA sets, and GO TO.
                 for (s = 0; s < g->states; s++) {
                         int j;
                         struct itemset *is = g->statetab[s];
-                       printf("  Itemset %d:%s min prefix=%d\n",
+                       printf("  Itemset %d:%s min prefix=%d",
                                s, is->starts_line?" (startsline)":"", is->min_prefix);
+                       if (is->precedence)
+                               printf(" %d%s", is->precedence, assoc_names[is->assoc]);
+                       printf("\n");
                         for (j = 0; j < is->items.cnt; j++) {
                                 report_item(g, is->items.syms[j]);
                                 if (is->items.data != NO_DATA)
@@ -2998,6 +3043,9 @@ an error.
  
  # calc: grammar
  
+       $LEFT * /
+       $LEFT + -
+
         Session -> Session Line
                 | Line
  
@@ -3020,13 +3068,9 @@ an error.
                 | ERROR NEWLINE ${ printf("Skipped a bad line\n"); }$
  
         $number
-       Expression -> Expression + Term ${ mpq_init($0.val); mpq_add($0.val, $1.val, $3.val); }$
-               | Expression - Term ${ mpq_init($0.val); mpq_sub($0.val, $1.val, $3.val); }$
-               | Term ${ mpq_init($0.val); mpq_set($0.val, $1.val); }$
-
-       Term -> Term * Factor ${ mpq_init($0.val); mpq_mul($0.val, $1.val, $3.val); }$
-               | Term / Factor ${ mpq_init($0.val); mpq_div($0.val, $1.val, $3.val); }$
-               | Factor ${ mpq_init($0.val); mpq_set($0.val, $1.val); }$
-
-       Factor -> NUMBER ${ if (number_parse($0.val, $0.tail, $1.txt) == 0) mpq_init($0.val); }$
+       Expression -> Expression + Expression ${ mpq_init($0.val); mpq_add($0.val, $1.val, $3.val); }$
+               | Expression - Expression ${ mpq_init($0.val); mpq_sub($0.val, $1.val, $3.val); }$
+               | Expression * Expression ${ mpq_init($0.val); mpq_mul($0.val, $1.val, $3.val); }$
+               | Expression / Expression ${ mpq_init($0.val); mpq_div($0.val, $1.val, $3.val); }$
+               | NUMBER ${ if (number_parse($0.val, $0.tail, $1.txt) == 0) mpq_init($0.val); }$
                 | ( Expression ) ${ mpq_init($0.val); mpq_set($0.val, $2.val); }$