parsergen: remove symbol synthesis option.

[ocean] / csrc / parsergen.mdc
diff --git a/csrc/parsergen.mdc b/csrc/parsergen.mdc

index 7499e07c9e5996688cf0642ab1cea7c441cf0a30..48f569168338f159598d532be78f79cd21835c10 100644 (file)
--- a/csrc/parsergen.mdc
+++ b/csrc/parsergen.mdc
@@ -351,6 +351,7 @@ production inherits from the last symbol which has a precedence.
                         s->precedence = g->prec_levels;
                         s->assoc = assoc;
                         found += 1;
+                       t = token_next(ts);
                 }
                 if (found == 0)
                         err = "No symbols given on precedence line";
@@ -452,6 +453,7 @@ Now we have all the bits we need to parse a full production.
         struct symbol **body;
         int             body_size;
         struct text     code;
+       int             code_line;
  
  ###### symbol fields
         int first_production;
@@ -501,6 +503,7 @@ Now we have all the bits we need to parse a full production.
                         tk = token_next(state);
                 }
                 if (tk.num == TK_open) {
+                       p.code_line = tk.line;
                         p.code = collect_code(state, tk);
                         if (p.code.txt == NULL) {
                                 err = "code fragment not closed properly";
@@ -1183,6 +1186,8 @@ FIXME: document min_prefix
                 short state;
                 struct symset items;
                 struct symset go_to;
+               enum assoc assoc;
+               unsigned short precedence;
                 char completed;
                 char starts_line;
                 int min_prefix;
@@ -1223,6 +1228,7 @@ recalculated and their LA sets updated.
  them to a data structure, of freeing them.
  
         static int add_itemset(struct grammar *g, struct symset ss,
+                              enum assoc assoc, unsigned short precedence,
                                enum grammar_type type)
         {
                 struct itemset **where, *is;
@@ -1233,6 +1239,8 @@ them to a data structure, of freeing them.
                         is->state = g->states;
                         g->states += 1;
                         is->items = ss;
+                       is->assoc = assoc;
+                       is->precedence = precedence;
                         is->next = *where;
                         is->go_to = INIT_DATASET;
                         *where = is;
@@ -1279,8 +1287,16 @@ is used in the next stage.
  If any of these symbols are flagged as starting a line, then this
  state must be a `starts_line` state so now is a good time to record that.
  
-NOTE: precedence handling should happen here - I haven't written this yet
-though.
+When itemsets are created we assign a precedence to the itemset from
+the complete item, if there is one.  We ignore the possibility of
+there being two and don't (currently) handle precedence in such
+grammars.  When completing an itemset we ignore any item where DOT is
+followed by a terminal with a precedence lower (numerically higher)
+than that for the itemset.  Unless the terminal has right
+associativity, we also ignore items where the terminal has the same
+precedence.  The result is that unwanted items are still in the
+itemset, but the terminal doesn't get into the go to set, so the item
+is ineffective.
  
  ###### complete itemset
         for (i = 0; i < is->items.cnt; i++) {
@@ -1298,6 +1314,18 @@ though.
                 if (bs == pr->body_size)
                         continue;
                 s = pr->body[bs];
+               if (s->precedence && is->precedence &&
+                   is->precedence < s->precedence)
+                       /* This terminal has a low precedence and
+                        * shouldn't be shifted
+                        */
+                       continue;
+               if (s->precedence && is->precedence &&
+                   is->precedence == s->precedence && s->assoc != Right)
+                       /* This terminal has a matching precedence and is
+                        * not Right-associative, so we mustn't shift it.
+                        */
+                       continue;
                 if (symset_find(&done, s->num) < 0) {
                         symset_add(&done, s->num, 0);
                         if (s->line_like)
@@ -1357,6 +1385,8 @@ with a pre-existing itemset).
                 int j;
                 unsigned short state;
                 struct symbol *sym = g->symtab[done.syms[i]];
+               enum assoc assoc = Non;
+               unsigned short precedence = 0;
                 struct symset newitemset = INIT_SYMSET;
                 if (type >= LALR)
                         newitemset = INIT_DATASET;
@@ -1376,6 +1406,14 @@ with a pre-existing itemset).
                         if (type >= LALR)
                                 la = is->items.data[j];
                         pos = symset_find(&newitemset, pr->head->num);
+                       if (bp + 1 == pr->body_size &&
+                           pr->precedence > 0 &&
+                           (precedence == 0 ||
+                            pr->precedence < precedence)) {
+                               // new itemset is reducible and has a precedence.
+                               precedence = pr->precedence;
+                               assoc = pr->assoc;
+                       }
                         if (pos < 0)
                                 symset_add(&newitemset, item_num(p, bp+1), la);
                         else if (type >= LALR) {
@@ -1393,12 +1431,12 @@ with a pre-existing itemset).
                                 }
                         }
                 }
-               state = add_itemset(g, newitemset, type);
+               state = add_itemset(g, newitemset, assoc, precedence, type);
                 if (symset_find(&is->go_to, done.syms[i]) < 0)
                         symset_add(&is->go_to, done.syms[i], state);
         }
  
-All that is left is to crate the initial itemset from production zero, and
+All that is left is to create the initial itemset from production zero, and
  with `TK_eof` as the LA set.
  
  ###### functions
@@ -1417,7 +1455,7 @@ with `TK_eof` as the LA set.
                 }
                 // production 0, offset 0 (with no data)
                 symset_add(&first, item_num(0, 0), la);
-               add_itemset(g, first, type);
+               add_itemset(g, first, Non, 0, type);
                 for (again = 0, is = g->items;
                      is;
                      is = is->next ?: again ? (again = 0, g->items) : NULL) {
@@ -1593,9 +1631,15 @@ it up a bit.  First the items, with production number and associativity.
                 if (dot == pr->body_size)
                         printf(" .");
                 printf(" [%d]", p);
-               if (pr->precedence)
+               if (pr->precedence && dot == pr->body_size)
                         printf(" (%d%s)", pr->precedence,
                                assoc_names[pr->assoc]);
+               if (dot < pr->body_size &&
+                   pr->body[dot]->precedence) {
+                       struct symbol *s = pr->body[dot];
+                       printf(" [%d%s]", s->precedence,
+                              assoc_names[s->assoc]);
+               }
                 printf("\n");
         }
  
@@ -1640,8 +1684,11 @@ Now we can report all the item sets complete with items, LA sets, and GO TO.
                 for (s = 0; s < g->states; s++) {
                         int j;
                         struct itemset *is = g->statetab[s];
-                       printf("  Itemset %d:%s min prefix=%d\n",
+                       printf("  Itemset %d:%s min prefix=%d",
                                s, is->starts_line?" (startsline)":"", is->min_prefix);
+                       if (is->precedence)
+                               printf(" %d%s", is->precedence, assoc_names[is->assoc]);
+                       printf("\n");
                         for (j = 0; j < is->items.cnt; j++) {
                                 report_item(g, is->items.syms[j]);
                                 if (is->items.data != NO_DATA)
@@ -1903,7 +1950,6 @@ The go to table is stored in a simple array of `sym` and corresponding
                 short reduce_prod;
                 short reduce_size;
                 short reduce_sym;
-               short shift_sym;
                 short starts_line;
                 short min_prefix;
         };
@@ -1937,23 +1983,15 @@ The go to table is stored in a simple array of `sym` and corresponding
                 for (i = 0; i < g->states; i++) {
                         struct itemset *is = g->statetab[i];
                         int j, prod = -1, prod_len;
-                       int shift_sym = -1;
-                       int shift_len = 0, shift_remain = 0;
+
                         for (j = 0; j < is->items.cnt; j++) {
                                 int itm = is->items.syms[j];
                                 int p = item_prod(itm);
                                 int bp = item_index(itm);
                                 struct production *pr = g->productions[p];
  
-                               if (bp < pr->body_size) {
-                                       if (shift_sym < 0 ||
-                                           (shift_len == bp && shift_remain > pr->body_size - bp)) {
-                                               shift_sym = pr->body[bp]->num;
-                                               shift_len = bp;
-                                               shift_remain = pr->body_size - bp;
-                                       }
+                               if (bp < pr->body_size)
                                         continue;
-                               }
                                 /* This is what we reduce */
                                 if (prod < 0 || prod_len < pr->body_size) {
                                         prod = p;
@@ -1962,14 +2000,14 @@ The go to table is stored in a simple array of `sym` and corresponding
                         }
  
                         if (prod >= 0)
-                               fprintf(f, "\t[%d] = { %d, goto_%d, %d, %d, %d, 0, %d, %d },\n",
+                               fprintf(f, "\t[%d] = { %d, goto_%d, %d, %d, %d, %d, %d },\n",
                                         i, is->go_to.cnt, i, prod,
                                         g->productions[prod]->body_size,
                                         g->productions[prod]->head->num,
                                         is->starts_line, is->min_prefix);
                         else
-                               fprintf(f, "\t[%d] = { %d, goto_%d, -1, -1, -1, %d, %d, %d },\n",
-                                       i, is->go_to.cnt, i, shift_sym,
+                               fprintf(f, "\t[%d] = { %d, goto_%d, -1, -1, -1, %d, %d },\n",
+                                       i, is->go_to.cnt, i,
                                         is->starts_line, is->min_prefix);
                 }
                 fprintf(f, "};\n\n");
@@ -2084,8 +2122,10 @@ automatically freed.  This is equivalent to assigning `NULL` to the pointer.
                         struct production *p = g->productions[i];
                         fprintf(f, "\tcase %d:\n", i);
  
-                       if (p->code.txt)
+                       if (p->code.txt) {
+                               fprintf(f, "#line %d \"%s\"\n", p->code_line, file);
                                 gen_code(p, f, g);
+                       }
  
                         if (p->head->struct_name.txt)
                                 fprintf(f, "\t\tret_size = sizeof(struct %.*s%s);\n",
@@ -2773,16 +2813,6 @@ one symbol for each line where newlines are allowed.
                                 parser_trace_action(trace, "Reduce");
                                 continue;
                         }
-                       if (tk->num == TK_out) {
-                               // Indent problem - synthesise tokens to get us
-                               // out of here.
-                               fprintf(stderr, "Synthesize %d to handle indent problem\n", states[tos->state].shift_sym);
-                               shift(&p, states[tos->state].shift_sym,
-                                     0, 1, tok_copy(*tk), states);
-                               // FIXME need to report this error somehow
-                               parser_trace_action(trace, "Synthesize");
-                               continue;
-                       }
                         /* Error. We walk up the stack until we
                          * find a state which will accept TK_error.
                          * We then shift in TK_error and see what state
@@ -2994,6 +3024,9 @@ an error.
  
  # calc: grammar
  
+       $LEFT * /
+       $LEFT + -
+
         Session -> Session Line
                 | Line
  
@@ -3016,13 +3049,9 @@ an error.
                 | ERROR NEWLINE ${ printf("Skipped a bad line\n"); }$
  
         $number
-       Expression -> Expression + Term ${ mpq_init($0.val); mpq_add($0.val, $1.val, $3.val); }$
-               | Expression - Term ${ mpq_init($0.val); mpq_sub($0.val, $1.val, $3.val); }$
-               | Term ${ mpq_init($0.val); mpq_set($0.val, $1.val); }$
-
-       Term -> Term * Factor ${ mpq_init($0.val); mpq_mul($0.val, $1.val, $3.val); }$
-               | Term / Factor ${ mpq_init($0.val); mpq_div($0.val, $1.val, $3.val); }$
-               | Factor ${ mpq_init($0.val); mpq_set($0.val, $1.val); }$
-
-       Factor -> NUMBER ${ if (number_parse($0.val, $0.tail, $1.txt) == 0) mpq_init($0.val); }$
+       Expression -> Expression + Expression ${ mpq_init($0.val); mpq_add($0.val, $1.val, $3.val); }$
+               | Expression - Expression ${ mpq_init($0.val); mpq_sub($0.val, $1.val, $3.val); }$
+               | Expression * Expression ${ mpq_init($0.val); mpq_mul($0.val, $1.val, $3.val); }$
+               | Expression / Expression ${ mpq_init($0.val); mpq_div($0.val, $1.val, $3.val); }$
+               | NUMBER ${ if (number_parse($0.val, $0.tail, $1.txt) == 0) mpq_init($0.val); }$
                 | ( Expression ) ${ mpq_init($0.val); mpq_set($0.val, $2.val); }$