From 32f5baf8857ef1f48451cbb5eacbd0f2f4528abc Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sun, 24 Nov 2013 17:50:09 +1100 Subject: [PATCH] parsergen: recorded a prefered shift-symbol for error recovery. When we find there is no valid parse step, one option that we don't currently try is to synthesize a symbol and shift it. i.e. insert a missing symbol. A future patch will provide a circumstance where this is the ideal response, so here we choose a symbol which could usefully be shifted. We choose the one that will get us closest to the end of a production. Signed-off-by: NeilBrown --- csrc/parsergen.mdc | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/csrc/parsergen.mdc b/csrc/parsergen.mdc index ec9b8f2..252b3d6 100644 --- a/csrc/parsergen.mdc +++ b/csrc/parsergen.mdc @@ -1754,8 +1754,8 @@ The table of nonterminals used for tracing is a similar array. ### States and the goto tables. -For each state we record the goto table and the reducible production if -there is one. +For each state we record the goto table, the reducible production if +there is one, or a symbol to shift for error recovery. Some of the details of the reducible production are stored in the `do_reduce` function to come later. Here we store the production number, the body size (useful for stack management) and the resulting symbol (useful @@ -1776,6 +1776,7 @@ The go to table is stored in a simple array of `sym` and corresponding short reduce_prod; short reduce_size; short reduce_sym; + short shift_sym; }; @@ -1806,24 +1807,39 @@ The go to table is stored in a simple array of `sym` and corresponding fprintf(f, "static const struct state states[] = {\n"); for (i = 0; i < g->states; i++) { struct itemset *is = g->statetab[i]; - int j, prod = -1; + int j, prod = -1, prod_len; + int shift_sym = -1; + int shift_len = 0, shift_remain = 0; for (j = 0; j < is->items.cnt; j++) { int itm = is->items.syms[j]; int p = item_prod(itm); int bp = item_index(itm); struct production *pr = g->productions[p]; - if (bp < pr->body_size) + if (bp < pr->body_size) { + if (shift_sym < 0 || + (shift_len == bp && shift_remain > pr->body_size - bp)) { + shift_sym = pr->body[bp]->num; + shift_len = bp; + shift_remain = pr->body_size - bp; + } continue; + } /* This is what we reduce */ - prod = p; - break; + if (prod < 0 || prod_len < pr->body_size) { + prod = p; + prod_len = pr->body_size; + } } - fprintf(f, "\t[%d] = { %d, goto_%d, %d, %d, %d },\n", - i, is->go_to.cnt, i, prod, - prod < 0 ? -1 : g->productions[prod]->body_size, - prod < 0 ? -1 : g->productions[prod]->head->num); + if (prod >= 0) + fprintf(f, "\t[%d] = { %d, goto_%d, %d, %d, %d, 0 },\n", + i, is->go_to.cnt, i, prod, + g->productions[prod]->body_size, + g->productions[prod]->head->num); + else + fprintf(f, "\t[%d] = { %d, goto_%d, -1, -1, -1, %d },\n", + i, is->go_to.cnt, i, shift_sym); } fprintf(f, "};\n\n"); } -- 2.43.0