parsergen: store reduction information separate from states.

author NeilBrown <neil@brown.name>

Sat, 13 Nov 2021 08:25:23 +0000 (19:25 +1100)

committer NeilBrown <neil@brown.name>

Sat, 13 Nov 2021 22:50:58 +0000 (09:50 +1100)
author NeilBrown <neil@brown.name>
Sat, 13 Nov 2021 08:25:23 +0000 (19:25 +1100)
committer NeilBrown <neil@brown.name>
Sat, 13 Nov 2021 22:50:58 +0000 (09:50 +1100)
diff --git a/csrc/parsergen.mdc b/csrc/parsergen.mdc

index 6cb5a7371aa3935663d05df35a186ef8d2a6da32..076f3823941a20c4abb05237da892bc726d70266 100644 (file)
--- a/csrc/parsergen.mdc
+++ b/csrc/parsergen.mdc
@@ -1876,9 +1876,10 @@ optional `FILE` to send tracing to.  The `token_config` gets the list of
  known words added and then is used with the `code_node` to initialize the
  scanner.
  
  known words added and then is used with the `code_node` to initialize the
  scanner.
  
-`parse_XX` then calls the library function `parser_run` to actually complete
-the parse.  This needs the `states` table and functions to call the various
-pieces of code provided in the grammar file, so they are generated first.
+`parse_XX` then calls the library function `parser_run` to actually
+complete the parse.  This needs the `states` table, the `reductions`
+table and functions to call the various pieces of code provided in the
+grammar file, so they are generated first.
  
  ###### parser_generate
  
  
  ###### parser_generate
  
@@ -1889,6 +1890,7 @@ pieces of code provided in the grammar file, so they are generated first.
                 gen_non_term(f, g);
                 gen_goto(f, g);
                 gen_states(f, g);
                 gen_non_term(f, g);
                 gen_goto(f, g);
                 gen_states(f, g);
+               gen_reductions(f, g);
                 gen_reduce(f, g, file, pre_reduce);
                 gen_free(f, g);
  
                 gen_reduce(f, g, file, pre_reduce);
                 gen_free(f, g);
  
@@ -1900,7 +1902,7 @@ pieces of code provided in the grammar file, so they are generated first.
                 fprintf(f, "\tconfig->words_marks = known;\n");
                 fprintf(f, "\tconfig->known_count = sizeof(known)/sizeof(known[0]);\n");
                 fprintf(f, "\ttokens = token_open(code, config);\n");
                 fprintf(f, "\tconfig->words_marks = known;\n");
                 fprintf(f, "\tconfig->known_count = sizeof(known)/sizeof(known[0]);\n");
                 fprintf(f, "\ttokens = token_open(code, config);\n");
-               fprintf(f, "\tvoid *rv = parser_run(tokens, states, do_reduce, do_free, trace, non_term, config);\n");
+               fprintf(f, "\tvoid *rv = parser_run(tokens, states, reductions, do_reduce, do_free, trace, non_term, config);\n");
                 fprintf(f, "\ttoken_close(tokens);\n");
                 fprintf(f, "\treturn rv;\n");
                 fprintf(f, "}\n\n");
                 fprintf(f, "\ttoken_close(tokens);\n");
                 fprintf(f, "\treturn rv;\n");
                 fprintf(f, "}\n\n");
@@ -1941,14 +1943,16 @@ The table of nonterminals used for tracing is a similar array.
                 fprintf(f, "};\n\n");
         }
  
                 fprintf(f, "};\n\n");
         }
  
-### States and the goto tables.
+### States, reductions, and the go to tables.
  
  
-For each state we record the goto table and details of the reducible
-production if there is one.
-Some of the details of the reducible production are stored in the
-`do_reduce` function to come later.  Here we store the production
-number, the body size (useful for stack management), and the resulting
-symbol (useful for knowing how to free data later).
+For each state we record the go to table and the reducible production if
+there is one, the details of which are in a separate table of
+reductions.  Some of the details of the reducible production are stored
+in the `do_reduce` function to come later.  In the states table we store
+the production number and in the reductions table: the body size (useful
+for stack management), the resulting symbol (useful for knowing how to
+free data later), and the size of the resulting asn object (useful for
+preallocating space).
  
  The go to table is stored in a simple array of `sym` and corresponding
  `state`.
  
  The go to table is stored in a simple array of `sym` and corresponding
  `state`.
@@ -1959,13 +1963,15 @@ The go to table is stored in a simple array of `sym` and corresponding
                 short sym;
                 short state;
         };
                 short sym;
                 short state;
         };
+       struct reduction {
+               short size;
+               short sym;
+               short result_size;
+       };
         struct state {
                 short go_to_cnt;
                 const struct lookup * go_to;
                 short reduce_prod;
         struct state {
                 short go_to_cnt;
                 const struct lookup * go_to;
                 short reduce_prod;
-               short reduce_size;
-               short reduce_sym;
-               short result_size;
         };
  
  ###### functions
         };
  
  ###### functions
@@ -1989,6 +1995,26 @@ The go to table is stored in a simple array of `sym` and corresponding
                 }
         }
  
                 }
         }
  
+       static void gen_reductions(FILE *f, struct grammar *g)
+       {
+               int i;
+               fprintf(f, "#line 0 \"gen_reductions\"\n");
+               fprintf(f, "static const struct reduction reductions[] = {\n");
+               for (i = 0; i < g->production_count; i++) {
+                       struct production *pr = g->productions[i];
+                       struct symbol *hd = pr->head;
+                       fprintf(f, "\t{%d, %d, ", pr->body_size, hd->num);
+                       if (hd->struct_name.txt == NULL)
+                               fprintf(f, "0 },\n");
+                       else
+                               fprintf(f, "sizeof(struct %.*s%s) },\n",
+                                       hd->struct_name.len,
+                                       hd->struct_name.txt,
+                                       hd->isref ? "*" : "");
+               }
+               fprintf(f, "};\n\n");
+       }
+
         static void gen_states(FILE *f, struct grammar *g)
         {
                 int i;
         static void gen_states(FILE *f, struct grammar *g)
         {
                 int i;
@@ -2013,24 +2039,10 @@ The go to table is stored in a simple array of `sym` and corresponding
                                 }
                         }
                         if (is->go_to.cnt)
                                 }
                         }
                         if (is->go_to.cnt)
-                               fprintf(f, "\t[%d] = { %d, goto_%d, ",
-                                       i, is->go_to.cnt, i);
+                               fprintf(f, "\t[%d] = { %d, goto_%d, %d },\n",
+                                       i, is->go_to.cnt, i, prod);
                         else
                         else
-                               fprintf(f, "\t[%d] = { 0, NULL, ", i);
-                       if (prod >= 0) {
-                               struct production *pr = g->productions[prod];
-                               struct symbol *hd = pr->head;
-                               fprintf(f, "%d, %d, %d, ", 
-                                       prod, pr->body_size, pr->head->num);
-                               if (hd->struct_name.txt == NULL)
-                                       fprintf(f, "0 },\n");
-                               else
-                                       fprintf(f, "sizeof(struct %.*s%s) },\n",
-                                               hd->struct_name.len,
-                                               hd->struct_name.txt,
-                                               hd->isref ? "*" : "");
-                       } else
-                               fprintf(f, "-1, -1, -1, -1 },\n");
+                               fprintf(f, "\t[%d] = { 0, NULL, %d },\n", i, prod);
                 }
                 fprintf(f, "};\n\n");
         }
                 }
                 fprintf(f, "};\n\n");
         }
@@ -2564,9 +2576,9 @@ recognised properly, and link with `libicuuc` as `libmdcode` requires that.
  Having analysed the grammar and generated all the tables, we only need
  the shift/reduce engine to bring it all together.
  
  Having analysed the grammar and generated all the tables, we only need
  the shift/reduce engine to bring it all together.
  
-### Goto table lookup
+### Go to table lookup
  
  
-The parser generator has nicely provided us with goto tables sorted by
+The parser generator has nicely provided us with go to tables sorted by
  symbol number.  We need a binary search function to find a symbol in the
  table.
  
  symbol number.  We need a binary search function to find a symbol in the
  table.
  
@@ -2665,7 +2677,7 @@ is exactly the same process as shifting in a terminal so we use the same
  function for both.  In both cases we provide the symbol.  The state is
  deduced from the current top-of-stack state and the new symbol.
  
  function for both.  In both cases we provide the symbol.  The state is
  deduced from the current top-of-stack state and the new symbol.
  
-To simplify other code we arrange for `shift` to fail if there is no `goto`
+To simplify other code we arrange for `shift` to fail if there is no `go to`
  state for the symbol.  This is useful in basic parsing due to our design
  that we shift when we can, and reduce when we cannot.  So the `shift`
  function reports if it could.
  state for the symbol.  This is useful in basic parsing due to our design
  that we shift when we can, and reduce when we cannot.  So the `shift`
  function reports if it could.
@@ -2861,16 +2873,16 @@ report that that input has been accepted.
                 void *res;
                 const struct state *nextstate = &states[tos->state];
                 int prod = nextstate->reduce_prod;
                 void *res;
                 const struct state *nextstate = &states[tos->state];
                 int prod = nextstate->reduce_prod;
-               int size = nextstate->reduce_size;
-               int res_size = nextstate->result_size;
+               int size = reductions[prod].size;
+               int res_size = reductions[prod].result_size;
  
                 body = p.asn_stack + (p.tos - size);
                 res = res_size ? calloc(1, res_size) : NULL;
                 res_size = do_reduce(prod, body, config, res);
  
                 body = p.asn_stack + (p.tos - size);
                 res = res_size ? calloc(1, res_size) : NULL;
                 res_size = do_reduce(prod, body, config, res);
-               if (res_size != nextstate->result_size)
+               if (res_size != reductions[prod].result_size)
                         abort();
                 pop(&p, size, do_free);
                         abort();
                 pop(&p, size, do_free);
-               if (!shift(&p, nextstate->reduce_sym, res, states)) {
+               if (!shift(&p, reductions[prod].sym, res, states)) {
                         accepted = 1;
                         ret = res;
                         parser_trace_action(trace, "Accept");
                         accepted = 1;
                         ret = res;
                         parser_trace_action(trace, "Accept");
@@ -2929,6 +2941,7 @@ dropping tokens until either we manage to shift one, or reach end-of-file.
  
         void *parser_run(struct token_state *tokens,
                          const struct state states[],
  
         void *parser_run(struct token_state *tokens,
                          const struct state states[],
+                        const struct reduction reductions[],
                          int (*do_reduce)(int, void**, struct token_config*, void*),
                          void (*do_free)(short, void*),
                          FILE *trace, const char *non_term[],
                          int (*do_reduce)(int, void**, struct token_config*, void*),
                          void (*do_free)(short, void*),
                          FILE *trace, const char *non_term[],
@@ -2950,6 +2963,7 @@ dropping tokens until either we manage to shift one, or reach end-of-file.
  ###### exported functions
         void *parser_run(struct token_state *tokens,
                          const struct state states[],
  ###### exported functions
         void *parser_run(struct token_state *tokens,
                          const struct state states[],
+                        const struct reduction reductions[],
                          int (*do_reduce)(int, void**, struct token_config*, void*),
                          void (*do_free)(short, void*),
                          FILE *trace, const char *non_term[],
                          int (*do_reduce)(int, void**, struct token_config*, void*),
                          void (*do_free)(short, void*),
                          FILE *trace, const char *non_term[],
author	NeilBrown <neil@brown.name>
	Sat, 13 Nov 2021 08:25:23 +0000 (19:25 +1100)
committer	NeilBrown <neil@brown.name>
	Sat, 13 Nov 2021 22:50:58 +0000 (09:50 +1100)