#include <stdlib.h>
#include <stdio.h>
## parser includes
+ ## parser functions
## parser
###### File: calc.cgm
## demo grammar
static void gen_parser(FILE *f, struct grammar *g, char *file, char *name)
{
gen_known(f, g);
+ gen_non_term(f, g);
gen_goto(f, g);
gen_states(f, g);
gen_reduce(f, g, file);
fprintf(f, "\tconfig->known_count = sizeof(known)/sizeof(known[0]);\n");
fprintf(f, "\tconfig->ignored |= (1 << TK_line_comment) | (1 << TK_block_comment);\n");
fprintf(f, "\ttokens = token_open(code, config);\n");
- fprintf(f, "\tvoid *rv = parser_run(tokens, states, do_reduce, do_free, trace);\n");
+ fprintf(f, "\tvoid *rv = parser_run(tokens, states, do_reduce, do_free, trace, non_term, config->known_count);\n");
fprintf(f, "\ttoken_close(tokens);\n");
fprintf(f, "\treturn rv;\n");
fprintf(f, "}\n\n");
### Known words table
The known words table is simply an array of terminal symbols.
+The table of nonterminals used for tracing is a similar array.
###### functions
fprintf(f, "};\n\n");
}
+ /* Write the `non_term` string table into the generated parser source.
+  *
+  * Walks the grammar's symbol table from TK_reserved up to num_syms and
+  * emits one quoted name for every symbol whose type is Nonterminal, in
+  * symbol-table order.  The table is what the parser's tracing code uses
+  * to print non-terminal names.
+  *
+  * f: stream receiving the generated C code.
+  * g: grammar whose symbols are listed.
+  */
+ static void gen_non_term(FILE *f, struct grammar *g)
+ {
+ 	int sym_num = TK_reserved;
+
+ 	fputs("#line 0 \"gen_non_term\"\n", f);
+ 	fputs("static const char *non_term[] = {\n", f);
+ 	while (sym_num < g->num_syms) {
+ 		/* Terminals and any other symbol kinds are skipped. */
+ 		if (g->symtab[sym_num]->type == Nonterminal) {
+ 			fprintf(f, "\t\"%.*s\",\n",
+ 				g->symtab[sym_num]->name.len,
+ 				g->symtab[sym_num]->name.txt);
+ 		}
+ 		sym_num++;
+ 	}
+ 	fputs("};\n\n", f);
+ }
+
### States and the goto tables.
For each state we record the goto table and the reducible production if
static void gen_reduce(FILE *f, struct grammar *g, char *file)
{
- int i, j;
+ int i;
fprintf(f, "#line 0 \"gen_reduce\"\n");
fprintf(f, "static int do_reduce(int prod, int depth, void **body,\n");
- fprintf(f, " void *ret, FILE *trace)\n");
+ fprintf(f, " void *ret)\n");
fprintf(f, "{\n");
fprintf(f, "\tint ret_size = 0;\n");
if (p->code.txt)
gen_code(p, f, g);
- fprintf(f, "\t\tif (trace) {\n");
- fprintf(f, "\t\t\tfprintf(trace, \"[%%2d]%.*s ->\", depth);\n",
- p->head->name.len, p->head->name.txt);
- for (j = 0; j < p->body_size; j++) {
- if (p->body[j]->type == Terminal) {
- fputs("\t\t\tfputs(\" \", trace);\n", f);
- fprintf(f, "\t\t\ttext_dump(trace, (*(struct token*)body[%d]).txt, 20);\n", j);
- } else {
- fprintf(f, "\t\t\tfprintf(trace, \" %.*s\");\n",
- p->body[j]->name.len,
- p->body[j]->name.txt);
- }
- }
- fprintf(f, "\t\t}\n");
-
if (p->head->struct_name.txt)
fprintf(f, "\t\tret_size = sizeof(struct %.*s);\n",
p->head->struct_name.len,
symbol number. We need a binary search function to find a symbol in the
table.
-###### parser
+###### parser functions
static int search(const struct state *l, int sym)
{
freeing function. The symbol leads us to the right free function through
`do_free`.
-###### parser
+###### parser functions
struct parser {
int state;
So `shift` finds the next state. If that succeeds, it extends the allocations
if needed and pushes all the information onto the stacks.
-###### parser
+###### parser functions
static int shift(struct parser *p,
int sym, void *asn,
and frees any `asn` entries that need to be freed. It is called _after_ we
reduce a production, just before we `shift` the nonterminal in.
-###### parser
+###### parser functions
static void pop(struct parser *p, int num,
void(*do_free)(short sym, void *asn))
###### parser includes
#include <memory.h>
-###### parser
+###### parser functions
void *memdup(void *m, int len)
{
###### parser
void *parser_run(struct token_state *tokens,
const struct state states[],
- int (*do_reduce)(int, int, void**, void*, FILE*),
+ int (*do_reduce)(int, int, void**, void*),
void (*do_free)(short, void*),
- FILE *trace)
+ FILE *trace, const char *non_term[], int knowns)
{
struct parser p = { 0 };
struct token *tk;
tk = tok_copy(token_next(tokens));
while (!accepted) {
+ if (trace)
+ parser_trace(trace, &p, tk, states, non_term, knowns);
+
if (shift(&p, tk->num, tk, states)) {
- if (trace) {
- fputs("Shift ", trace);
- text_dump(trace, tk->txt, 20);
- fputs("\n", trace);
- }
tk = tok_copy(token_next(tokens));
continue;
}
(p.tos - states[p.state].reduce_size);
bufsize = do_reduce(prod, p.tos, body,
- buf, trace);
- if (trace)
- fputs("\n", trace);
+ buf);
pop(&p, size, do_free);
shift(&p, sym, memdup(buf, bufsize), states);
###### exported functions
void *parser_run(struct token_state *tokens,
const struct state states[],
- int (*do_reduce)(int, int, void**, void*, FILE*),
+ int (*do_reduce)(int, int, void**, void*),
void (*do_free)(short, void*),
- FILE *trace);
+ FILE *trace, const char *non_term[], int knowns);
+
+### Tracing
+
+Being able to visualize the parser in action can be invaluable when
+debugging the parser code, or trying to understand how the parse of a
+particular grammar progresses. The stack contains all the important
+state, so just printing out the stack every time around the parse loop
+can make it possible to see exactly what is happening.
+
+This doesn't explicitly show each SHIFT and REDUCE action. However, they
+are easily deduced from the change between consecutive lines, and the
+details of each state can be found by cross referencing the states list
+in the stack with the "report" that parsergen can generate.
+For terminal symbols, we just dump the token. For non-terminals we
+print the name of the symbol. The look ahead token is reported at the
+end inside square brackets.
+
+###### parser functions
+
+ /* Print a one-line snapshot of the parse stack to `trace`.
+  *
+  * Each stack entry is written as "(state) symbol ": symbols numbered
+  * below TK_reserved + knowns are treated as terminals and their saved
+  * token text is dumped, anything else is looked up by name in
+  * `non_term`.  The line ends with the current state and the look-ahead
+  * token `tk` inside square brackets.
+  *
+  * trace:    stream to write to (callers only call this when non-NULL).
+  * p:        parser whose stack is displayed.
+  * tk:       look-ahead token.
+  * states:   not referenced by this function.
+  * non_term: names of the non-terminals, indexed from the first
+  *           non-terminal symbol number.
+  * knowns:   number of known-word terminals following TK_reserved.
+  */
+ void parser_trace(FILE *trace, struct parser *p,
+ 		  struct token *tk, const struct state states[],
+ 		  const char *non_term[], int knowns)
+ {
+ 	/* Symbol numbers at or above this value are non-terminals. */
+ 	const int first_non_term = TK_reserved + knowns;
+ 	int depth;
+
+ 	for (depth = 0; depth < p->tos; depth++) {
+ 		int sym = p->stack[depth].sym;
+
+ 		fprintf(trace, "(%d) ", p->stack[depth].state);
+ 		if (sym >= first_non_term) {
+ 			fputs(non_term[sym - first_non_term], trace);
+ 		} else {
+ 			/* For terminals the asn_stack entry is the token itself. */
+ 			struct token *shifted = p->asn_stack[depth];
+
+ 			/* 20 is presumably a display-length limit - confirm in text_dump. */
+ 			text_dump(trace, shifted->txt, 20);
+ 		}
+ 		fputs(" ", trace);
+ 	}
+
+ 	fprintf(trace, "(%d) [", p->state);
+ 	text_dump(trace, tk->txt, 20);
+ 	fputs("]\n", trace);
+ }
# A Worked Example