X-Git-Url: https://ocean-lang.org/code/?p=ocean;a=blobdiff_plain;f=csrc%2Fparsergen.mdc;h=0099f5785db32072118b228473d8aa94ed9bf09f;hp=7b917b408d3cd7193504863dabca86b5a299918f;hb=6a0710bfe1622b5420ba53dca034ed18376b8740;hpb=eeb98f44a148729b35f2a87c63dbfb46fe0925bc

diff --git a/csrc/parsergen.mdc b/csrc/parsergen.mdc
index 7b917b4..0099f57 100644
--- a/csrc/parsergen.mdc
+++ b/csrc/parsergen.mdc
@@ -40,6 +40,7 @@ There are several distinct sections.
 	#include <stdlib.h>
 	#include <stdio.h>
 	## parser includes
+	## parser functions
 	## parser
 ###### File: calc.cgm
 	## demo grammar
@@ -234,6 +235,12 @@ The data type name is simply stored and applied to the head of all
 subsequent productions.  It must be the name of a structure, so `$expr`
 maps to `struct expr`.
 
+Any productions given before the first data type will have no data type
+and can carry no information.  In order to allow other non-terminals to
+have no type, the data type `$void` can be given.  This does *not* mean
+that `struct void` will be used, but rather than no type will be
+associated with future non-terminals.
+
 The precedence line must contain a list of symbols - typically
 terminal symbols, but not necessarily.  It can only contain symbols
 that have not been seen yet, so precedence declaration must precede
@@ -281,6 +288,8 @@ production inherits from the last symbol which has a precedence.
 			assoc = Non;
 		else {
 			g->current_type = t.txt;
+			if (text_is(t.txt, "void"))
+				g->current_type.txt = NULL;
 			t = token_next(ts);
 			if (t.num != TK_newline) {
 				err = "Extra tokens after type name";
@@ -1296,6 +1305,7 @@ with `TK_eof` as the LA set.
 			if (is->completed)
 				continue;
 			is->completed = 1;
+			again = 1;
 			## complete itemset
 			## derive itemsets
 			symset_free(done);
@@ -1685,6 +1695,7 @@ pieces of code provided in the grammar file, so they are generated first.
 	static void gen_parser(FILE *f, struct grammar *g, char *file, char *name)
 	{
 		gen_known(f, g);
+		gen_non_term(f, g);
 		gen_goto(f, g);
 		gen_states(f, g);
 		gen_reduce(f, g, file);
@@ -1699,7 +1710,7 @@ pieces of code provided in the grammar file, so they are generated first.
 		fprintf(f, "\tconfig->known_count = sizeof(known)/sizeof(known[0]);\n");
 		fprintf(f, "\tconfig->ignored |= (1 << TK_line_comment) | (1 << TK_block_comment);\n");
 		fprintf(f, "\ttokens = token_open(code, config);\n");
-		fprintf(f, "\tvoid *rv = parser_run(tokens, states, do_reduce, do_free, trace);\n");
+		fprintf(f, "\tvoid *rv = parser_run(tokens, states, do_reduce, do_free, trace, non_term, config->known_count);\n");
 		fprintf(f, "\ttoken_close(tokens);\n");
 		fprintf(f, "\treturn rv;\n");
 		fprintf(f, "}\n\n");
@@ -1708,6 +1719,7 @@ pieces of code provided in the grammar file, so they are generated first.
 ### Table words table
 
 The know words is simply an array of terminal symbols.
+The table of nonterminals used for tracing is a similar array.
 
 ###### functions
 
@@ -1724,6 +1736,20 @@ The know words is simply an array of terminal symbols.
 		fprintf(f, "};\n\n");
 	}
 
+	static void gen_non_term(FILE *f, struct grammar *g)
+	{
+		int i;
+		fprintf(f, "#line 0 \"gen_non_term\"\n");
+		fprintf(f, "static const char *non_term[] = {\n");
+		for (i = TK_reserved;
+		     i < g->num_syms;
+		     i++)
+			if (g->symtab[i]->type == Nonterminal)
+				fprintf(f, "\t\"%.*s\",\n", g->symtab[i]->name.len,
+					g->symtab[i]->name.txt);
+		fprintf(f, "};\n\n");
+	}
+
 ### States and the goto tables.
 
 For each state we record the goto table and the reducible production if
@@ -1867,10 +1893,10 @@ to the appropriate type for each access.  All this is handling in
 
 	static void gen_reduce(FILE *f, struct grammar *g, char *file)
 	{
-		int i, j;
+		int i;
 		fprintf(f, "#line 0 \"gen_reduce\"\n");
 		fprintf(f, "static int do_reduce(int prod, int depth, void **body,\n");
-		fprintf(f, "                      void *ret, FILE *trace)\n");
+		fprintf(f, "                      void *ret)\n");
 		fprintf(f, "{\n");
 		fprintf(f, "\tint ret_size = 0;\n");
 
@@ -1882,21 +1908,6 @@ to the appropriate type for each access.  All this is handling in
 			if (p->code.txt)
 				gen_code(p, f, g);
 
-			fprintf(f, "\t\tif (trace) {\n");
-			fprintf(f, "\t\t\tfprintf(trace, \"[%%2d]%.*s ->\", depth);\n",
-			        p->head->name.len, p->head->name.txt);
-			for (j = 0; j < p->body_size; j++) {
-				if (p->body[j]->type == Terminal) {
-					fputs("\t\t\tfputs(\" \", trace);\n", f);
-					fprintf(f, "\t\t\ttext_dump(trace, (*(struct token*)body[%d]).txt, 20);\n", j);
-				} else {
-					fprintf(f, "\t\t\tfprintf(trace, \" %.*s\");\n",
-					        p->body[j]->name.len,
-					        p->body[j]->name.txt);
-				}
-			}
-			fprintf(f, "\t\t}\n");
-
 			if (p->head->struct_name.txt)
 				fprintf(f, "\t\tret_size = sizeof(struct %.*s);\n",
 					p->head->struct_name.len,
@@ -2195,7 +2206,7 @@ The parser generator has nicely provided us with goto tables sorted by
 symbol number.  We need a binary search function to find a symbol in the
 table.
 
-###### parser
+###### parser functions
 
 	static int search(const struct state *l, int sym)
 	{
@@ -2239,7 +2250,7 @@ The `state` is the most important one and guides the parsing process.  The
 freeing function.  The symbol leads us to the right free function through
 `do_free`.
 
-###### parser
+###### parser functions
 
 	struct parser {
 		int state;
@@ -2270,7 +2281,7 @@ function reports if it could.
 So `shift` finds the next state.  If that succeed it extends the allocations
 if needed and pushed all the information onto the stacks.
 
-###### parser
+###### parser functions
 
 	static int shift(struct parser *p,
 	                 int sym, void *asn,
@@ -2299,7 +2310,7 @@ if needed and pushed all the information onto the stacks.
 and frees and `asn` entries that need to be freed.  It is called _after_ we
 reduce a production, just before we `shift` the nonterminal in.
 
-###### parser
+###### parser functions
 
 	static void pop(struct parser *p, int num,
 	                void(*do_free)(short sym, void *asn))
@@ -2323,7 +2334,7 @@ copying, hence `memdup` and `tokcopy`.
 ###### parser includes
 	#include <memory.h>
 
-###### parser
+###### parser functions
 
 	void *memdup(void *m, int len)
 	{
@@ -2359,9 +2370,9 @@ We return whatever `asn` was returned by reducing production zero.
 ###### parser
 	void *parser_run(struct token_state *tokens,
 	                 const struct state states[],
-	                 int (*do_reduce)(int, int, void**, void*, FILE*),
+	                 int (*do_reduce)(int, int, void**, void*),
 	                 void (*do_free)(short, void*),
-	                 FILE *trace)
+	                 FILE *trace, const char *non_term[], int knowns)
 	{
 		struct parser p = { 0 };
 		struct token *tk;
@@ -2370,12 +2381,10 @@ We return whatever `asn` was returned by reducing production zero.
 
 		tk = tok_copy(token_next(tokens));
 		while (!accepted) {
+			if (trace)
+				parser_trace(trace, &p, tk, states, non_term, knowns);
+
 			if (shift(&p, tk->num, tk, states)) {
-				if (trace) {
-					fputs("Shift ", trace);
-					text_dump(trace, tk->txt, 20);
-					fputs("\n", trace);
-				}
 				tk = tok_copy(token_next(tokens));
 				continue;
 			}
@@ -2391,9 +2400,7 @@ We return whatever `asn` was returned by reducing production zero.
 					(p.tos - states[p.state].reduce_size);
 
 				bufsize = do_reduce(prod, p.tos, body,
-				                    buf, trace);
-				if (trace)
-					fputs("\n", trace);
+				                    buf);
 
 				pop(&p, size, do_free);
 				shift(&p, sym, memdup(buf, bufsize), states);
@@ -2434,10 +2441,49 @@ We return whatever `asn` was returned by reducing production zero.
 ###### exported functions
 	void *parser_run(struct token_state *tokens,
 	                 const struct state states[],
-	                 int (*do_reduce)(int, int, void**, void*, FILE*),
+	                 int (*do_reduce)(int, int, void**, void*),
 	                 void (*do_free)(short, void*),
-	                 FILE *trace);
+	                 FILE *trace, const char *non_term[], int knowns);
+
+### Tracing
+
+Being able to visualize the parser in action can be invaluable when
+debugging the parser code, or trying to understand how the parse of a
+particular grammar progresses.  The stack contains all the important
+state, so just printing out the stack every time around the parse loop
+can make it possible to see exactly what is happening.
+
+This doesn't explicitly show each SHIFT and REDUCE action.  However they
+are easily deduced from the change between consecutive lines, and the
+details of each state can be found by cross referencing the states list
+in the stack with the "report" that parsergen can generate.
 
+For terminal symbols, we just dump the token.  For non-terminals we
+print the name of the symbol.  The look ahead token is reported at the
+end inside square brackets.
+
+###### parser functions
+
+	void parser_trace(FILE *trace, struct parser *p,
+			  struct token *tk, const struct state states[],
+			  const char *non_term[], int knowns)
+	{
+		int i;
+		for (i = 0; i < p->tos; i++) {
+			int sym = p->stack[i].sym;
+			fprintf(trace, "(%d) ", p->stack[i].state);
+			if (sym < TK_reserved + knowns) {
+				struct token *t = p->asn_stack[i];
+				text_dump(trace, t->txt, 20);
+			} else
+				fputs(non_term[sym - TK_reserved - knowns],
+				      trace);
+			fputs(" ", trace);
+		}
+		fprintf(trace, "(%d) [", p->state);
+		text_dump(trace, tk->txt, 20);
+		fputs("]\n", trace);
+	}
 
 # A Worked Example