X-Git-Url: https://ocean-lang.org/code/?p=ocean;a=blobdiff_plain;f=csrc%2Fparsergen.mdc;h=679fab6996de66a9fe267d914c654812f74771f3;hp=2fdbf453c8114746843e5b3f95ee2ab07b570932;hb=f24b54a97b9955aa5761b22aa64d5418e23e80f2;hpb=f5e50e504ef5724ed7d9f13c411fff28d2b17a1e

diff --git a/csrc/parsergen.mdc b/csrc/parsergen.mdc
index 2fdbf45..679fab6 100644
--- a/csrc/parsergen.mdc
+++ b/csrc/parsergen.mdc
@@ -100,6 +100,7 @@ symbol.
 	struct production {
 		unsigned short precedence;
 		enum assoc assoc;
+		char line_like;	/* set when the symbol after "$$" is the newline token */
 		## production fields
 	};
 	struct grammar {
@@ -277,6 +278,9 @@ declares how it associates.  This level is stored in each symbol
 listed and may be inherited by any production which uses the symbol.  A
 production inherits from the last symbol which has a precedence.
 
+The symbols on the first precedence line have the lowest precedence.
+Subsequent lines introduce symbols with higher precedence.
+
 ###### grammar fields
 	struct text current_type;
 	int type_isref;
@@ -493,12 +497,15 @@ Now we have all the bits we need to parse a full production.
 				goto abort;
 			}
 			vs = sym_find(g, tk.txt);
-			if (vs->type != Virtual) {
-				err = "symbol after $$ must be virtual";
+			if (vs->num == TK_newline)
+				p.line_like = 1;
+			else if (vs->precedence == 0) {
+				err = "symbol after $$ must have precedence";
 				goto abort;
+			} else {
+				p.precedence = vs->precedence;
+				p.assoc = vs->assoc;
 			}
-			p.precedence = vs->precedence;
-			p.assoc = vs->assoc;
 			tk = token_next(state);
 		}
 		if (tk.num == TK_open) {
@@ -1287,12 +1294,11 @@ When itemsets are created we assign a precedence to the itemset from
 the complete item, if there is one.  We ignore the possibility of
 there being two and don't (currently) handle precedence in such
 grammars.  When completing a grammar we ignore any item where DOT is
-followed by a terminal with a precedence lower (numerically higher)
-than that for the itemset.  Unless the terminal has right
-associativity, we also ignore items where the terminal has the same
-precedence.  The result is that unwanted items are still in the
-itemset, but the terminal doesn't get into the go to set, so the item
-is ineffective.
+followed by a terminal with a precedence lower than that for the
+itemset.  Unless the terminal has right associativity, we also ignore
+items where the terminal has the same precedence.  The result is that
+unwanted items are still in the itemset, but the terminal doesn't get
+into the go to set, so the item is ineffective.
 
 ###### complete itemset
 	for (i = 0; i < is->items.cnt; i++) {
@@ -1303,6 +1309,8 @@ is ineffective.
 		struct symbol *s;
 		struct symset LA = INIT_SYMSET;
 		unsigned short sn = 0;
+		struct symset LAnl = INIT_SYMSET;
+		unsigned short snnl = 0;
 
 		if (is->min_prefix == 0 ||
 		    (bs > 0 && bs < is->min_prefix))
@@ -1311,7 +1319,7 @@ is ineffective.
 			continue;
 		s = pr->body[bs];
 		if (s->precedence && is->precedence &&
-		    is->precedence < s->precedence)
+		    is->precedence > s->precedence)
 			/* This terminal has a low precedence and
 			 * shouldn't be shifted
 			 */
@@ -1340,6 +1348,10 @@ is ineffective.
 			}
 			sn = save_set(g, LA);
 			LA = set_find(g, sn);
+			if (symset_find(&LA, TK_newline) >= 0) /* index, or negative if absent */
+				symset_add(&LAnl, TK_newline, 0);
+			snnl = save_set(g, LAnl);
+			LAnl = set_find(g, snnl);
 		}
 
 		/* Add productions for this symbol */
@@ -1350,19 +1362,25 @@ is ineffective.
 			int itm = item_num(p2, 0);
 			int pos = symset_find(&is->items, itm);
 			if (pos < 0) {
-				symset_add(&is->items, itm, sn);
+				if (g->productions[p2]->line_like)
+					symset_add(&is->items, itm, snnl);
+				else
+					symset_add(&is->items, itm, sn);
 				/* Will have re-ordered, so start
 				 * from beginning again */
 				i = -1;
 			} else if (type >= LALR) {
 				struct symset ss = set_find(g, is->items.data[pos]);
 				struct symset tmp = INIT_SYMSET;
+				struct symset *la = &LA;
 
+				if (g->productions[p2]->line_like)
+					la = &LAnl;
 				symset_union(&tmp, &ss);
-				if (symset_union(&tmp, &LA)) {
+				if (symset_union(&tmp, la)) {
 					is->items.data[pos] = save_set(g, tmp);
 					i = -1;
-				}else
+				} else
 					symset_free(tmp);
 			}
 		}
@@ -1404,8 +1422,7 @@ with a pre-existing itemset).
 			pos = symset_find(&newitemset, pr->head->num);
 			if (bp + 1 == pr->body_size &&
 			    pr->precedence > 0 &&
-			    (precedence == 0 ||
-			     pr->precedence < precedence)) {
+			    pr->precedence > precedence) {
 				// new itemset is reducible and has a precedence.
 				precedence = pr->precedence;
 				assoc = pr->assoc;
@@ -1634,6 +1651,8 @@ it up a bit.  First the items, with production number and associativity.
 			printf(" [%d%s]", s->precedence,
 			       assoc_names[s->assoc]);
 		}
+		if (pr->line_like)
+			printf(" $$NEWLINE");
 		printf("\n");
 	}
 
@@ -1772,14 +1791,10 @@ terminals to items where that terminal could be shifted and another
 which maps terminals to items that could be reduced when the terminal
 is in look-ahead.  We report when we get conflicts between the two.
 
-As a special case, if we find a SHIFT/REDUCE conflict, where a
-terminal that could be shifted is in the lookahead set of some
-reducable item, then set check if the reducable item also have
-`TK_newline` in its lookahead set.  If it does, then a newline will
-force the reduction, but anything else can reasonably be shifted, so
-that isn't really a conflict.  Such apparent conflicts do not get
-counted, and are reported as non-critical.  This will not affect a
-"traditional" grammar that does not include newlines as token.
+As a special case, if we find a SHIFT/REDUCE conflict on the NEWLINE
+terminal, we ignore it.  NEWLINEs are handled specially, with their own
+rules for when to shift and when to reduce.  Conflicts are expected,
+but handled internally.
 
 	static int conflicts_slr(struct grammar *g, enum grammar_type type)
 	{
@@ -1799,14 +1814,20 @@ counted, and are reported as non-critical.  This will not affect a
 				int p = item_prod(itm);
 				int bp = item_index(itm);
 				struct production *pr = g->productions[p];
+				struct symbol *s;
 
-				if (bp < pr->body_size &&
-				    pr->body[bp]->type == Terminal) {
-					/* shiftable */
-					int sym = pr->body[bp]->num;
-					if (symset_find(&shifts, sym) < 0)
-						symset_add(&shifts, sym, itm);
-				}
+				if (bp >= pr->body_size ||
+				    pr->body[bp]->type != Terminal)
+					/* not shiftable */
+					continue;
+
+				s = pr->body[bp];
+				if (s->precedence && is->precedence)
+					/* Precedence resolves this, so no conflict */
+					continue;
+
+				if (symset_find(&shifts, s->num) < 0)
+					symset_add(&shifts, s->num, itm);
 			}
 			/* Now look for reductions and conflicts */
 			for (j = 0; j < is->items.cnt; j++) {
@@ -1826,13 +1847,10 @@ counted, and are reported as non-critical.  This will not affect a
 				int k;
 				for (k = 0; k < la.cnt; k++) {
 					int pos = symset_find(&shifts, la.syms[k]);
-					if (pos >= 0) {
-						if (symset_find(&la, TK_newline) < 0) {
-							printf("  State %d has SHIFT/REDUCE conflict on ", i);
-							cnt++;
-						} else
-							printf("  State %d has non-critical SHIFT/REDUCE conflict on ", i);
-						prtxt(g->symtab[la.syms[k]]->name);
+					if (pos >= 0 && la.syms[k] != TK_newline) {
+						printf("  State %d has SHIFT/REDUCE conflict on ", i);
+						cnt++;
+							prtxt(g->symtab[la.syms[k]]->name);
 						printf(":\n");
 						report_item(g, shifts.data[pos]);
 						report_item(g, itm);
@@ -1872,13 +1890,14 @@ pieces of code provided in the grammar file, so they are generated first.
 
 ###### parser_generate
 
-	static void gen_parser(FILE *f, struct grammar *g, char *file, char *name)
+	static void gen_parser(FILE *f, struct grammar *g, char *file, char *name,
+		               struct code_node *pre_reduce)
 	{
 		gen_known(f, g);
 		gen_non_term(f, g);
 		gen_goto(f, g);
 		gen_states(f, g);
-		gen_reduce(f, g, file);
+		gen_reduce(f, g, file, pre_reduce);
 		gen_free(f, g);
 
 		fprintf(f, "#line 0 \"gen_parser\"\n");
@@ -1899,7 +1918,9 @@ pieces of code provided in the grammar file, so they are generated first.
 ### Known words table
 
 The known words table is simply an array of terminal symbols.
-The table of nonterminals used for tracing is a similar array.
+The table of nonterminals used for tracing is a similar array.  We
+include virtual symbols in the table of nonterminals to keep the
+numbers right.
 
 ###### functions
 
@@ -1924,7 +1945,7 @@ The table of nonterminals used for tracing is a similar array.
 		for (i = TK_reserved;
 		     i < g->num_syms;
 		     i++)
-			if (g->symtab[i]->type == Nonterminal)
+			if (g->symtab[i]->type != Terminal)
 				fprintf(f, "\t\"%.*s\",\n", g->symtab[i]->name.len,
 					g->symtab[i]->name.txt);
 		fprintf(f, "};\n\n");
@@ -1954,7 +1975,8 @@ The go to table is stored in a simple array of `sym` and corresponding
 		short reduce_prod;
 		short reduce_size;
 		short reduce_sym;
-		short starts_line;
+		char starts_line;
+		char newline_only;
 		short min_prefix;
 	};
 
@@ -2003,13 +2025,15 @@ The go to table is stored in a simple array of `sym` and corresponding
 			}
 
 			if (prod >= 0)
-				fprintf(f, "\t[%d] = { %d, goto_%d, %d, %d, %d, %d, %d },\n",
+				fprintf(f, "\t[%d] = { %d, goto_%d, %d, %d, %d, %d, %d, %d },\n",
 					i, is->go_to.cnt, i, prod,
 					g->productions[prod]->body_size,
 					g->productions[prod]->head->num,
-					is->starts_line, is->min_prefix);
+					is->starts_line,
+					g->productions[prod]->line_like,
+					is->min_prefix);
 			else
-				fprintf(f, "\t[%d] = { %d, goto_%d, -1, -1, -1, %d, %d },\n",
+				fprintf(f, "\t[%d] = { %d, goto_%d, -1, -1, -1, %d, 0, %d },\n",
 					i, is->go_to.cnt, i,
 					is->starts_line, is->min_prefix);
 		}
@@ -2102,24 +2126,31 @@ automatically freed.  This is equivalent to assigning `NULL` to the pointer.
 		fputs("\n", f);
 		for (i = 0; i < p->body_size; i++) {
 			if (p->body[i]->struct_name.txt &&
-			    p->body[i]->isref &&
-			    used[i])
+			    used[i]) {
 				// assume this has been copied out
-				fprintf(f, "\t\t*(void**)body[%d] = NULL;\n", i);
+				if (p->body[i]->isref)
+					fprintf(f, "\t\t*(void**)body[%d] = NULL;\n", i);
+				else
+					fprintf(f, "\t\tmemset(body[%d], 0, sizeof(struct %.*s));\n", i, p->body[i]->struct_name.len, p->body[i]->struct_name.txt);
+			}
 		}
 		free(used);
 	}
 
 ###### functions
 
-	static void gen_reduce(FILE *f, struct grammar *g, char *file)
+	static void gen_reduce(FILE *f, struct grammar *g, char *file,
+	                       struct code_node *code)
 	{
 		int i;
-		fprintf(f, "#line 0 \"gen_reduce\"\n");
+		fprintf(f, "#line 1 \"gen_reduce\"\n");
 		fprintf(f, "static int do_reduce(int prod, void **body, struct token_config *config, void *ret)\n");
 		fprintf(f, "{\n");
 		fprintf(f, "\tint ret_size = 0;\n");
+		if (code)
+			code_node_print(f, code, file);
 
+		fprintf(f, "#line 4 \"gen_reduce\"\n");
 		fprintf(f, "\tswitch(prod) {\n");
 		for (i = 0; i < g->production_count; i++) {
 			struct production *p = g->productions[i];
@@ -2316,6 +2347,7 @@ parser with neither. "grammar" must be provided.
 	struct code_node *hdr = NULL;
 	struct code_node *code = NULL;
 	struct code_node *gram = NULL;
+	struct code_node *pre_reduce = NULL;
 	for (s = table; s; s = s->next) {
 		struct text sec = s->section;
 		if (tag && !strip_tag(&sec, tag))
@@ -2326,6 +2358,8 @@ parser with neither. "grammar" must be provided.
 			code = s->code;
 		else if (text_is(sec, "grammar"))
 			gram = s->code;
+		else if (text_is(sec, "reduce"))
+			pre_reduce = s->code;
 		else {
 			fprintf(stderr, "Unknown content section: %.*s\n",
 			        s->section.len, s->section.txt);
@@ -2397,7 +2431,7 @@ file with the code section (if any) and the parser tables and function.
 		if (f) {
 			if (code)
 				code_node_print(f, code, infile);
-			gen_parser(f, g, infile, name);
+			gen_parser(f, g, infile, name, pre_reduce);
 			fclose(f);
 		} else {
 			fprintf(stderr, "Cannot create %s.c\n",
@@ -2831,7 +2865,13 @@ checks if a given token is in any of these look-ahead sets.
 				continue;
 			}
 		force_reduce:
-			if (states[tos->state].reduce_prod >= 0) {
+			if (states[tos->state].reduce_prod >= 0 &&
+			    states[tos->state].newline_only &&
+			    tk->num != TK_newline && tk->num != TK_eof && tk->num != TK_out) {
+				/* Anything other than newline is an error, as this
+				 * production must end at EOL
+				 */
+			} else if (states[tos->state].reduce_prod >= 0) {
 				void **body;
 				void *res;
 				const struct state *nextstate = &states[tos->state];
@@ -3079,8 +3119,8 @@ an error.
 
 # calc: grammar
 
-	$LEFT * /
 	$LEFT + -
+	$LEFT * /
 
 	Session -> Session Line
 		| Line