X-Git-Url: https://ocean-lang.org/code/?p=ocean;a=blobdiff_plain;f=csrc%2Fparsergen.mdc;h=d2ff89844d2e616e0f10e470c0a7dbffde9492a7;hp=94c29df1d2c7665f61903d413918d0cbee44e720;hb=5281589bf4d9a66ce106b160e1272f1c51d7ac15;hpb=5107f51f1ba2b12dbffef31403ebfea30688b96b diff --git a/csrc/parsergen.mdc b/csrc/parsergen.mdc index 94c29df..d2ff898 100644 --- a/csrc/parsergen.mdc +++ b/csrc/parsergen.mdc @@ -1776,10 +1776,10 @@ As a special case, if we find a SHIFT/REDUCE conflict, where a terminal that could be shifted is in the lookahead set of some reducable item, then set check if the reducable item also have `TK_newline` in its lookahead set. If it does, then a newline will -force and reduction, but anything else can reasonably be shifts, so +force the reduction, but anything else can reasonably be shifted, so that isn't really a conflict. Such apparent conflicts do not get -reported. This will not affect a "tradtional" grammar that does not -include newlines as token. +counted, and are reported as non-critical. This will not affect a +"traditional" grammar that does not include newlines as token. static int conflicts_slr(struct grammar *g, enum grammar_type type) { @@ -1826,13 +1826,16 @@ include newlines as token. int k; for (k = 0; k < la.cnt; k++) { int pos = symset_find(&shifts, la.syms[k]); - if (pos >= 0 && symset_find(&la, TK_newline) < 0) { - printf(" State %d has SHIFT/REDUCE conflict on ", i); + if (pos >= 0) { + if (symset_find(&la, TK_newline) < 0) { + printf(" State %d has SHIFT/REDUCE conflict on ", i); + cnt++; + } else + printf(" State %d has non-critical SHIFT/REDUCE conflict on ", i); prtxt(g->symtab[la.syms[k]]->name); printf(":\n"); report_item(g, shifts.data[pos]); report_item(g, itm); - cnt++; } pos = symset_find(&reduce, la.syms[k]); if (pos < 0) { @@ -1869,13 +1872,14 @@ pieces of code provided in the grammar file, so they are generated first. ###### parser_generate - static void gen_parser(FILE *f, struct grammar *g, char *file, char *name) + static void gen_parser(FILE *f, struct grammar *g, char *file, char *name, + struct code_node *pre_reduce) { gen_known(f, g); gen_non_term(f, g); gen_goto(f, g); gen_states(f, g); - gen_reduce(f, g, file); + gen_reduce(f, g, file, pre_reduce); gen_free(f, g); fprintf(f, "#line 0 \"gen_parser\"\n"); @@ -1896,7 +1900,9 @@ pieces of code provided in the grammar file, so they are generated first. ### Known words table The known words table is simply an array of terminal symbols. -The table of nonterminals used for tracing is a similar array. +The table of nonterminals used for tracing is a similar array. We +include virtual symbols in the table of non_terminals to keep the +numbers right. ###### functions @@ -1921,7 +1927,7 @@ The table of nonterminals used for tracing is a similar array. for (i = TK_reserved; i < g->num_syms; i++) - if (g->symtab[i]->type == Nonterminal) + if (g->symtab[i]->type != Terminal) fprintf(f, "\t\"%.*s\",\n", g->symtab[i]->name.len, g->symtab[i]->name.txt); fprintf(f, "};\n\n"); @@ -2109,14 +2115,18 @@ automatically freed. This is equivalent to assigning `NULL` to the pointer. ###### functions - static void gen_reduce(FILE *f, struct grammar *g, char *file) + static void gen_reduce(FILE *f, struct grammar *g, char *file, + struct code_node *code) { int i; - fprintf(f, "#line 0 \"gen_reduce\"\n"); + fprintf(f, "#line 1 \"gen_reduce\"\n"); fprintf(f, "static int do_reduce(int prod, void **body, struct token_config *config, void *ret)\n"); fprintf(f, "{\n"); fprintf(f, "\tint ret_size = 0;\n"); + if (code) + code_node_print(f, code, file); + fprintf(f, "#line 4 \"gen_reduce\"\n"); fprintf(f, "\tswitch(prod) {\n"); for (i = 0; i < g->production_count; i++) { struct production *p = g->productions[i]; @@ -2313,6 +2323,7 @@ parser with neither. "grammar" must be provided. struct code_node *hdr = NULL; struct code_node *code = NULL; struct code_node *gram = NULL; + struct code_node *pre_reduce = NULL; for (s = table; s; s = s->next) { struct text sec = s->section; if (tag && !strip_tag(&sec, tag)) @@ -2323,6 +2334,8 @@ parser with neither. "grammar" must be provided. code = s->code; else if (text_is(sec, "grammar")) gram = s->code; + else if (text_is(sec, "reduce")) + pre_reduce = s->code; else { fprintf(stderr, "Unknown content section: %.*s\n", s->section.len, s->section.txt); @@ -2394,7 +2407,7 @@ file with the code section (if any) and the parser tables and function. if (f) { if (code) code_node_print(f, code, infile); - gen_parser(f, g, infile, name); + gen_parser(f, g, infile, name, pre_reduce); fclose(f); } else { fprintf(stderr, "Cannot create %s.c\n", @@ -2692,9 +2705,29 @@ is still a work-in-progress. `TK_newline` tokens are ignored unless the top stack frame records that they are permitted. In that case they will not be considered for shifting if it is possible to reduce some symbols that are all since -the most recent start of line. This is how a newline forcible +the most recent start of line. This is how a newline forcibly terminates any line-like structure - we try to reduce down to at most one symbol for each line where newlines are allowed. +A consequence of this is that a rule like + +###### Example: newlines - broken + + Newlines -> + | NEWLINE Newlines + IfStatement -> Newlines if .... + +cannot work, as the NEWLINE will never be shifted as the empty string +will be reduced first. Optional sets of newlines need to be include +in the thing that preceed: + +###### Example: newlines - works + + If -> if + | NEWLINE If + IfStatement -> If .... + +Here the NEWLINE will be shifted because nothing can be reduced until +the `if` is seen. When, during error handling, we discard token read in, we want to keep discarding until we see one that is recognised. If we had a full set