X-Git-Url: https://ocean-lang.org/code/?a=blobdiff_plain;f=csrc%2Fparsergen.mdc;h=6d291312547f0aad1ddadc57ee2affeb7e8b3802;hb=afa1be3c3a7f14a8c476d937a33376490272ba45;hp=a7330351d767b52f52ba7b14a802e1b2556dd842;hpb=fa35c2e89220fb586700f934d9beba2f3e806f0f;p=ocean diff --git a/csrc/parsergen.mdc b/csrc/parsergen.mdc index a733035..6d29131 100644 --- a/csrc/parsergen.mdc +++ b/csrc/parsergen.mdc @@ -277,6 +277,9 @@ declares how it associates. This level is stored in each symbol listed and may be inherited by any production which uses the symbol. A production inherits from the last symbol which has a precedence. +The symbols on the first precedence line have the lowest precedence. +Subsequent lines introduce symbols with higher precedence. + ###### grammar fields struct text current_type; int type_isref; @@ -1287,12 +1290,11 @@ When itemsets are created we assign a precedence to the itemset from the complete item, if there is one. We ignore the possibility of there being two and don't (currently) handle precedence in such grammars. When completing a grammar we ignore any item where DOT is -followed by a terminal with a precedence lower (numerically higher) -than that for the itemset. Unless the terminal has right -associativity, we also ignore items where the terminal has the same -precedence. The result is that unwanted items are still in the -itemset, but the terminal doesn't get into the go to set, so the item -is ineffective. +followed by a terminal with a precedence lower than that for the +itemset. Unless the terminal has right associativity, we also ignore +items where the terminal has the same precedence. The result is that +unwanted items are still in the itemset, but the terminal doesn't get +into the go to set, so the item is ineffective. ###### complete itemset for (i = 0; i < is->items.cnt; i++) { @@ -1311,7 +1313,7 @@ is ineffective. continue; s = pr->body[bs]; if (s->precedence && is->precedence && - is->precedence < s->precedence) + is->precedence > s->precedence) /* This terminal has a low precedence and * shouldn't be shifted */ @@ -1404,8 +1406,7 @@ with a pre-existing itemset). pos = symset_find(&newitemset, pr->head->num); if (bp + 1 == pr->body_size && pr->precedence > 0 && - (precedence == 0 || - pr->precedence < precedence)) { + pr->precedence > precedence) { // new itemset is reducible and has a precedence. precedence = pr->precedence; assoc = pr->assoc; @@ -1776,10 +1777,10 @@ As a special case, if we find a SHIFT/REDUCE conflict, where a terminal that could be shifted is in the lookahead set of some reducable item, then set check if the reducable item also have `TK_newline` in its lookahead set. If it does, then a newline will -force and reduction, but anything else can reasonably be shifts, so +force the reduction, but anything else can reasonably be shifted, so that isn't really a conflict. Such apparent conflicts do not get -reported. This will not affect a "tradtional" grammar that does not -include newlines as token. +counted, and are reported as non-critical. This will not affect a +"traditional" grammar that does not include newlines as token. static int conflicts_slr(struct grammar *g, enum grammar_type type) { @@ -1799,14 +1800,20 @@ include newlines as token. int p = item_prod(itm); int bp = item_index(itm); struct production *pr = g->productions[p]; + struct symbol *s; - if (bp < pr->body_size && - pr->body[bp]->type == Terminal) { - /* shiftable */ - int sym = pr->body[bp]->num; - if (symset_find(&shifts, sym) < 0) - symset_add(&shifts, sym, itm); - } + if (bp >= pr->body_size || + pr->body[bp]->type != Terminal) + /* not shiftable */ + continue; + + s = pr->body[bp]; + if (s->precedence && is->precedence) + /* Precedence resolves this, so no conflict */ + continue; + + if (symset_find(&shifts, s->num) < 0) + symset_add(&shifts, s->num, itm); } /* Now look for reductions and conflicts */ for (j = 0; j < is->items.cnt; j++) { @@ -1826,13 +1833,16 @@ include newlines as token. int k; for (k = 0; k < la.cnt; k++) { int pos = symset_find(&shifts, la.syms[k]); - if (pos >= 0 && symset_find(&la, TK_newline) < 0) { - printf(" State %d has SHIFT/REDUCE conflict on ", i); + if (pos >= 0) { + if (symset_find(&la, TK_newline) < 0) { + printf(" State %d has SHIFT/REDUCE conflict on ", i); + cnt++; + } else + printf(" State %d has non-critical SHIFT/REDUCE conflict on ", i); prtxt(g->symtab[la.syms[k]]->name); printf(":\n"); report_item(g, shifts.data[pos]); report_item(g, itm); - cnt++; } pos = symset_find(&reduce, la.syms[k]); if (pos < 0) { @@ -1869,13 +1879,14 @@ pieces of code provided in the grammar file, so they are generated first. ###### parser_generate - static void gen_parser(FILE *f, struct grammar *g, char *file, char *name) + static void gen_parser(FILE *f, struct grammar *g, char *file, char *name, + struct code_node *pre_reduce) { gen_known(f, g); gen_non_term(f, g); gen_goto(f, g); gen_states(f, g); - gen_reduce(f, g, file); + gen_reduce(f, g, file, pre_reduce); gen_free(f, g); fprintf(f, "#line 0 \"gen_parser\"\n"); @@ -1896,7 +1907,9 @@ pieces of code provided in the grammar file, so they are generated first. ### Known words table The known words table is simply an array of terminal symbols. -The table of nonterminals used for tracing is a similar array. +The table of nonterminals used for tracing is a similar array. We +include virtual symbols in the table of non_terminals to keep the +numbers right. ###### functions @@ -1921,7 +1934,7 @@ The table of nonterminals used for tracing is a similar array. for (i = TK_reserved; i < g->num_syms; i++) - if (g->symtab[i]->type == Nonterminal) + if (g->symtab[i]->type != Terminal) fprintf(f, "\t\"%.*s\",\n", g->symtab[i]->name.len, g->symtab[i]->name.txt); fprintf(f, "};\n\n"); @@ -2109,14 +2122,18 @@ automatically freed. This is equivalent to assigning `NULL` to the pointer. ###### functions - static void gen_reduce(FILE *f, struct grammar *g, char *file) + static void gen_reduce(FILE *f, struct grammar *g, char *file, + struct code_node *code) { int i; - fprintf(f, "#line 0 \"gen_reduce\"\n"); + fprintf(f, "#line 1 \"gen_reduce\"\n"); fprintf(f, "static int do_reduce(int prod, void **body, struct token_config *config, void *ret)\n"); fprintf(f, "{\n"); fprintf(f, "\tint ret_size = 0;\n"); + if (code) + code_node_print(f, code, file); + fprintf(f, "#line 4 \"gen_reduce\"\n"); fprintf(f, "\tswitch(prod) {\n"); for (i = 0; i < g->production_count; i++) { struct production *p = g->productions[i]; @@ -2313,6 +2330,7 @@ parser with neither. "grammar" must be provided. struct code_node *hdr = NULL; struct code_node *code = NULL; struct code_node *gram = NULL; + struct code_node *pre_reduce = NULL; for (s = table; s; s = s->next) { struct text sec = s->section; if (tag && !strip_tag(&sec, tag)) @@ -2323,6 +2341,8 @@ parser with neither. "grammar" must be provided. code = s->code; else if (text_is(sec, "grammar")) gram = s->code; + else if (text_is(sec, "reduce")) + pre_reduce = s->code; else { fprintf(stderr, "Unknown content section: %.*s\n", s->section.len, s->section.txt); @@ -2394,7 +2414,7 @@ file with the code section (if any) and the parser tables and function. if (f) { if (code) code_node_print(f, code, infile); - gen_parser(f, g, infile, name); + gen_parser(f, g, infile, name, pre_reduce); fclose(f); } else { fprintf(stderr, "Cannot create %s.c\n", @@ -3076,8 +3096,8 @@ an error. # calc: grammar - $LEFT * / $LEFT + - + $LEFT * / Session -> Session Line | Line