X-Git-Url: https://ocean-lang.org/code/?p=ocean;a=blobdiff_plain;f=csrc%2Fparsergen.mdc;h=76fec3a70a779cefe4589a9d79e861a61acd726e;hp=d30275e4e4d383a22905acd1e415c72d89fc56de;hb=ca00beb39b9d02578c1f0b556a2c2f70f28cf6e7;hpb=f8c00ecff941f5f86b4d54dd73cac3680684a11e diff --git a/csrc/parsergen.mdc b/csrc/parsergen.mdc index d30275e..76fec3a 100644 --- a/csrc/parsergen.mdc +++ b/csrc/parsergen.mdc @@ -100,6 +100,7 @@ symbol. struct production { unsigned short precedence; enum assoc assoc; + char line_like; ## production fields }; struct grammar { @@ -496,12 +497,17 @@ Now we have all the bits we need to parse a full production. goto abort; } vs = sym_find(g, tk.txt); - if (vs->type != Virtual) { - err = "symbol after $$ must be virtual"; + if (vs->num == TK_newline) + p.line_like = 1; + else if (vs->num == TK_out) + p.line_like = 2; + else if (vs->precedence == 0) { + err = "symbol after $$ must have precedence"; goto abort; + } else { + p.precedence = vs->precedence; + p.assoc = vs->assoc; } - p.precedence = vs->precedence; - p.assoc = vs->assoc; tk = token_next(state); } if (tk.num == TK_open) { @@ -1305,6 +1311,8 @@ into the go to set, so the item is ineffective. struct symbol *s; struct symset LA = INIT_SYMSET; unsigned short sn = 0; + struct symset LAnl = INIT_SYMSET; + unsigned short snnl = 0; if (is->min_prefix == 0 || (bs > 0 && bs < is->min_prefix)) @@ -1342,6 +1350,10 @@ into the go to set, so the item is ineffective. } sn = save_set(g, LA); LA = set_find(g, sn); + if (symset_find(&LA, TK_newline)) + symset_add(&LAnl, TK_newline, 0); + snnl = save_set(g, LAnl); + LAnl = set_find(g, snnl); } /* Add productions for this symbol */ @@ -1352,19 +1364,25 @@ into the go to set, so the item is ineffective. int itm = item_num(p2, 0); int pos = symset_find(&is->items, itm); if (pos < 0) { - symset_add(&is->items, itm, sn); + if (g->productions[p2]->line_like) + symset_add(&is->items, itm, snnl); + else + symset_add(&is->items, itm, sn); /* Will have re-ordered, so start * from beginning again */ i = -1; } else if (type >= LALR) { struct symset ss = set_find(g, is->items.data[pos]); struct symset tmp = INIT_SYMSET; + struct symset *la = &LA; + if (g->productions[p2]->line_like) + la = &LAnl; symset_union(&tmp, &ss); - if (symset_union(&tmp, &LA)) { + if (symset_union(&tmp, la)) { is->items.data[pos] = save_set(g, tmp); i = -1; - }else + } else symset_free(tmp); } } @@ -1635,6 +1653,10 @@ it up a bit. First the items, with production number and associativity. printf(" [%d%s]", s->precedence, assoc_names[s->assoc]); } + if (pr->line_like == 1) + printf(" $$NEWLINE"); + else if (pr->line_like) + printf(" $$OUT"); printf("\n"); } @@ -1773,14 +1795,10 @@ terminals to items where that terminal could be shifted and another which maps terminals to items that could be reduced when the terminal is in look-ahead. We report when we get conflicts between the two. -As a special case, if we find a SHIFT/REDUCE conflict, where a -terminal that could be shifted is in the lookahead set of some -reducable item, then set check if the reducable item also have -`TK_newline` in its lookahead set. If it does, then a newline will -force the reduction, but anything else can reasonably be shifted, so -that isn't really a conflict. Such apparent conflicts do not get -counted, and are reported as non-critical. This will not affect a -"traditional" grammar that does not include newlines as token. +As a special case, if we find a SHIFT/REDUCE conflict, on the NEWLINE +terminal, we ignore it. NEWLINES are handled specially with its own +rules for when to shift and when to reduce. Conflicts are expected, +but handled internally. static int conflicts_slr(struct grammar *g, enum grammar_type type) { @@ -1833,13 +1851,10 @@ counted, and are reported as non-critical. This will not affect a int k; for (k = 0; k < la.cnt; k++) { int pos = symset_find(&shifts, la.syms[k]); - if (pos >= 0) { - if (symset_find(&la, TK_newline) < 0) { - printf(" State %d has SHIFT/REDUCE conflict on ", i); - cnt++; - } else - printf(" State %d has non-critical SHIFT/REDUCE conflict on ", i); - prtxt(g->symtab[la.syms[k]]->name); + if (pos >= 0 && la.syms[k] != TK_newline) { + printf(" State %d has SHIFT/REDUCE conflict on ", i); + cnt++; + prtxt(g->symtab[la.syms[k]]->name); printf(":\n"); report_item(g, shifts.data[pos]); report_item(g, itm); @@ -1964,7 +1979,8 @@ The go to table is stored in a simple array of `sym` and corresponding short reduce_prod; short reduce_size; short reduce_sym; - short starts_line; + char starts_line; + char newline_only; short min_prefix; }; @@ -2013,13 +2029,15 @@ The go to table is stored in a simple array of `sym` and corresponding } if (prod >= 0) - fprintf(f, "\t[%d] = { %d, goto_%d, %d, %d, %d, %d, %d },\n", + fprintf(f, "\t[%d] = { %d, goto_%d, %d, %d, %d, %d, %d, %d },\n", i, is->go_to.cnt, i, prod, g->productions[prod]->body_size, g->productions[prod]->head->num, - is->starts_line, is->min_prefix); + is->starts_line, + g->productions[prod]->line_like, + is->min_prefix); else - fprintf(f, "\t[%d] = { %d, goto_%d, -1, -1, -1, %d, %d },\n", + fprintf(f, "\t[%d] = { %d, goto_%d, -1, -1, -1, %d, 0, %d },\n", i, is->go_to.cnt, i, is->starts_line, is->min_prefix); } @@ -2851,7 +2869,13 @@ checks if a given token is in any of these look-ahead sets. continue; } force_reduce: - if (states[tos->state].reduce_prod >= 0) { + if (states[tos->state].reduce_prod >= 0 && + states[tos->state].newline_only && + tk->num != TK_newline && tk->num != TK_eof && tk->num != TK_out) { + /* Anything other than newline in an error as this + * production must end at EOL + */ + } else if (states[tos->state].reduce_prod >= 0) { void **body; void *res; const struct state *nextstate = &states[tos->state];