struct production {
unsigned short precedence;
enum assoc assoc;
+ char line_like;
## production fields
};
struct grammar {
listed and may be inherited by any production which uses the symbol. A
production inherits from the last symbol which has a precedence.
+The symbols on the first precedence line have the lowest precedence.
+Subsequent lines introduce symbols with higher precedence.
+
###### grammar fields
struct text current_type;
int type_isref;
goto abort;
}
vs = sym_find(g, tk.txt);
- if (vs->type != Virtual) {
- err = "symbol after $$ must be virtual";
+ if (vs->num == TK_newline)
+ p.line_like = 1;
+ else if (vs->precedence == 0) {
+ err = "symbol after $$ must have precedence";
goto abort;
+ } else {
+ p.precedence = vs->precedence;
+ p.assoc = vs->assoc;
}
- p.precedence = vs->precedence;
- p.assoc = vs->assoc;
tk = token_next(state);
}
if (tk.num == TK_open) {
the complete item, if there is one. We ignore the possibility of
there being two and don't (currently) handle precedence in such
grammars. When completing a grammar we ignore any item where DOT is
-followed by a terminal with a precedence lower (numerically higher)
-than that for the itemset. Unless the terminal has right
-associativity, we also ignore items where the terminal has the same
-precedence. The result is that unwanted items are still in the
-itemset, but the terminal doesn't get into the go to set, so the item
-is ineffective.
+followed by a terminal with a precedence lower than that for the
+itemset. Unless the terminal has right associativity, we also ignore
+items where the terminal has the same precedence. The result is that
+unwanted items are still in the itemset, but the terminal doesn't get
+into the go to set, so the item is ineffective.
###### complete itemset
for (i = 0; i < is->items.cnt; i++) {
struct symbol *s;
struct symset LA = INIT_SYMSET;
unsigned short sn = 0;
+ struct symset LAnl = INIT_SYMSET;
+ unsigned short snnl = 0;
if (is->min_prefix == 0 ||
(bs > 0 && bs < is->min_prefix))
continue;
s = pr->body[bs];
if (s->precedence && is->precedence &&
- is->precedence < s->precedence)
+ is->precedence > s->precedence)
/* This terminal has a low precedence and
* shouldn't be shifted
*/
}
sn = save_set(g, LA);
LA = set_find(g, sn);
+ symset_add(&LAnl, TK_newline, 0);
+ snnl = save_set(g, LAnl);
+ LAnl = set_find(g, snnl);
}
/* Add productions for this symbol */
int itm = item_num(p2, 0);
int pos = symset_find(&is->items, itm);
if (pos < 0) {
- symset_add(&is->items, itm, sn);
+ if (g->productions[p2]->line_like)
+ symset_add(&is->items, itm, snnl);
+ else
+ symset_add(&is->items, itm, sn);
/* Will have re-ordered, so start
* from beginning again */
i = -1;
} else if (type >= LALR) {
struct symset ss = set_find(g, is->items.data[pos]);
struct symset tmp = INIT_SYMSET;
+ struct symset *la = &LA;
+ if (g->productions[p2]->line_like)
+ la = &LAnl;
symset_union(&tmp, &ss);
- if (symset_union(&tmp, &LA)) {
+ if (symset_union(&tmp, la)) {
is->items.data[pos] = save_set(g, tmp);
i = -1;
- }else
+ } else
symset_free(tmp);
}
}
pos = symset_find(&newitemset, pr->head->num);
if (bp + 1 == pr->body_size &&
pr->precedence > 0 &&
- (precedence == 0 ||
- pr->precedence < precedence)) {
+ pr->precedence > precedence) {
// new itemset is reducible and has a precedence.
precedence = pr->precedence;
assoc = pr->assoc;
printf(" [%d%s]", s->precedence,
assoc_names[s->assoc]);
}
+ if (pr->line_like)
+ printf(" $$NEWLINE");
printf("\n");
}
terminal that could be shifted is in the lookahead set of some
reducable item, then set check if the reducable item also have
`TK_newline` in its lookahead set. If it does, then a newline will
-force and reduction, but anything else can reasonably be shifts, so
+force the reduction, but anything else can reasonably be shifted, so
that isn't really a conflict. Such apparent conflicts do not get
-reported. This will not affect a "tradtional" grammar that does not
-include newlines as token.
+counted, and are reported as non-critical. This will not affect a
+"traditional" grammar that does not include newlines as tokens.
static int conflicts_slr(struct grammar *g, enum grammar_type type)
{
int p = item_prod(itm);
int bp = item_index(itm);
struct production *pr = g->productions[p];
+ struct symbol *s;
- if (bp < pr->body_size &&
- pr->body[bp]->type == Terminal) {
- /* shiftable */
- int sym = pr->body[bp]->num;
- if (symset_find(&shifts, sym) < 0)
- symset_add(&shifts, sym, itm);
- }
+ if (bp >= pr->body_size ||
+ pr->body[bp]->type != Terminal)
+ /* not shiftable */
+ continue;
+
+ s = pr->body[bp];
+ if (s->precedence && is->precedence)
+ /* Precedence resolves this, so no conflict */
+ continue;
+
+ if (symset_find(&shifts, s->num) < 0)
+ symset_add(&shifts, s->num, itm);
}
/* Now look for reductions and conflicts */
for (j = 0; j < is->items.cnt; j++) {
int k;
for (k = 0; k < la.cnt; k++) {
int pos = symset_find(&shifts, la.syms[k]);
- if (pos >= 0 && symset_find(&la, TK_newline) < 0) {
- printf(" State %d has SHIFT/REDUCE conflict on ", i);
+ if (pos >= 0 && la.syms[k] != TK_newline) {
+ if (symset_find(&la, TK_newline) < 0) {
+ printf(" State %d has SHIFT/REDUCE conflict on ", i);
+ cnt++;
+ } else
+ printf(" State %d has non-critical SHIFT/REDUCE conflict on ", i);
prtxt(g->symtab[la.syms[k]]->name);
printf(":\n");
report_item(g, shifts.data[pos]);
report_item(g, itm);
- cnt++;
}
pos = symset_find(&reduce, la.syms[k]);
if (pos < 0) {
###### parser_generate
- static void gen_parser(FILE *f, struct grammar *g, char *file, char *name)
+ static void gen_parser(FILE *f, struct grammar *g, char *file, char *name,
+ struct code_node *pre_reduce)
{
gen_known(f, g);
gen_non_term(f, g);
gen_goto(f, g);
gen_states(f, g);
- gen_reduce(f, g, file);
+ gen_reduce(f, g, file, pre_reduce);
gen_free(f, g);
fprintf(f, "#line 0 \"gen_parser\"\n");
### Known words table
The known words table is simply an array of terminal symbols.
-The table of nonterminals used for tracing is a similar array.
+The table of nonterminals used for tracing is a similar array. We
+include virtual symbols in the table of non_terminals to keep the
+numbers right.
###### functions
for (i = TK_reserved;
i < g->num_syms;
i++)
- if (g->symtab[i]->type == Nonterminal)
+ if (g->symtab[i]->type != Terminal)
fprintf(f, "\t\"%.*s\",\n", g->symtab[i]->name.len,
g->symtab[i]->name.txt);
fprintf(f, "};\n\n");
short reduce_prod;
short reduce_size;
short reduce_sym;
- short starts_line;
+ char starts_line;
+ char newline_only;
short min_prefix;
};
}
if (prod >= 0)
- fprintf(f, "\t[%d] = { %d, goto_%d, %d, %d, %d, %d, %d },\n",
+ fprintf(f, "\t[%d] = { %d, goto_%d, %d, %d, %d, %d, %d, %d },\n",
i, is->go_to.cnt, i, prod,
g->productions[prod]->body_size,
g->productions[prod]->head->num,
- is->starts_line, is->min_prefix);
+ is->starts_line,
+ g->productions[prod]->line_like,
+ is->min_prefix);
else
- fprintf(f, "\t[%d] = { %d, goto_%d, -1, -1, -1, %d, %d },\n",
+ fprintf(f, "\t[%d] = { %d, goto_%d, -1, -1, -1, %d, 0, %d },\n",
i, is->go_to.cnt, i,
is->starts_line, is->min_prefix);
}
fputs("\n", f);
for (i = 0; i < p->body_size; i++) {
if (p->body[i]->struct_name.txt &&
- p->body[i]->isref &&
- used[i])
+ used[i]) {
// assume this has been copied out
- fprintf(f, "\t\t*(void**)body[%d] = NULL;\n", i);
+ if (p->body[i]->isref)
+ fprintf(f, "\t\t*(void**)body[%d] = NULL;\n", i);
+ else
+ fprintf(f, "\t\tmemset(body[%d], 0, sizeof(struct %.*s));\n", i, p->body[i]->struct_name.len, p->body[i]->struct_name.txt);
+ }
}
free(used);
}
###### functions
- static void gen_reduce(FILE *f, struct grammar *g, char *file)
+ static void gen_reduce(FILE *f, struct grammar *g, char *file,
+ struct code_node *code)
{
int i;
- fprintf(f, "#line 0 \"gen_reduce\"\n");
+ fprintf(f, "#line 1 \"gen_reduce\"\n");
fprintf(f, "static int do_reduce(int prod, void **body, struct token_config *config, void *ret)\n");
fprintf(f, "{\n");
fprintf(f, "\tint ret_size = 0;\n");
+ if (code)
+ code_node_print(f, code, file);
+ fprintf(f, "#line 4 \"gen_reduce\"\n");
fprintf(f, "\tswitch(prod) {\n");
for (i = 0; i < g->production_count; i++) {
struct production *p = g->productions[i];
struct code_node *hdr = NULL;
struct code_node *code = NULL;
struct code_node *gram = NULL;
+ struct code_node *pre_reduce = NULL;
for (s = table; s; s = s->next) {
struct text sec = s->section;
if (tag && !strip_tag(&sec, tag))
code = s->code;
else if (text_is(sec, "grammar"))
gram = s->code;
+ else if (text_is(sec, "reduce"))
+ pre_reduce = s->code;
else {
fprintf(stderr, "Unknown content section: %.*s\n",
s->section.len, s->section.txt);
if (f) {
if (code)
code_node_print(f, code, infile);
- gen_parser(f, g, infile, name);
+ gen_parser(f, g, infile, name, pre_reduce);
fclose(f);
} else {
fprintf(stderr, "Cannot create %s.c\n",
`TK_newline` tokens are ignored unless the top stack frame records
that they are permitted. In that case they will not be considered for
shifting if it is possible to reduce some symbols that are all since
-the most recent start of line. This is how a newline forcible
+the most recent start of line. This is how a newline forcibly
terminates any line-like structure - we try to reduce down to at most
one symbol for each line where newlines are allowed.
+A consequence of this is that a rule like
+
+###### Example: newlines - broken
+
+ Newlines ->
+ | NEWLINE Newlines
+ IfStatement -> Newlines if ....
+
+cannot work, as the NEWLINE will never be shifted as the empty string
+will be reduced first. Optional sets of newlines need to be included
+in the thing that they precede:
+
+###### Example: newlines - works
+
+ If -> if
+ | NEWLINE If
+ IfStatement -> If ....
+
+Here the NEWLINE will be shifted because nothing can be reduced until
+the `if` is seen.
When, during error handling, we discard token read in, we want to keep
discarding until we see one that is recognised. If we had a full set
continue;
}
force_reduce:
- if (states[tos->state].reduce_prod >= 0) {
+ if (states[tos->state].reduce_prod >= 0 &&
+ states[tos->state].newline_only &&
+ tk->num != TK_newline && tk->num != TK_eof && tk->num != TK_out) {
+ /* Anything other than newline is an error as this
+ * production must end at EOL
+ */
+ } else if (states[tos->state].reduce_prod >= 0) {
void **body;
void *res;
const struct state *nextstate = &states[tos->state];
# calc: grammar
- $LEFT * /
$LEFT + -
+ $LEFT * /
Session -> Session Line
| Line