}
}
-### Setting `can_eol` and `starts_line`
+### Setting `can_eol` and `line_like`
In order to be able to ignore newline tokens when not relevant, but
still include them in the parse when needed, we will need to know
which states can start a "line-like" section of code. We ignore
newlines when there is an indent since the most recent start of a
-line-like section.
+line-like symbol.
-To know what is line-like, we first need to know which symbols can end
-a line-like section, which is precisely those which can end with a
-newline token. These symbols don't necessarily alway end with a
-newline, but they can. Hence they are not described as "lines" but
-only "line-like".
+To know which symbols are line-like, we first need to know which
+symbols start with a NEWLINE token. Any symbol which is followed by a
+NEWLINE, or anything that starts with a NEWLINE, is deemed to be a line-like symbol.
+Certainly when trying to parse one of these we must take note of NEWLINEs.
-Clearly the `TK_newline` token can end with a newline. Any symbol
-which is the head of a production that contains a line-ending symbol
-followed only by nullable symbols is also a line-ending symbol. We
-use a new field `can_eol` to record this attribute of symbols, and
-compute it in a repetitive manner similar to `set_nullable`.
+Clearly the `TK_newline` token can start with a NEWLINE. Any symbol
+which is the head of a production that contains a starts-with-NEWLINE
+symbol preceded only by nullable symbols is also a
+starts-with-NEWLINE symbol. We use a new field `can_eol` to record
+this attribute of symbols, and compute it in a repetitive manner
+similar to `set_nullable`.
+
+Once we have that, we can determine which symbols are `line_like` by
+seeing which are followed by a `can_eol` symbol in any production.
###### symbol fields
int can_eol;
- int starts_line;
+ int line_like;
###### functions
static void set_can_eol(struct grammar *g)
if (pr->head->can_eol)
continue;
- for (s = pr->body_size - 1; s >= 0; s--) {
+ for (s = 0 ; s < pr->body_size; s++) {
if (pr->body[s]->can_eol) {
pr->head->can_eol = 1;
check_again = 1;
}
}
- static void set_starts_line(struct grammar *g)
+ static void set_line_like(struct grammar *g)
{
int p;
for (p = 0; p < g->production_count; p++) {
struct production *pr = g->productions[p];
int s;
- for (s = 0; s < pr->body_size - 1; s++)
+ for (s = 1; s < pr->body_size; s++)
if (pr->body[s]->can_eol)
- pr->body[s+1]->starts_line = 1;
+ pr->body[s-1]->line_like = 1;
}
}
struct symset go_to;
char completed;
char starts_line;
+ int min_prefix;
};
###### grammar fields
struct symset LA = INIT_SYMSET;
unsigned short sn = 0;
+ if (is->min_prefix == 0 ||
+ (bs > 0 && bs < is->min_prefix))
+ is->min_prefix = bs;
if (bs == pr->body_size)
continue;
s = pr->body[bs];
if (symset_find(&done, s->num) < 0) {
symset_add(&done, s->num, 0);
- if (s->starts_line)
+ if (s->line_like)
is->starts_line = 1;
}
if (s->type != Nonterminal)
set_nullable(g);
set_can_eol(g);
- set_starts_line(g);
+ set_line_like(g);
if (type >= SLR)
build_first(g);
printf(" %c%c%c%3d%c: ",
s->nullable ? '.':' ',
s->can_eol ? '>':' ',
- s->starts_line ? '<':' ',
+ s->line_like ? '<':' ',
s->num, symtypes[s->type]);
prtxt(s->name);
if (s->precedence)
for (s = 0; s < g->states; s++) {
int j;
struct itemset *is = g->statetab[s];
- printf(" Itemset %d:%s\n", s, is->starts_line?" (startsline)":"");
+ printf(" Itemset %d:%s min prefix=%d\n",
+ s, is->starts_line?" (startsline)":"", is->min_prefix);
for (j = 0; j < is->items.cnt; j++) {
report_item(g, is->items.syms[j]);
if (is->items.data != NO_DATA)
short reduce_sym;
short shift_sym;
short starts_line;
+ short min_prefix;
};
}
if (prod >= 0)
- fprintf(f, "\t[%d] = { %d, goto_%d, %d, %d, %d, 0, %d },\n",
+ fprintf(f, "\t[%d] = { %d, goto_%d, %d, %d, %d, 0, %d, %d },\n",
i, is->go_to.cnt, i, prod,
g->productions[prod]->body_size,
g->productions[prod]->head->num,
- is->starts_line);
+ is->starts_line, is->min_prefix);
else
- fprintf(f, "\t[%d] = { %d, goto_%d, -1, -1, -1, %d, %d },\n",
+ fprintf(f, "\t[%d] = { %d, goto_%d, -1, -1, -1, %d, %d, %d },\n",
i, is->go_to.cnt, i, shift_sym,
- is->starts_line);
+ is->starts_line, is->min_prefix);
}
fprintf(f, "};\n\n");
}
* sizeof(p->asn_stack[0]));
}
next->state = newstate;
- next->newline_permitted = 0;
- if (p->tos)
- next->newline_permitted =
- (p->stack[p->tos-1].newline_permitted?:-1)+1;
- if (next->indents > next->starts_indented)
- next->newline_permitted = 0;
- if (next->indents && next->newline_permitted > 2)
- next->newline_permitted = 0;
if (states[newstate].starts_line)
next->newline_permitted = 1;
+ else if (next->indents)
+ next->newline_permitted = 0;
+ else if (p->tos)
+ next->newline_permitted =
+ p->stack[p->tos-1].newline_permitted;
+ else
+ next->newline_permitted = 0;
+
if (next->since_newline) {
if (p->tos)
next->since_newline = p->stack[p->tos-1].since_newline + 1;
if (states[tos->state].starts_line)
tos->newline_permitted = 1;
else if (p.tos > 1)
- tos->newline_permitted = (p.stack[p.tos-2].newline_permitted ?:-1)+1;
+ tos->newline_permitted = p.stack[p.tos-2].newline_permitted;
+ else
+ tos->newline_permitted = 0;
}
free(tk);
tk = NULL;
// will fail).
}
if (next.sym == TK_newline) {
- if (! tos->newline_permitted) {
+ if (!tos->newline_permitted) {
free(tk);
tk = NULL;
parser_trace_action(trace, "Discard");