struct production {
unsigned short precedence;
enum assoc assoc;
- char line_like;
## production fields
};
struct grammar {
Symbols can be either `TK_ident` or `TK_mark`. They are saved in a
table of known symbols and the resulting parser will report them as
`TK_reserved + N`. A small set of identifiers are reserved for the
-different token types that `scanner` can report.
+different token types that `scanner` can report, and an even smaller set
+are reserved for a special token that the parser can generate (`EOL`) as
+will be described later. This latter set cannot use predefined numbers,
+so they are marked as `isspecial` for now and will get assigned a number
+with the non-terminals later.
###### declarations
{ TK_out, "OUT" },
{ TK_newline, "NEWLINE" },
{ TK_eof, "$eof" },
+ { -1, "EOL" },
};
+
###### symbol fields
short num;
+ unsigned int isspecial:1;
Note that `TK_eof` and the two `TK_*_comment` tokens cannot be
recognised. The former is automatically expected at the end of the text
s = sym_find(g, t);
s->type = Terminal;
s->num = reserved_words[i].num;
+ s->isspecial = 1;
}
}
goto abort;
}
vs = sym_find(g, tk.txt);
- if (vs->num == TK_newline)
- p.line_like = 1;
- else if (vs->num == TK_out)
- p.line_like = 2;
- else if (vs->precedence == 0) {
+ if (vs->precedence == 0) {
err = "symbol after $$ must have precedence";
goto abort;
} else {
}
}
-### Setting `line_like`
-
-In order to be able to ignore newline tokens when not relevant, but
-still include them in the parse when needed, we will need to know
-which states can start a "line-like" section of code. We ignore
-newlines when there is an indent since the most recent start of a
-line-like symbol.
-
-A "line_like" symbol is simply any symbol that can derive a NEWLINE.
-If a symbol cannot derive a NEWLINE, then it is only part of a line -
-so is word-like. If it can derive a NEWLINE, then we consider it to
-be like a line.
-
-Clearly the `TK_newline` token can derive a NEWLINE. Any symbol which
-is the head of a production that contains a line_like symbol is also a
-line-like symbol. We use a new field `line_like` to record this
-attribute of symbols, and compute it in a repetitive manner similar to
-`set_nullable`.
-
-###### symbol fields
- int line_like;
-
-###### functions
- static void set_line_like(struct grammar *g)
- {
- int check_again = 1;
- g->symtab[TK_newline]->line_like = 1;
- while (check_again) {
- int p;
- check_again = 0;
- for (p = 0; p < g->production_count; p++) {
- struct production *pr = g->productions[p];
- int s;
-
- if (pr->head->line_like)
- continue;
-
- for (s = 0 ; s < pr->body_size; s++) {
- if (pr->body[s]->line_like) {
- pr->head->line_like = 1;
- check_again = 1;
- break;
- }
- }
- }
- }
- }
-
### Building the `first` sets
When calculating what can follow a particular non-terminal, we will need
need to be consider for completion again. So a `completed` flag is
needed.
-For correct handling of `TK_newline` when parsing, we will need to
-know which states (itemsets) can occur at the start of a line, so we
-will record a `starts_line` flag too whenever DOT is at the start of a
-`line_like` symbol.
-
-Finally, for handling `TK_out` we need to know whether productions in the
-current state started *before* the most recent indent. A state
-doesn't usually keep details of individual productions, so we need to
-add one extra detail. `min_prefix` is the smallest non-zero number of
-symbols *before* DOT in any production in an itemset. This will allow
-us to determine if the the most recent indent is sufficiently recent
-to cancel it against a `TK_out`. If it was seen longer ago than the
-`min_prefix`, and if the current state cannot be reduced, then the
-indented section must have ended in the middle of a syntactic unit, so
-an error must be signaled.
-
And now we can build the list of itemsets. The lookup routine returns
both a success flag and a pointer to where in the list an insert should
happen, so we don't need to search a second time.
enum assoc assoc;
unsigned short precedence;
char completed;
- char starts_line;
- int min_prefix;
};
###### grammar fields
item.
We also collect a set of all symbols which follow "DOT" (in `done`) as
-this is used in the next stage. If any of these symbols are flagged as
-`line_like`, then this state must be a `starts_line` state so now is a
-good time to record that.
+this is used in the next stage.
When itemsets are created we assign a precedence to the itemset from the
complete item, if there is one. We ignore the possibility of there
struct symbol *s;
struct symset LA = INIT_SYMSET;
unsigned short sn = 0;
- struct symset LAnl = INIT_SYMSET;
- unsigned short snnl = 0;
- if (is->min_prefix == 0 ||
- (bs > 0 && bs < is->min_prefix))
- is->min_prefix = bs;
if (bs == pr->body_size)
continue;
s = pr->body[bs];
* not Right-associative, so we mustn't shift it.
*/
continue;
- if (symset_find(&done, s->num) < 0) {
+ if (symset_find(&done, s->num) < 0)
symset_add(&done, s->num, 0);
- }
+
if (s->type != Nonterminal)
continue;
- if (s->line_like)
- is->starts_line = 1;
check_again = 1;
if (type >= LALR) {
// Need the LA set.
}
sn = save_set(g, LA);
LA = set_find(g, sn);
- if (symset_find(&LA, TK_newline))
- symset_add(&LAnl, TK_newline, 0);
- snnl = save_set(g, LAnl);
- LAnl = set_find(g, snnl);
}
/* Add productions for this symbol */
int itm = item_num(p2, 0);
int pos = symset_find(&is->items, itm);
if (pos < 0) {
- if (g->productions[p2]->line_like)
- symset_add(&is->items, itm, snnl);
- else
- symset_add(&is->items, itm, sn);
+ symset_add(&is->items, itm, sn);
/* Will have re-ordered, so start
* from beginning again */
i = -1;
struct symset tmp = INIT_SYMSET;
struct symset *la = &LA;
- if (g->productions[p2]->line_like)
- la = &LAnl;
symset_union(&tmp, &ss);
if (symset_union(&tmp, la)) {
is->items.data[pos] = save_set(g, tmp);
continue;
if (pr->body[bp] != sym)
continue;
+
+ bp += 1;
if (type >= LALR)
la = is->items.data[j];
- pos = symset_find(&newitemset, pr->head->num);
- if (bp + 1 == pr->body_size &&
+ if (bp == pr->body_size &&
pr->precedence > 0 &&
pr->precedence > precedence) {
// new itemset is reducible and has a precedence.
precedence = pr->precedence;
assoc = pr->assoc;
}
+ pos = symset_find(&newitemset, item_num(p, bp));
+
if (pos < 0)
- symset_add(&newitemset, item_num(p, bp+1), la);
+ symset_add(&newitemset, item_num(p, bp), la);
else if (type >= LALR) {
// Need to merge la set.
int la2 = newitemset.data[pos];
Once we have built everything we allocate arrays for the two lists:
symbols and itemsets. This allows more efficient access during
-reporting. The symbols are grouped as terminals and then non-terminals,
-and we record the changeover point in `first_nonterm`.
+reporting. The symbols are grouped as terminals, then non-terminals,
+then virtual, with the start of non-terminals recorded as `first_nonterm`.
+Special terminals -- meaning just EOL -- are included with the
+non-terminals so that they are not expected by the scanner.
###### grammar fields
struct symbol **symtab;
struct itemset *is;
int snum = TK_reserved;
for (s = g->syms; s; s = s->next)
- if (s->num < 0 && s->type == Terminal) {
+ if (s->num < 0 && s->type == Terminal && !s->isspecial) {
s->num = snum;
snum++;
}
g->symtab[s->num] = s;
set_nullable(g);
- set_line_like(g);
if (type >= SLR)
build_first(g);
Firstly we have the complete list of symbols, together with the
"FIRST" set if that was generated. We add a mark to each symbol to
-show if it is considered to be "line-like" (`<`), or if it is nullable (`.`).
+show if it is nullable (`.`).
###### functions
if (!s)
continue;
- printf(" %c%c%3d%c: ",
+ printf(" %c%3d%c: ",
s->nullable ? '.':' ',
- s->line_like ? '<':' ',
s->num, symtypes[s->type]);
prtxt(s->name);
if (s->precedence)
printf(" [%d%s]", s->precedence,
assoc_names[s->assoc]);
}
- if (pr->line_like == 1)
- printf(" $$NEWLINE");
- else if (pr->line_like)
- printf(" $$OUT");
printf("\n");
}
for (s = 0; s < g->states; s++) {
int j;
struct itemset *is = g->statetab[s];
- printf(" Itemset %d:%s min prefix=%d",
- s, is->starts_line?" (startsline)":"", is->min_prefix);
+ printf(" Itemset %d:", s);
if (is->precedence)
printf(" %d%s", is->precedence, assoc_names[is->assoc]);
printf("\n");
which maps terminals to items that could be reduced when the terminal
is in look-ahead. We report when we get conflicts between the two.
-As a special case, if we find a SHIFT/REDUCE conflict, on the NEWLINE
-terminal, we ignore it. NEWLINES are handled specially with its own
-rules for when to shift and when to reduce. Conflicts are expected,
-but handled internally.
-
static int conflicts_slr(struct grammar *g, enum grammar_type type)
{
int i;
int k;
for (k = 0; k < la.cnt; k++) {
int pos = symset_find(&shifts, la.syms[k]);
- if (pos >= 0 && la.syms[k] != TK_newline) {
+ if (pos >= 0) {
printf(" State %d has SHIFT/REDUCE conflict on ", i);
cnt++;
prtxt(g->symtab[la.syms[k]]->name);
for (i = TK_reserved;
i < g->num_syms;
i++)
- if (g->symtab[i]->type == Nonterminal)
+ if (g->symtab[i]->type == Nonterminal ||
+ g->symtab[i]->isspecial)
fprintf(f, "\t\"%.*s\",\n", g->symtab[i]->name.len,
g->symtab[i]->name.txt);
fprintf(f, "};\n\n");
short reduce_prod;
short reduce_size;
short reduce_sym;
- char starts_line;
- char newline_only;
- short min_prefix;
+ short result_size;
};
###### functions
int i;
fprintf(f, "#line 0 \"gen_goto\"\n");
for (i = 0; i < g->states; i++) {
+ struct symset gt = g->statetab[i]->go_to;
int j;
+
+ if (gt.cnt == 0)
+ continue;
fprintf(f, "static const struct lookup goto_%d[] = {\n",
i);
- struct symset gt = g->statetab[i]->go_to;
for (j = 0; j < gt.cnt; j++)
fprintf(f, "\t{ %d, %d },\n",
gt.syms[j], gt.data[j]);
prod_len = pr->body_size;
}
}
-
- if (prod >= 0)
- fprintf(f, "\t[%d] = { %d, goto_%d, %d, %d, %d, %d, %d, %d },\n",
- i, is->go_to.cnt, i, prod,
- g->productions[prod]->body_size,
- g->productions[prod]->head->num,
- is->starts_line,
- g->productions[prod]->line_like,
- is->min_prefix);
+ if (is->go_to.cnt)
+ fprintf(f, "\t[%d] = { %d, goto_%d, ",
+ i, is->go_to.cnt, i);
else
- fprintf(f, "\t[%d] = { %d, goto_%d, -1, -1, -1, %d, 0, %d },\n",
- i, is->go_to.cnt, i,
- is->starts_line, is->min_prefix);
+ fprintf(f, "\t[%d] = { 0, NULL, ", i);
+ if (prod >= 0) {
+ struct production *pr = g->productions[prod];
+ struct symbol *hd = pr->head;
+ fprintf(f, "%d, %d, %d, ",
+ prod, pr->body_size, pr->head->num);
+ if (hd->struct_name.txt == NULL)
+ fprintf(f, "0 },\n");
+ else
+ fprintf(f, "sizeof(struct %.*s%s) },\n",
+ hd->struct_name.len,
+ hd->struct_name.txt,
+ hd->isref ? "*" : "");
+ } else
+ fprintf(f, "-1, -1, -1, -1 },\n");
}
fprintf(f, "};\n\n");
}
###### functions
static void gen_reduce(FILE *f, struct grammar *g, char *file,
- struct code_node *code)
+ struct code_node *pre_reduce)
{
int i;
fprintf(f, "#line 1 \"gen_reduce\"\n");
fprintf(f, "static int do_reduce(int prod, void **body, struct token_config *config, void *ret)\n");
fprintf(f, "{\n");
fprintf(f, "\tint ret_size = 0;\n");
- if (code)
- code_node_print(f, code, file);
+ if (pre_reduce)
+ code_node_print(f, pre_reduce, file);
fprintf(f, "#line 4 \"gen_reduce\"\n");
fprintf(f, "\tswitch(prod) {\n");
fprintf(f, "static void do_free(short sym, void *asn)\n");
fprintf(f, "{\n");
fprintf(f, "\tif (!asn) return;\n");
- fprintf(f, "\tif (sym < %d) {\n", g->first_nonterm);
+ fprintf(f, "\tif (sym < %d", g->first_nonterm);
+ /* Need to handle special terminals too */
+ for (i = 0; i < g->num_syms; i++) {
+ struct symbol *s = g->symtab[i];
+ if (i >= g->first_nonterm && s->type == Terminal &&
+ s->isspecial)
+ fprintf(f, " || sym == %d", s->num);
+ }
+ fprintf(f, ") {\n");
fprintf(f, "\t\tfree(asn);\n\t\treturn;\n\t}\n");
fprintf(f, "\tswitch(sym) {\n");
case 't':
tag = optarg; break;
default:
- fprintf(stderr, "Usage: parsergen ...\n");
+ fprintf(stderr, "Usage: parsergen -[05SL1R] [-t tag] [-o output] input\n");
exit(1);
}
}
### Memory allocation
The `scanner` returns tokens in a local variable - we want them in allocated
-memory so they can live in the `asn_stack`. Similarly the `asn` produced by
-a reduce is in a large buffer. Both of these require some allocation and
-copying, hence `memdup` and `tok_copy`.
+memory so they can live in the `asn_stack`. So we provide `tok_copy` to
+make an allocated copy as required.
###### parser includes
#include <memory.h>
###### parser functions
- void *memdup(void *m, int len)
- {
- void *ret;
- if (len == 0)
- return NULL;
- ret = malloc(len);
- memcpy(ret, m, len);
- return ret;
- }
-
static struct token *tok_copy(struct token tk)
{
struct token *new = malloc(sizeof(*new));
function. The symbol leads us to the right free function through
`do_free`.
-The `indents` count tracks the line indents with-in the symbol or
-immediately follow it. These are used to allow indent information to
-guide parsing and error recovery.
-
-`since_newline` tracks how many stack frames since the last
-start-of-line (whether indented or not). So if `since_newline` is
-zero, then this symbol is at the start of a line. Similarly
-`since_indent` counts the number of states since an indent, it is zero
-precisely when `indents` is not zero.
-
-`newline_permitted` keeps track of whether newlines should be ignored
-or not.
-
The stack is most properly seen as alternating states and symbols -
states, like the 'DOT' in items, are between symbols. Each frame in
our stack holds a state and the symbol that was before it. The
struct parser {
struct frame {
short state;
- short newline_permitted;
-
short sym;
- short indents;
- short since_newline;
- short since_indent;
} *stack;
void **asn_stack;
int stack_size;
reduce a production we will pop off frames corresponding to the body
symbols, then push on a frame for the head of the production. This last
is exactly the same process as shifting in a terminal so we use the same
-function for both. In both cases we provide the symbol, the number of
-indents the symbol contains (which will be zero for a terminal symbol)
-and a flag indicating the the symbol was at (or was reduced from a
-symbol which was at) the start of a line. The state is deduced from the
-current top-of-stack state and the new symbol.
+function for both. In both cases we provide the symbol. The state is
+deduced from the current top-of-stack state and the new symbol.
To simplify other code we arrange for `shift` to fail if there is no `goto`
state for the symbol. This is useful in basic parsing due to our design
So `shift` finds the next state. If that succeeds it extends the
allocations if needed and pushes all the information onto the stacks.
-Newlines are permitted after a `starts_line` state until an internal
-indent. If the new frame has neither a `starts_line` state nor an
-indent, newlines are permitted if the previous stack frame permitted
-them.
-
###### parser functions
static int shift(struct parser *p,
- short sym, short indents, short start_of_line,
- void *asn,
+ short sym, void *asn,
const struct state states[])
{
- // Push an entry onto the stack
struct frame next = {0};
int newstate = p->tos
? search(&states[p->stack[p->tos-1].state],
* sizeof(p->asn_stack[0]));
}
next.sym = sym;
- next.indents = indents;
next.state = newstate;
- if (states[newstate].starts_line)
- next.newline_permitted = 1;
- else if (indents)
- next.newline_permitted = 0;
- else if (p->tos)
- next.newline_permitted =
- p->stack[p->tos-1].newline_permitted;
- else
- next.newline_permitted = 0;
-
- if (!start_of_line) {
- if (p->tos)
- next.since_newline = p->stack[p->tos-1].since_newline + 1;
- else
- next.since_newline = 1;
- }
- if (indents)
- next.since_indent = 0;
- else if (p->tos)
- next.since_indent = p->stack[p->tos-1].since_indent + 1;
- else
- next.since_indent = 1;
p->stack[p->tos] = next;
p->asn_stack[p->tos] = asn;
}
`pop` primarily moves the top of stack (`tos`) back down the required
-amount and frees any `asn` entries that need to be freed. It also
-collects a summary of the indents and line starts in the symbols that
-are being removed. It is called _after_ we reduce a production, just
-before we `shift` the nonterminal in.
+amount and frees any `asn` entries that need to be freed. It is called
+_after_ we reduce a production, just before we `shift` the nonterminal
+in.
###### parser functions
- static int pop(struct parser *p, int num,
- short *start_of_line,
- void(*do_free)(short sym, void *asn))
+ static void pop(struct parser *p, int num,
+ void(*do_free)(short sym, void *asn))
{
int i;
- short indents = 0;
- int sol = 0;
p->tos -= num;
- for (i = 0; i < num; i++) {
- sol |= !p->stack[p->tos+i].since_newline;
- indents += p->stack[p->tos+i].indents;
- do_free(p->stack[p->tos+i].sym,
- p->asn_stack[p->tos+i]);
- }
- if (start_of_line)
- *start_of_line = sol;
- return indents;
+ for (i = 0; i < num; i++)
+ do_free(p->stack[p->tos+i].sym, p->asn_stack[p->tos+i]);
}
### The heart of the parser.
We return whatever `asn` was returned by reducing production zero.
-If we can neither shift nor reduce we have an error to handle. We pop
-single entries off the stack until we can shift the `TK_error` symbol,
-then drop input tokens until we find one we can shift into the new error
-state. We need to ensure that something is dropped or shifted after an
-error, or we could get into an infinite loop, only shifting in
-`TK_error`, then reducing. So we track if there has been a shift since
-the last error, and if not the next error always discards one token.
-
When we find `TK_in` and `TK_out` tokens which report indents we need
to handle them directly as the grammar cannot express what we want to
do with them.
Here the NEWLINE will be shifted because nothing can be reduced until
the `if` is seen.
-When during error handling we discard tokens read in, we want to keep
-discarding until we see one that is recognised. If we had a full set
-of LR(1) grammar states, this would mean looking in the look-ahead set,
-but we don't keep a full look-ahead set. We only record the subset
-that leads to SHIFT. We can, however, deduce the look-ahead set by
-looking at the SHIFT subsets for all states that we can get to by
-reducing zero or more times. So we need a little function which
-checks if a given token is in any of these look-ahead sets.
+We have already discussed the bulk of the handling of a "reduce" action,
+with the `pop()` and `shift()` functions doing much of the work. There
+is a little more complexity needed to manage storage for the asn (Abstract
+Syntax Node), and also a test of whether the reduction is permitted.
-###### parser includes
- #include "parser.h"
+When we try to shift the result of reducing production-zero, it will
+fail because there is no next state. In this case the asn will not have
+been stored on the stack, so it get stored in the `ret` variable, and we
+report that that input has been accepted.
-###### parser_run
+###### parser vars
- static int in_lookahead(struct token *tk, const struct state *states, int state)
- {
- while (state >= 0) {
- if (search(&states[state], tk->num) >= 0)
- return 1;
- if (states[state].reduce_prod < 0)
- return 0;
- state = search(&states[state], states[state].reduce_sym);
+ void *ret = NULL;
+
+###### try reduce
+
+ if (states[tos->state].reduce_prod >= 0) {
+ void **body;
+ void *res;
+ const struct state *nextstate = &states[tos->state];
+ int prod = nextstate->reduce_prod;
+ int size = nextstate->reduce_size;
+ int res_size = nextstate->result_size;
+
+ body = p.asn_stack + (p.tos - size);
+ res = res_size ? calloc(1, res_size) : NULL;
+ res_size = do_reduce(prod, body, config, res);
+ if (res_size != nextstate->result_size)
+ abort();
+ pop(&p, size, do_free);
+ if (!shift(&p, nextstate->reduce_sym, res, states)) {
+ accepted = 1;
+ ret = res;
+ parser_trace_action(trace, "Accept");
+ } else
+ parser_trace_action(trace, "Reduce");
+ continue;
+ }
+
+If we can neither shift nor reduce we have an error to handle. There
+are two possible responses to an error: we can pop single frames off the
+stack until we find a frame that can shift `TK_error`, or we can discard
+the current look-ahead symbol.
+
+When we first see an error we do the first of these and set a flag to
+record that we are processing an error. If the normal shift/reduce
+tests fail to find that anything can be done from that state, we start
+dropping tokens until either we manage to shift one, or reach end-of-file.
+
+###### parser vars
+
+ int in_err = 0;
+
+###### did shift
+
+ in_err = 0;
+
+###### handle error
+
+ if (in_err) {
+ parser_trace_action(trace, "DISCARD");
+ if (tk->num == TK_eof)
+ break;
+ free(tk);
+ tk = NULL;
+ } else {
+ struct token *err_tk;
+
+ parser_trace_action(trace, "ERROR");
+
+ err_tk = tok_copy(*tk);
+ while (p.tos > 0 && shift(&p, TK_error, err_tk, states) == 0)
+ // discard this state
+ pop(&p, 1, do_free);
+ if (p.tos == 0) {
+ free(err_tk);
+ // no state accepted TK_error
+ break;
}
- return 0;
+ in_err = 1;
}
+###### parser includes
+ #include "parser.h"
+
+###### parser_run
+
void *parser_run(struct token_state *tokens,
const struct state states[],
int (*do_reduce)(int, void**, struct token_config*, void*),
struct parser p = { 0 };
struct token *tk = NULL;
int accepted = 0;
- int shift_since_err = 1;
- void *ret = NULL;
+ ## parser vars
- shift(&p, TK_eof, 0, 1, NULL, states);
+ shift(&p, TK_eof, NULL, states);
while (!accepted && p.tos > 0) {
- struct token *err_tk;
struct frame *tos = &p.stack[p.tos-1];
if (!tk)
tk = tok_copy(token_next(tokens));
tk, states, non_term, config->known_count);
if (tk->num == TK_in) {
- tos->indents += 1;
- tos->since_newline = 0;
- tos->since_indent = 0;
- if (!states[tos->state].starts_line)
- tos->newline_permitted = 0;
free(tk);
tk = NULL;
parser_trace_action(trace, "Record");
continue;
}
if (tk->num == TK_out) {
- if (states[tos->state].reduce_size >= 0 &&
- states[tos->state].reduce_size <= tos->since_indent)
- goto force_reduce;
- if (states[tos->state].min_prefix >= tos->since_indent) {
+ if (1) {
// OK to cancel
- struct frame *in = tos - tos->since_indent;
- in->indents -= 1;
- if (in->indents == 0) {
- /* Reassess since_indent and newline_permitted */
- if (in > p.stack) {
- in->since_indent = in[-1].since_indent + 1;
- in->newline_permitted = in[-1].newline_permitted;
- } else {
- in->since_indent = 0;
- in->newline_permitted = 0;
- }
- if (states[in->state].starts_line)
- in->newline_permitted = 1;
- while (in < tos) {
- in += 1;
- in->since_indent = in[-1].since_indent + 1;
- if (states[in->state].starts_line)
- in->newline_permitted = 1;
- else
- in->newline_permitted = in[-1].newline_permitted;
- }
- }
+
free(tk);
tk = NULL;
parser_trace_action(trace, "Cancel");
// will fail.
}
if (tk->num == TK_newline) {
- if (!tos->newline_permitted) {
+ if (1) {
free(tk);
tk = NULL;
parser_trace_action(trace, "Discard");
continue;
}
- if (tos->since_newline > 1 &&
- states[tos->state].reduce_size >= 0 &&
- states[tos->state].reduce_size <= tos->since_newline)
- goto force_reduce;
}
- if (shift(&p, tk->num, 0, tk->num == TK_newline, tk, states)) {
- shift_since_err = 1;
+ if (shift(&p, tk->num, tk, states)) {
tk = NULL;
parser_trace_action(trace, "Shift");
+ ## did shift
continue;
}
- force_reduce:
- if (states[tos->state].reduce_prod >= 0 &&
- states[tos->state].newline_only &&
- !(tk->num == TK_newline ||
- tk->num == TK_eof ||
- tk->num == TK_out ||
- (tos->indents == 0 && tos->since_newline == 0))) {
- /* Anything other than newline or out or eof
- * in an error unless we are already at start
- * of line, as this production must end at EOL.
- */
- } else if (states[tos->state].reduce_prod >= 0) {
- void **body;
- void *res;
- const struct state *nextstate = &states[tos->state];
- int prod = nextstate->reduce_prod;
- int size = nextstate->reduce_size;
- int bufsize;
- static char buf[16*1024];
- short indents, start_of_line;
-
- body = p.asn_stack + (p.tos - size);
-
- bufsize = do_reduce(prod, body, config, buf);
-
- indents = pop(&p, size, &start_of_line,
- do_free);
- res = memdup(buf, bufsize);
- memset(buf, 0, bufsize);
- if (!shift(&p, nextstate->reduce_sym,
- indents, start_of_line,
- res, states)) {
- if (prod != 0) abort();
- accepted = 1;
- ret = res;
- }
- parser_trace_action(trace, "Reduce");
- continue;
- }
- /* Error. We walk up the stack until we
- * find a state which will accept TK_error.
- * We then shift in TK_error and see what state
- * that takes us too.
- * Then we discard input tokens until
- * we find one that is acceptable.
- */
- parser_trace_action(trace, "ERROR");
- short indents = 0, start_of_line;
-
- err_tk = tok_copy(*tk);
- while (p.tos > 0 &&
- shift(&p, TK_error, 0, 0,
- err_tk, states) == 0)
- // discard this state
- indents += pop(&p, 1, &start_of_line, do_free);
- if (p.tos == 0) {
- free(err_tk);
- // no state accepted TK_error
- break;
- }
- if (!shift_since_err) {
- /* must discard at least one token to avoid
- * infinite loop.
- */
- if (tk->num == TK_eof)
- break;
- free(tk);
- tk = tok_copy(token_next(tokens));
- }
- shift_since_err = 0;
- tos = &p.stack[p.tos-1];
- while (!in_lookahead(tk, states, tos->state) &&
- tk->num != TK_eof) {
- free(tk);
- tk = tok_copy(token_next(tokens));
- shift_since_err = 1;
- if (tk->num == TK_in)
- indents += 1;
- if (tk->num == TK_out) {
- if (indents == 0)
- break;
- indents -= 1;
- // FIXME update since_indent here
- }
- }
- tos->indents += indents;
+
+ ## try reduce
+ ## handle error
}
free(tk);
- pop(&p, p.tos, NULL, do_free);
+ pop(&p, p.tos, do_free);
free(p.asn_stack);
free(p.stack);
return ret;
[TK_newline] = "NEWLINE",
[TK_eof] = "$eof",
};
- static void parser_trace_state(FILE *trace, struct frame *f, const struct state states[])
- {
- fprintf(trace, "(%d", f->state);
- if (states[f->state].starts_line)
- fprintf(trace, "s");
- if (f->newline_permitted)
- fprintf(trace, "n%d", f->since_newline);
- fprintf(trace, ") ");
- }
void parser_trace(FILE *trace, struct parser *p,
struct token *tk, const struct state states[],
} else
fputs(non_term[sym - TK_reserved - knowns],
trace);
- if (f->indents)
- fprintf(trace, ".%d", f->indents);
- if (f->since_newline == 0)
- fputs("/", trace);
fputs(" ", trace);
}
- parser_trace_state(trace, f, states);
+ fprintf(trace, "(%d) ", f->state);
}
fprintf(trace, "[");
if (tk->num < TK_reserved &&