function. The symbol leads us to the right free function through
`do_free`.
-The `indents` count tracks the line indents with-in the symbol or
-immediately follow it. These are used to allow indent information to
-guide parsing and error recovery.
-
-`since_newline` tracks how many stack frames since the last
-start-of-line (whether indented or not). So if `since_newline` is
-zero, then this symbol is at the start of a line. Similarly
-`since_indent` counts the number of states since an indent, it is zero
-precisely when `indents` is not zero.
-
-`newline_permitted` keeps track of whether newlines should be ignored
-or not.
-
The stack is most properly seen as alternating states and symbols -
states, like the 'DOT' in items, are between symbols. Each frame in
our stack holds a state and the symbol that was before it. The
struct parser {
struct frame {
short state;
- short newline_permitted;
-
short sym;
- short indents;
- short since_newline;
- short since_indent;
} *stack;
void **asn_stack;
int stack_size;
reduce a production we will pop off frames corresponding to the body
symbols, then push on a frame for the head of the production. This last
is exactly the same process as shifting in a terminal so we use the same
-function for both. In both cases we provide the symbol, the number of
-indents the symbol contains (which will be zero for a terminal symbol)
-and a flag indicating the the symbol was at (or was reduced from a
-symbol which was at) the start of a line. The state is deduced from the
-current top-of-stack state and the new symbol.
+function for both. In both cases we provide the symbol. The state is
+deduced from the current top-of-stack state and the new symbol.
To simplify other code we arrange for `shift` to fail if there is no `goto`
state for the symbol. This is useful in basic parsing due to our design
###### parser functions
static int shift(struct parser *p,
- short sym, short indents, short start_of_line,
- void *asn,
+ short sym, void *asn,
const struct state states[])
{
- // Push an entry onto the stack
struct frame next = {0};
int newstate = p->tos
? search(&states[p->stack[p->tos-1].state],
* sizeof(p->asn_stack[0]));
}
next.sym = sym;
- next.indents = indents;
next.state = newstate;
- if (!start_of_line) {
- if (p->tos)
- next.since_newline = p->stack[p->tos-1].since_newline + 1;
- else
- next.since_newline = 1;
- }
- if (indents)
- next.since_indent = 0;
- else if (p->tos)
- next.since_indent = p->stack[p->tos-1].since_indent + 1;
- else
- next.since_indent = 1;
-
p->stack[p->tos] = next;
p->asn_stack[p->tos] = asn;
p->tos++;
}
`pop` primarily moves the top of stack (`tos`) back down the required
-amount and frees any `asn` entries that need to be freed. It also
-collects a summary of the indents and line starts in the symbols that
-are being removed. It is called _after_ we reduce a production, just
-before we `shift` the nonterminal in.
+amount and frees any `asn` entries that need to be freed. It is called
+_after_ we reduce a production, just before we `shift` the nonterminal
+in.
###### parser functions
- static int pop(struct parser *p, int num,
- short *start_of_line,
- void(*do_free)(short sym, void *asn))
+ static void pop(struct parser *p, int num,
+ void(*do_free)(short sym, void *asn))
{
int i;
- short indents = 0;
- int sol = 0;
p->tos -= num;
- for (i = 0; i < num; i++) {
- sol |= !p->stack[p->tos+i].since_newline;
- indents += p->stack[p->tos+i].indents;
- do_free(p->stack[p->tos+i].sym,
- p->asn_stack[p->tos+i]);
- }
- if (start_of_line)
- *start_of_line = sol;
- return indents;
+ for (i = 0; i < num; i++)
+ do_free(p->stack[p->tos+i].sym, p->asn_stack[p->tos+i]);
}
### The heart of the parser.
int shift_since_err = 1;
void *ret = NULL;
- shift(&p, TK_eof, 0, 1, NULL, states);
+ shift(&p, TK_eof, NULL, states);
while (!accepted && p.tos > 0) {
struct token *err_tk;
struct frame *tos = &p.stack[p.tos-1];
tk, states, non_term, config->known_count);
if (tk->num == TK_in) {
- tos->indents += 1;
- tos->since_newline = 0;
- tos->since_indent = 0;
free(tk);
tk = NULL;
parser_trace_action(trace, "Record");
continue;
}
if (tk->num == TK_out) {
- if (states[tos->state].reduce_size >= 0 &&
- states[tos->state].reduce_size <= tos->since_indent)
- goto force_reduce;
if (1) {
// OK to cancel
- struct frame *in = tos - tos->since_indent;
- in->indents -= 1;
- if (in->indents == 0) {
- /* Reassess since_indent and newline_permitted */
- if (in > p.stack) {
- in->since_indent = in[-1].since_indent + 1;
- in->newline_permitted = in[-1].newline_permitted;
- } else {
- in->since_indent = 0;
- in->newline_permitted = 0;
- }
- while (in < tos) {
- in += 1;
- in->since_indent = in[-1].since_indent + 1;
- }
- }
+
free(tk);
tk = NULL;
parser_trace_action(trace, "Cancel");
// will fail.
}
if (tk->num == TK_newline) {
- if (!tos->newline_permitted) {
+ if (1) {
free(tk);
tk = NULL;
parser_trace_action(trace, "Discard");
continue;
}
- if (tos->since_newline > 1 &&
- states[tos->state].reduce_size >= 0 &&
- states[tos->state].reduce_size <= tos->since_newline)
- goto force_reduce;
}
- if (shift(&p, tk->num, 0, tk->num == TK_newline, tk, states)) {
+ if (shift(&p, tk->num, tk, states)) {
shift_since_err = 1;
tk = NULL;
parser_trace_action(trace, "Shift");
continue;
}
- force_reduce:
- if (states[tos->state].reduce_prod >= 0 &&
- !(tk->num == TK_newline ||
- tk->num == TK_eof ||
- tk->num == TK_out ||
- (tos->indents == 0 && tos->since_newline == 0))) {
- /* Anything other than newline or out or eof
- * in an error unless we are already at start
- * of line, as this production must end at EOL.
- */
- } else if (states[tos->state].reduce_prod >= 0) {
+
+ if (states[tos->state].reduce_prod >= 0) {
void **body;
void *res;
const struct state *nextstate = &states[tos->state];
int prod = nextstate->reduce_prod;
int size = nextstate->reduce_size;
int res_size = nextstate->result_size;
- short indents, start_of_line;
body = p.asn_stack + (p.tos - size);
res = res_size ? calloc(1, res_size) : NULL;
if (res_size != nextstate->result_size)
abort();
- indents = pop(&p, size, &start_of_line,
- do_free);
+ pop(&p, size, do_free);
if (!shift(&p, nextstate->reduce_sym,
- indents, start_of_line,
res, states)) {
if (prod != 0) abort();
accepted = 1;
* we find one that is acceptable.
*/
parser_trace_action(trace, "ERROR");
- short indents = 0, start_of_line;
err_tk = tok_copy(*tk);
while (p.tos > 0 &&
- shift(&p, TK_error, 0, 0,
- err_tk, states) == 0)
+ shift(&p, TK_error, err_tk, states) == 0)
// discard this state
- indents += pop(&p, 1, &start_of_line, do_free);
+ pop(&p, 1, do_free);
if (p.tos == 0) {
free(err_tk);
// no state accepted TK_error
free(tk);
tk = tok_copy(token_next(tokens));
shift_since_err = 1;
- if (tk->num == TK_in)
- indents += 1;
- if (tk->num == TK_out) {
- if (indents == 0)
- break;
- indents -= 1;
- // FIXME update since_indent here
- }
}
- tos->indents += indents;
}
free(tk);
- pop(&p, p.tos, NULL, do_free);
+ pop(&p, p.tos, do_free);
free(p.asn_stack);
free(p.stack);
return ret;
} else
fputs(non_term[sym - TK_reserved - knowns],
trace);
- if (f->indents)
- fprintf(trace, ".%d", f->indents);
- if (f->since_newline == 0)
- fputs("/", trace);
fputs(" ", trace);
}
fprintf(trace, "(%d) ", f->state);