So `shift` finds the next state. If that succeeds it extends the
allocations if needed and pushes all the information onto the stacks.
+An extra complication is added to `shift` by the `EOL` token. This
 +token must be generated when a `NEWLINE` is seen but an `EOL` is
 +expected. When this happens, the `NEWLINE` is NOT consumed, so multiple
 +EOLs can appear before a NEWLINE. To indicate that the token was shifted
 +but not consumed, `shift` can return the special value `2`. The token
+number for `EOL` cannot be statically declared, so when the parser
+starts we need to look through the array of non-terminals to find the
+EOL.
+
+###### parser state
+ int tk_eol;
+
+###### find eol
+ p.tk_eol = 0;
+ while (strcmp(non_term[p.tk_eol], "EOL") != 0)
+ p.tk_eol += 1;
+ p.tk_eol += TK_reserved + config->known_count;
+
+
###### parser functions
static int shift(struct parser *p,
const struct state states[])
{
struct frame next = {0};
+ int ret;
int newstate = p->tos
? search(&states[p->stack[p->tos-1].state],
sym)
: 0;
- if (newstate < 0)
+ if (newstate >= 0)
+ ret = 1;
+ else if (sym != TK_newline)
return 0;
+ else {
+ // have a NEWLINE, might need an EOL
+ sym = p->tk_eol;
+ newstate = p->tos
+ ? search(&states[p->stack[p->tos-1].state],
+ sym)
+ : 0;
+ if (newstate < 0)
+ return 0;
+ ret = 2;
+ asn = tok_copy(*(struct token*)asn);
+ }
+
if (p->tos >= p->stack_size) {
p->stack_size += 10;
p->stack = realloc(p->stack, p->stack_size
p->stack[p->tos] = next;
p->asn_stack[p->tos] = asn;
p->tos++;
- return 1;
+ return ret;
}
`pop` primarily moves the top of stack (`tos`) back down the required
### The heart of the parser.
Now we have the parser. For each token we might shift it, trigger a
-reduction, or start error handling. 2D tokens (IN, OUT, NEWLINE) might
-also be ignored. Ignoring tokens is combined with shifting.
+reduction, or start error handling. 2D tokens (IN, OUT, NEWLINE, EOL)
+might also be ignored. Ignoring tokens is combined with shifting.
###### parser vars
unsigned long ignored_indents;
int indent_depth;
-NEWLINE is ignored when in an indented section of text which was not
+NEWLINE/EOL is ignored when in an indented section of text which was not
explicitly expected by the grammar. So if the most recent indent is
ignored, so is any EOL token.
continue;
}
- if (tk->num == TK_newline) {
- if (1) {
- free(tk);
- tk = NULL;
- parser_trace_action(trace, "Discard");
- continue;
- }
- }
- if (shift(&p, tk->num, tk, states)) {
+ switch (shift(&p, tk->num, tk, states)) {
+ case 1:
if (tk->num == TK_out)
p.indent_depth -= 1;
if (tk->num == TK_in) {
p.ignored_indents &= ~(1 << p.indent_depth);
}
tk = NULL;
- parser_trace_action(trace, "Shift");
+ /* fallthrough */
+ case 2:
+ parser_trace_action(trace, tk ? "ShiftEOL" : "Shift");
## did shift
continue;
}
{
## parser vars
+ ## find eol
+
## heart of parser
free(tk);