From 5ff6fc7d1243fa3fdbd87e2523c3136d4faed853 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sun, 24 Nov 2013 17:54:02 +1100 Subject: [PATCH] parsergen: add handling for TK_IN and TK_OUT Indents are tracked. The end of an indented region forces certain reductions. And indents are managed during error handling. Signed-off-by: NeilBrown --- csrc/parsergen.mdc | 64 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 61 insertions(+), 3 deletions(-) diff --git a/csrc/parsergen.mdc b/csrc/parsergen.mdc index eb63507..d2d70d5 100644 --- a/csrc/parsergen.mdc +++ b/csrc/parsergen.mdc @@ -2260,13 +2260,17 @@ We keep the stack as two separate allocations. One, `asn_stack` stores the production, and by keeping a separate `asn` stack, we can just pass a pointer into this stack. -The other allocation stores all other stack fields of which there are two. +The other allocation stores all other stack fields of which there are four. The `state` is the most important one and guides the parsing process. The `sym` is nearly unnecessary. However when we want to free entries from the `asn_stack`, it helps to know what type they are so we can call the right freeing function. The symbol leads us to the right free function through `do_free`. +The `indents` count and the `starts_indented` flag track the line +indents in the symbol. These are used to allow indent information to +guide parsing and error recovery. + As well as the stack of frames we have a `next` frame which is assembled from the incoming token and other information prior to pushing it onto the stack. @@ -2277,6 +2281,8 @@ pushing it onto the stack. struct frame { short state; short sym; + short starts_indented; + short indents; } *stack, next; void **asn_stack; int stack_size; @@ -2322,6 +2328,8 @@ if needed and pushed all the information onto the stacks. 
p->asn_stack[p->tos] = asn; p->tos++; p->next.state = newstate; + p->next.indents = 0; + p->next.starts_indented = 0; return 1; } @@ -2336,12 +2344,16 @@ reduce a production, just before we `shift` the nonterminal in. { int i; p->tos -= num; - for (i = 0; i < num; i++) + for (i = 0; i < num; i++) { + p->next.indents += p->stack[p->tos+i].indents; do_free(p->stack[p->tos+i].sym, p->asn_stack[p->tos+i]); + } - if (num) + if (num) { p->next.state = p->stack[p->tos].state; + p->next.starts_indented = p->stack[p->tos].starts_indented; + } } ### Memory allocation @@ -2385,6 +2397,17 @@ If we can neither shift nor reduce we have an error to handle. We pop single entries off the stack until we can shift the `TK_error` symbol, then drop input tokens until we find one we can shift into the new error state. +When we find `TK_in` and `TK_out` tokens which report indents we need +to handle them directly as the grammar cannot express what we want to +do with them. + +`TK_in` tokens are easy: we simply update the `next` stack frame to +record how many indents there are and that the next token started with +an indent. + +`TK_out` tokens must either be counted off against any pending indent, +or must force reductions until there is a pending indent which isn't +at the start of a production. ###### parser includes #include "parser.h" @@ -2408,6 +2431,25 @@ drop input tokens until we find one we can shift into the new error state. if (trace) parser_trace(trace, &p, tk, states, non_term, knowns); + if (p.next.sym == TK_in) { + p.next.starts_indented = 1; + p.next.indents = 1; + free(tk); + tk = NULL; + continue; + } + if (p.next.sym == TK_out) { + if (p.stack[p.tos-1].indents > p.stack[p.tos-1].starts_indented || + (p.stack[p.tos-1].indents == 1 && + states[p.next.state].reduce_size > 1)) { + p.stack[p.tos-1].indents -= 1; + free(tk); + tk = NULL; + continue; + } + // fall through and force a REDUCE (as 'shift' + // will fail). 
+ } if (shift(&p, tk, states)) { tk = NULL; continue; @@ -2431,6 +2473,15 @@ drop input tokens until we find one we can shift into the new error state. accepted = 1; continue; } + if (tk->num == TK_out) { + // Indent problem - synthesise tokens to get us + // out of here. + fprintf(stderr, "Synthesize %d to handle indent problem\n", states[p.next.state].shift_sym); + p.next.sym = states[p.next.state].shift_sym; + shift(&p, tok_copy(*tk), states); + // FIXME need to report this error somehow + continue; + } /* Error. We walk up the stack until we * find a state which will accept TK_error. * We then shift in TK_error and see what state @@ -2454,6 +2505,13 @@ drop input tokens until we find one we can shift into the new error state. tk->num != TK_eof) { free(tk); tk = tok_copy(token_next(tokens)); + if (tk->num == TK_in) + p.next.indents += 1; + if (tk->num == TK_out) { + if (p.next.indents == 0) + break; + p.next.indents -= 1; + } } if (p.tos == 0 && tk->num == TK_eof) break; -- 2.43.0