]> ocean-lang.org Git - ocean/blobdiff - csrc/parsergen.mdc
pargergen: typo: i, not 1.
[ocean] / csrc / parsergen.mdc
index 19703bce82000b2ce642a29e9e6ad5c15bd91d3a..cfaa5c4124ac811ec67b974dac3ebb1e66dc1c71 100644 (file)
@@ -869,28 +869,32 @@ changes happen.
                }
        }
 
                }
        }
 
-### Setting `can_eol`
+### Setting `can_eol` and `line_like`
 
 In order to be able to ignore newline tokens when not relevant, but
 still include them in the parse when needed, we will need to know
 which states can start a "line-like" section of code.  We ignore
 newlines when there is an indent since the most recent start of a
 
 In order to be able to ignore newline tokens when not relevant, but
 still include them in the parse when needed, we will need to know
 which states can start a "line-like" section of code.  We ignore
 newlines when there is an indent since the most recent start of a
-line-like section.
+line-like symbol.
 
 
-To know what is line-like, we first need to know which symbols can end
-a line-like section, which is precisely those which can end with a
-newline token.  These symbols don't necessarily alway end with a
-newline, but they can.  Hence they are not described as "lines" but
-only "line-like".
+To know which symbols are line-like, we first need to know which
+symbols start with a NEWLINE token.  Any symbol which is followed by a
+NEWLINE, or anything that starts with a NEWLINE, is deemed to be a line-like symbol.
+Certainly when trying to parse one of these we must take not of NEWLINEs.
 
 
-Clearly the `TK_newline` token can end with a newline.  Any symbol
-which is the head of a production that contains a line-ending symbol
-followed only by nullable symbols is also a line-ending symbol.  We
-use a new field `can_eol` to record this attribute of symbols, and
-compute it in a repetitive manner similar to `set_nullable`.
+Clearly the `TK_newline` token can start with a NEWLINE.  Any symbol
+which is the head of a production that contains a starts-with-NEWLINE
+symbol preceeded only by nullable symbols is also a
+starts-with-NEWLINE symbol.  We use a new field `can_eol` to record
+this attribute of symbols, and compute it in a repetitive manner
+similar to `set_nullable`.
+
+Once we have that, we can determine which symbols are `line_like` be
+seeing which are followed by a `can_eol` symbol in any production.
 
 ###### symbol fields
        int can_eol;
 
 ###### symbol fields
        int can_eol;
+       int line_like;
 
 ###### functions
        static void set_can_eol(struct grammar *g)
 
 ###### functions
        static void set_can_eol(struct grammar *g)
@@ -907,7 +911,7 @@ compute it in a repetitive manner similar to `set_nullable`.
                                if (pr->head->can_eol)
                                        continue;
 
                                if (pr->head->can_eol)
                                        continue;
 
-                               for (s = pr->body_size - 1; s >= 0; s--) {
+                               for (s = 0 ; s < pr->body_size; s++) {
                                        if (pr->body[s]->can_eol) {
                                                pr->head->can_eol = 1;
                                                check_again = 1;
                                        if (pr->body[s]->can_eol) {
                                                pr->head->can_eol = 1;
                                                check_again = 1;
@@ -920,6 +924,19 @@ compute it in a repetitive manner similar to `set_nullable`.
                }
        }
 
                }
        }
 
+       static void set_line_like(struct grammar *g)
+       {
+               int p;
+               for (p = 0; p < g->production_count; p++) {
+                       struct production *pr = g->productions[p];
+                       int s;
+
+                       for (s = 1; s < pr->body_size; s++)
+                               if (pr->body[s]->can_eol)
+                                       pr->body[s-1]->line_like = 1;
+               }
+       }
+
 ### Building the `first` sets
 
 When calculating what can follow a particular non-terminal, we will need to
 ### Building the `first` sets
 
 When calculating what can follow a particular non-terminal, we will need to
@@ -1161,6 +1178,7 @@ should happen, so we don't need to search a second time.
                struct symset go_to;
                char completed;
                char starts_line;
                struct symset go_to;
                char completed;
                char starts_line;
+               int min_prefix;
        };
 
 ###### grammar fields
        };
 
 ###### grammar fields
@@ -1198,7 +1216,7 @@ recalculated and their LA sets updated.
 them to a data structure, of freeing them.
 
        static int add_itemset(struct grammar *g, struct symset ss,
 them to a data structure, of freeing them.
 
        static int add_itemset(struct grammar *g, struct symset ss,
-                              enum grammar_type type, int starts_line)
+                              enum grammar_type type)
        {
                struct itemset **where, *is;
                int i;
        {
                struct itemset **where, *is;
                int i;
@@ -1210,7 +1228,6 @@ them to a data structure, of freeing them.
                        is->items = ss;
                        is->next = *where;
                        is->go_to = INIT_DATASET;
                        is->items = ss;
                        is->next = *where;
                        is->go_to = INIT_DATASET;
-                       is->starts_line = starts_line;
                        *where = is;
                        return is->state;
                }
                        *where = is;
                        return is->state;
                }
@@ -1252,6 +1269,8 @@ be supplemented by the LA set for the item which produce the new item.
 
 We also collect a set of all symbols which follow "DOT" (in `done`) as this
 is used in the next stage.
 
 We also collect a set of all symbols which follow "DOT" (in `done`) as this
 is used in the next stage.
+If any of these symbols are flagged as starting a line, then this
+state must be a `starts_line` state so now is a good time to record that.
 
 NOTE: precedence handling should happen here - I haven't written this yet
 though.
 
 NOTE: precedence handling should happen here - I haven't written this yet
 though.
@@ -1266,11 +1285,17 @@ though.
                struct symset LA = INIT_SYMSET;
                unsigned short sn = 0;
 
                struct symset LA = INIT_SYMSET;
                unsigned short sn = 0;
 
+               if (is->min_prefix == 0 ||
+                   (bs > 0 && bs < is->min_prefix))
+                       is->min_prefix = bs;
                if (bs == pr->body_size)
                        continue;
                s = pr->body[bs];
                if (bs == pr->body_size)
                        continue;
                s = pr->body[bs];
-               if (symset_find(&done, s->num) < 0)
+               if (symset_find(&done, s->num) < 0) {
                        symset_add(&done, s->num, 0);
                        symset_add(&done, s->num, 0);
+                       if (s->line_like)
+                               is->starts_line = 1;
+               }
                if (s->type != Nonterminal)
                        continue;
                again = 1;
                if (s->type != Nonterminal)
                        continue;
                again = 1;
@@ -1324,15 +1349,11 @@ with a pre-existing itemset).
        for (i = 0; i < done.cnt; i++) {
                int j;
                unsigned short state;
        for (i = 0; i < done.cnt; i++) {
                int j;
                unsigned short state;
-               int starts_line = 0;
                struct symbol *sym = g->symtab[done.syms[i]];
                struct symset newitemset = INIT_SYMSET;
                if (type >= LALR)
                        newitemset = INIT_DATASET;
 
                struct symbol *sym = g->symtab[done.syms[i]];
                struct symset newitemset = INIT_SYMSET;
                if (type >= LALR)
                        newitemset = INIT_DATASET;
 
-               if (sym->can_eol ||
-                   (sym->nullable && is->starts_line))
-                       starts_line = 1;
                for (j = 0; j < is->items.cnt; j++) {
                        int itm = is->items.syms[j];
                        int p = item_prod(itm);
                for (j = 0; j < is->items.cnt; j++) {
                        int itm = is->items.syms[j];
                        int p = item_prod(itm);
@@ -1365,7 +1386,7 @@ with a pre-existing itemset).
                                }
                        }
                }
                                }
                        }
                }
-               state = add_itemset(g, newitemset, type, starts_line);
+               state = add_itemset(g, newitemset, type);
                if (symset_find(&is->go_to, done.syms[i]) < 0)
                        symset_add(&is->go_to, done.syms[i], state);
        }
                if (symset_find(&is->go_to, done.syms[i]) < 0)
                        symset_add(&is->go_to, done.syms[i], state);
        }
@@ -1389,7 +1410,7 @@ with `TK_eof` as the LA set.
                }
                // production 0, offset 0 (with no data)
                symset_add(&first, item_num(0, 0), la);
                }
                // production 0, offset 0 (with no data)
                symset_add(&first, item_num(0, 0), la);
-               add_itemset(g, first, type, g->productions[0]->body[0]->can_eol);
+               add_itemset(g, first, type);
                for (again = 0, is = g->items;
                     is;
                     is = is->next ?: again ? (again = 0, g->items) : NULL) {
                for (again = 0, is = g->items;
                     is;
                     is = is->next ?: again ? (again = 0, g->items) : NULL) {
@@ -1450,6 +1471,7 @@ changeover point in `first_nonterm`.
 
                set_nullable(g);
                set_can_eol(g);
 
                set_nullable(g);
                set_can_eol(g);
+               set_line_like(g);
                if (type >= SLR)
                        build_first(g);
 
                if (type >= SLR)
                        build_first(g);
 
@@ -1481,7 +1503,8 @@ all the tables that have been generated, plus a description of any conflicts.
 
 Firstly we have the complete list of symbols, together with the
 "FIRST" set if that was generated.  We add a mark to each symbol to
 
 Firstly we have the complete list of symbols, together with the
 "FIRST" set if that was generated.  We add a mark to each symbol to
-show if it can end in a newline (`>`), or if it is nullable (`.`).
+show if it can end in a newline (`>`), if it is considered to be
+"line-like" (`<`), or if it is nullable (`.`).
 
 ###### functions
 
 
 ###### functions
 
@@ -1498,9 +1521,10 @@ show if it can end in a newline (`>`), or if it is nullable (`.`).
                        if (!s)
                                continue;
 
                        if (!s)
                                continue;
 
-                       printf(" %c%c%3d%c: ",
+                       printf(" %c%c%c%3d%c: ",
                               s->nullable ? '.':' ',
                               s->can_eol ? '>':' ',
                               s->nullable ? '.':' ',
                               s->can_eol ? '>':' ',
+                              s->line_like ? '<':' ',
                               s->num, symtypes[s->type]);
                        prtxt(s->name);
                        if (s->precedence)
                               s->num, symtypes[s->type]);
                        prtxt(s->name);
                        if (s->precedence)
@@ -1609,7 +1633,8 @@ Now we can report all the item sets complete with items, LA sets, and GO TO.
                for (s = 0; s < g->states; s++) {
                        int j;
                        struct itemset *is = g->statetab[s];
                for (s = 0; s < g->states; s++) {
                        int j;
                        struct itemset *is = g->statetab[s];
-                       printf("  Itemset %d:%s\n", s, is->starts_line?" (startsline)":"");
+                       printf("  Itemset %d:%s min prefix=%d\n",
+                              s, is->starts_line?" (startsline)":"", is->min_prefix);
                        for (j = 0; j < is->items.cnt; j++) {
                                report_item(g, is->items.syms[j]);
                                if (is->items.data != NO_DATA)
                        for (j = 0; j < is->items.cnt; j++) {
                                report_item(g, is->items.syms[j]);
                                if (is->items.data != NO_DATA)
@@ -1873,6 +1898,7 @@ The go to table is stored in a simple array of `sym` and corresponding
                short reduce_sym;
                short shift_sym;
                short starts_line;
                short reduce_sym;
                short shift_sym;
                short starts_line;
+               short min_prefix;
        };
 
 
        };
 
 
@@ -1929,15 +1955,15 @@ The go to table is stored in a simple array of `sym` and corresponding
                        }
 
                        if (prod >= 0)
                        }
 
                        if (prod >= 0)
-                               fprintf(f, "\t[%d] = { %d, goto_%d, %d, %d, %d, 0, %d },\n",
+                               fprintf(f, "\t[%d] = { %d, goto_%d, %d, %d, %d, 0, %d, %d },\n",
                                        i, is->go_to.cnt, i, prod,
                                        g->productions[prod]->body_size,
                                        g->productions[prod]->head->num,
                                        i, is->go_to.cnt, i, prod,
                                        g->productions[prod]->body_size,
                                        g->productions[prod]->head->num,
-                                       is->starts_line);
+                                       is->starts_line, is->min_prefix);
                        else
                        else
-                               fprintf(f, "\t[%d] = { %d, goto_%d, -1, -1, -1, %d, %d },\n",
+                               fprintf(f, "\t[%d] = { %d, goto_%d, -1, -1, -1, %d, %d, %d },\n",
                                        i, is->go_to.cnt, i, shift_sym,
                                        i, is->go_to.cnt, i, shift_sym,
-                                       is->starts_line);
+                                       is->starts_line, is->min_prefix);
                }
                fprintf(f, "};\n\n");
        }
                }
                fprintf(f, "};\n\n");
        }
@@ -2411,17 +2437,23 @@ The `state` is the most important one and guides the parsing process.  The
 freeing function.  The symbol leads us to the right free function through
 `do_free`.
 
 freeing function.  The symbol leads us to the right free function through
 `do_free`.
 
-The `indents` count and the `starts_indented` flag track the line
-indents in the symbol.  These are used to allow indent information to
+The `indents` count tracks the line indents with in the symbol or
+immediately follow it.  These are used to allow indent information to
 guide parsing and error recovery.
 
 guide parsing and error recovery.
 
+`since_newline` tracks how many stack frames since the last
+start-of-line (whether indented or not).  So if `since_newline` is
+zero, then this symbol is at the start of a line.  Similarly
+`since_indent` counts the number of states since an indent, it is zero
+precisely when `indents` is not zero.
+
 `newline_permitted` keeps track of whether newlines should be ignored
 `newline_permitted` keeps track of whether newlines should be ignored
-or not, and `starts_line` records if this state stated on a newline.
+or not.
 
 
-The stack is more properly seen as alternating states and symbols -
+The stack is most properly seen as alternating states and symbols -
 states, like the 'DOT' in items, are between symbols.  Each frame in
 our stack holds a state and the symbol that was before it.  The
 states, like the 'DOT' in items, are between symbols.  Each frame in
 our stack holds a state and the symbol that was before it.  The
-bottom of stack holds the start state, but no symbol, as nothing came
+bottom of stack holds the start state but no symbol, as nothing came
 before the beginning.
 
 ###### parser functions
 before the beginning.
 
 ###### parser functions
@@ -2432,8 +2464,9 @@ before the beginning.
                        short newline_permitted;
 
                        short sym;
                        short newline_permitted;
 
                        short sym;
-                       short starts_indented;
                        short indents;
                        short indents;
+                       short since_newline;
+                       short since_indent;
                } *stack;
                void **asn_stack;
                int stack_size;
                } *stack;
                void **asn_stack;
                int stack_size;
@@ -2444,12 +2477,15 @@ before the beginning.
 
 Two operations are needed on the stack - shift (which is like push) and pop.
 
 
 Two operations are needed on the stack - shift (which is like push) and pop.
 
-Shift applies not only to terminals but also to non-terminals.  When we
-reduce a production we will pop off entries corresponding to the body
-symbols, then push on an item for the head of the production.  This last is
-exactly the same process as shifting in a terminal so we use the same
-function for both.  In both cases we provide a stack frame which
-contains the symbol to shift and related indent information.
+Shift applies not only to terminals but also to non-terminals.  When
+we reduce a production we will pop off entries corresponding to the
+body symbols, then push on an item for the head of the production.
+This last is exactly the same process as shifting in a terminal so we
+use the same function for both.  In both cases we provide the symbol,
+the number of indents the symbol contains (which will be zero for a
+terminal symbol) and a flag indicating the the symbol was at (or was
+reduced from a symbol which was at) the start of a line.  The state is
+deduced from the current top-of-stack state and the new symbol.
 
 To simplify other code we arrange for `shift` to fail if there is no `goto`
 state for the symbol.  This is useful in basic parsing due to our design
 
 To simplify other code we arrange for `shift` to fail if there is no `goto`
 state for the symbol.  This is useful in basic parsing due to our design
@@ -2459,19 +2495,26 @@ function reports if it could.
 `shift` is also used to push state zero onto the stack, so if the
 stack is empty, it always chooses zero as the next state.
 
 `shift` is also used to push state zero onto the stack, so if the
 stack is empty, it always chooses zero as the next state.
 
-So `shift` finds the next state.  If that succeed it extends the allocations
-if needed and pushes all the information onto the stacks.
+So `shift` finds the next state.  If that succeeds it extends the
+allocations if needed and pushes all the information onto the stacks.
+
+Newlines are permitted after a `starts_line` state until an internal
+indent.  If the new frame has neither a `starts_line` state nor an
+indent, newlines are permitted if the previous stack frame permitted
+them.
 
 ###### parser functions
 
 
 ###### parser functions
 
-       static int shift(struct parser *p, struct frame *next,
+       static int shift(struct parser *p,
+                        short sym, short indents, short start_of_line,
                         void *asn,
                         const struct state states[])
        {
                // Push an entry onto the stack
                         void *asn,
                         const struct state states[])
        {
                // Push an entry onto the stack
+               struct frame next = {0};
                int newstate = p->tos
                        ? search(&states[p->stack[p->tos-1].state],
                int newstate = p->tos
                        ? search(&states[p->stack[p->tos-1].state],
-                                next->sym)
+                                sym)
                        : 0;
                if (newstate < 0)
                        return 0;
                        : 0;
                if (newstate < 0)
                        return 0;
@@ -2482,16 +2525,33 @@ if needed and pushes all the information onto the stacks.
                        p->asn_stack = realloc(p->asn_stack, p->stack_size
                                           * sizeof(p->asn_stack[0]));
                }
                        p->asn_stack = realloc(p->asn_stack, p->stack_size
                                           * sizeof(p->asn_stack[0]));
                }
-               next->state = newstate;
-               next->newline_permitted = 0;
-               if (p->tos)
-                       next->newline_permitted =
-                               p->stack[p->tos-1].newline_permitted;
-               if (next->indents)
-                       next->newline_permitted = 0;
+               next.sym = sym;
+               next.indents = indents;
+               next.state = newstate;
                if (states[newstate].starts_line)
                if (states[newstate].starts_line)
-                       next->newline_permitted = 1;
-               p->stack[p->tos] = *next;
+                       next.newline_permitted = 1;
+               else if (indents)
+                       next.newline_permitted = 0;
+               else if (p->tos)
+                       next.newline_permitted =
+                               p->stack[p->tos-1].newline_permitted;
+               else
+                       next.newline_permitted = 0;
+
+               if (!start_of_line) {
+                       if (p->tos)
+                               next.since_newline = p->stack[p->tos-1].since_newline + 1;
+                       else
+                               next.since_newline = 1;
+               }
+               if (indents)
+                       next.since_indent = 0;
+               else if (p->tos)
+                       next.since_indent = p->stack[p->tos-1].since_indent + 1;
+               else
+                       next.since_indent = 1;
+
+               p->stack[p->tos] = next;
                p->asn_stack[p->tos] = asn;
                p->tos++;
                return 1;
                p->asn_stack[p->tos] = asn;
                p->tos++;
                return 1;
@@ -2499,26 +2559,30 @@ if needed and pushes all the information onto the stacks.
 
 `pop` primarily moves the top of stack (`tos`) back down the required
 amount and frees any `asn` entries that need to be freed.  It also
 
 `pop` primarily moves the top of stack (`tos`) back down the required
 amount and frees any `asn` entries that need to be freed.  It also
-collects a summary of the indents in the symbols that are being
-removed. It is called _after_ we reduce a production, just before we
-`shift` the nonterminal in.
-
-`pop` is only called if there are entries to remove, so `num` is never zero.
+collects a summary of the indents and line starts in the symbols that
+are being removed. It is called _after_ we reduce a production, just
+before we `shift` the nonterminal in.
 
 ###### parser functions
 
 
 ###### parser functions
 
-       static void pop(struct parser *p, int num, struct frame *next,
-                       void(*do_free)(short sym, void *asn))
+       static int pop(struct parser *p, int num,
+                      short *start_of_line,
+                      void(*do_free)(short sym, void *asn))
        {
                int i;
        {
                int i;
+               short indents = 0;
+               int sol = 0;
+
                p->tos -= num;
                p->tos -= num;
-               next->starts_indented = p->stack[p->tos].starts_indented;
-               next->indents = 0;
                for (i = 0; i < num; i++) {
                for (i = 0; i < num; i++) {
-                       next->indents += p->stack[p->tos+i].indents;
+                       sol |= !p->stack[p->tos+i].since_newline;
+                       indents += p->stack[p->tos+i].indents;
                        do_free(p->stack[p->tos+i].sym,
                                p->asn_stack[p->tos+i]);
                }
                        do_free(p->stack[p->tos+i].sym,
                                p->asn_stack[p->tos+i]);
                }
+               if (start_of_line)
+                       *start_of_line = sol;
+               return indents;
        }
 
 ### Memory allocation
        }
 
 ### Memory allocation
@@ -2552,9 +2616,9 @@ copying, hence `memdup` and `tokcopy`.
 
 ### The heart of the parser.
 
 
 ### The heart of the parser.
 
-Now we have the parser.  If we can shift, we do, though newlines and
-reducing indenting may block that.  If not and we can reduce we do.
-If the production we reduced was production zero, then we have
+Now we have the parser.  If we can shift we do, though newlines and
+reducing indenting may block that.  If not and we can reduce we do
+that.  If the production we reduced was production zero, then we have
 accepted the input and can finish.
 
 We return whatever `asn` was returned by reducing production zero.
 accepted the input and can finish.
 
 We return whatever `asn` was returned by reducing production zero.
@@ -2567,16 +2631,23 @@ When we find `TK_in` and `TK_out` tokens which report indents we need
 to handle them directly as the grammar cannot express what we want to
 do with them.
 
 to handle them directly as the grammar cannot express what we want to
 do with them.
 
-`TK_in` tokens are easy: we simply update the `next` stack frame to
-record how many indents there are and that the next token started with
-an indent.
+`TK_in` tokens are easy: we simply update indent count in the top stack frame to
+record how many indents there are following the previous token.
 
 
-`TK_out` tokens must either be counted off against any pending indent,
-or must force reductions until there is a pending indent which isn't
-at the start of a production.
+`TK_out` tokens must either be canceled against an indent count
+within the stack.  If we can reduce some symbols that are all since
+the most recent indent, then we do that first.  If the minimum prefix
+of the current state then extents back before the most recent indent,
+that indent can be cancelled.  If the minimum prefix is shorter then
+the indent is premature and we must start error handling, which
+currently doesn't work at all.
 
 
-`TK_newline` tokens are ignored precisely if there has been an indent
-since the last state which could have been at the start of a line.
+`TK_newline` tokens are ignored unless the top stack frame records
+that they are permitted.  In that case they will not be considered for
+shifting if it is possible to reduce some symbols that are all since
+the most recent start of line.  This is how a newline forcible
+terminates any line-like structure - we try to reduce down to at most
+one symbol for each line where newlines are allowed.
 
 ###### parser includes
        #include "parser.h"
 
 ###### parser includes
        #include "parser.h"
@@ -2589,60 +2660,84 @@ since the last state which could have been at the start of a line.
                         struct token_config *config)
        {
                struct parser p = { 0 };
                         struct token_config *config)
        {
                struct parser p = { 0 };
-               struct frame next = { 0 };
                struct token *tk = NULL;
                int accepted = 0;
                void *ret = NULL;
 
                struct token *tk = NULL;
                int accepted = 0;
                void *ret = NULL;
 
-               shift(&p, &next, NULL, states);
+               shift(&p, TK_eof, 0, 1, NULL, states);
                while (!accepted) {
                        struct token *err_tk;
                        struct frame *tos = &p.stack[p.tos-1];
                        if (!tk)
                                tk = tok_copy(token_next(tokens));
                while (!accepted) {
                        struct token *err_tk;
                        struct frame *tos = &p.stack[p.tos-1];
                        if (!tk)
                                tk = tok_copy(token_next(tokens));
-                       next.sym = tk->num;
-                       if (trace)
-                               parser_trace(trace, &p, &next, tk, states, non_term, config->known_count);
-
-                       if (next.sym == TK_in) {
-                               next.starts_indented = 1;
-                               next.indents = 1;
+                       parser_trace(trace, &p,
+                                    tk, states, non_term, config->known_count);
+
+                       if (tk->num == TK_in) {
+                               tos->indents += 1;
+                               tos->since_newline = 0;
+                               tos->since_indent = 0;
+                               if (!states[tos->state].starts_line)
+                                       tos->newline_permitted = 0;
                                free(tk);
                                tk = NULL;
                                free(tk);
                                tk = NULL;
+                               parser_trace_action(trace, "Record");
                                continue;
                        }
                                continue;
                        }
-                       if (next.sym == TK_out) {
-                               if (tos->indents > tos->starts_indented ||
-                                   (tos->indents == 1 &&
-                                    states[tos->state].reduce_size != 1)) {
-                                       tos->indents -= 1;
-                                       if (tos->indents <= tos->starts_indented) {
-                                               // no internal indent any more, reassess 'newline_permitted'
-                                               if (states[tos->state].starts_line)
-                                                       tos->newline_permitted = 1;
-                                               else if (p.tos > 1)
-                                                       tos->newline_permitted = p.stack[p.tos-2].newline_permitted;
+                       if (tk->num == TK_out) {
+                               if (states[tos->state].reduce_size >= 0 &&
+                                   states[tos->state].reduce_size <= tos->since_indent)
+                                       goto force_reduce;
+                               if (states[tos->state].min_prefix >= tos->since_indent) {
+                                       // OK to cancel
+                                       struct frame *in = tos - tos->since_indent;
+                                       in->indents -= 1;
+                                       if (in->indents == 0) {
+                                               /* Reassess since_indent and newline_permitted */
+                                               if (in > p.stack) {
+                                                       in->since_indent = in[-1].since_indent + 1;
+                                                       in->newline_permitted = in[-1].newline_permitted;
+                                               } else {
+                                                       in->since_indent = 0;
+                                                       in->newline_permitted = 0;
+                                               }
+                                               if (states[in->state].starts_line)
+                                                       in->newline_permitted = 1;
+                                               while (in < tos) {
+                                                       in += 1;
+                                                       in->since_indent = in[-1].since_indent + 1;
+                                                       if (states[in->state].starts_line)
+                                                               in->newline_permitted = 1;
+                                                       else
+                                                               in->newline_permitted = in[-1].newline_permitted;
+                                               }
                                        }
                                        free(tk);
                                        tk = NULL;
                                        }
                                        free(tk);
                                        tk = NULL;
+                                       parser_trace_action(trace, "Cancel");
                                        continue;
                                }
                                // fall through and force a REDUCE (as 'shift'
                                // will fail).
                        }
                                        continue;
                                }
                                // fall through and force a REDUCE (as 'shift'
                                // will fail).
                        }
-                       if (next.sym == TK_newline) {
-                               if (! tos->newline_permitted) {
+                       if (tk->num == TK_newline) {
+                               if (!tos->newline_permitted) {
                                        free(tk);
                                        tk = NULL;
                                        free(tk);
                                        tk = NULL;
+                                       parser_trace_action(trace, "Discard");
                                        continue;
                                }
                                        continue;
                                }
+                               if (tos->since_newline > 1 &&
+                                   states[tos->state].reduce_size >= 0 &&
+                                   states[tos->state].reduce_size <= tos->since_newline)
+                                       goto force_reduce;
                        }
                        }
-                       if (shift(&p, &next, tk, states)) {
+                       if (shift(&p, tk->num, 0, tk->num == TK_newline, tk, states)) {
                                tk = NULL;
                                tk = NULL;
-                               next.starts_indented = 0;
-                               next.indents = 0;
+                               parser_trace_action(trace, "Shift");
                                continue;
                        }
                                continue;
                        }
+               force_reduce:
                        if (states[tos->state].reduce_prod >= 0) {
                                void **body;
                                void *res;
                        if (states[tos->state].reduce_prod >= 0) {
                                void **body;
                                void *res;
@@ -2651,38 +2746,34 @@ since the last state which could have been at the start of a line.
                                int size = nextstate->reduce_size;
                                int bufsize;
                                static char buf[16*1024];
                                int size = nextstate->reduce_size;
                                int bufsize;
                                static char buf[16*1024];
-                               struct frame frame;
-                               frame.sym = nextstate->reduce_sym;
+                               short indents, start_of_line;
 
                                body = p.asn_stack + (p.tos - size);
 
                                bufsize = do_reduce(prod, body, config, buf);
 
 
                                body = p.asn_stack + (p.tos - size);
 
                                bufsize = do_reduce(prod, body, config, buf);
 
-                               if (size)
-                                       pop(&p, size, &frame, do_free);
-                               else {
-                                       frame.indents = next.indents;
-                                       frame.starts_indented = frame.indents;
-                                       next.indents = 0;
-                                       next.starts_indented = 0;
-                               }
+                               indents = pop(&p, size, &start_of_line,
+                                             do_free);
                                res = memdup(buf, bufsize);
                                memset(buf, 0, bufsize);
                                res = memdup(buf, bufsize);
                                memset(buf, 0, bufsize);
-                               if (!shift(&p, &frame, res, states)) {
+                               if (!shift(&p, nextstate->reduce_sym,
+                                          indents, start_of_line,
+                                          res, states)) {
                                        if (prod != 0) abort();
                                        accepted = 1;
                                        ret = res;
                                }
                                        if (prod != 0) abort();
                                        accepted = 1;
                                        ret = res;
                                }
+                               parser_trace_action(trace, "Reduce");
                                continue;
                        }
                        if (tk->num == TK_out) {
                                // Indent problem - synthesise tokens to get us
                                // out of here.
                                continue;
                        }
                        if (tk->num == TK_out) {
                                // Indent problem - synthesise tokens to get us
                                // out of here.
-                               struct frame frame = { 0 };
                                fprintf(stderr, "Synthesize %d to handle indent problem\n", states[tos->state].shift_sym);
                                fprintf(stderr, "Synthesize %d to handle indent problem\n", states[tos->state].shift_sym);
-                               frame.sym = states[tos->state].shift_sym;
-                               shift(&p, &frame, tok_copy(*tk), states);
+                               shift(&p, states[tos->state].shift_sym,
+                                     0, 1, tok_copy(*tk), states);
                                // FIXME need to report this error somehow
                                // FIXME need to report this error somehow
+                               parser_trace_action(trace, "Synthesize");
                                continue;
                        }
                        /* Error. We walk up the stack until we
                                continue;
                        }
                        /* Error. We walk up the stack until we
@@ -2692,13 +2783,15 @@ since the last state which could have been at the start of a line.
                         * Then we discard input tokens until
                         * we find one that is acceptable.
                         */
                         * Then we discard input tokens until
                         * we find one that is acceptable.
                         */
+                       parser_trace_action(trace, "ERROR");
+                       short indents = 0, start_of_line;
 
                        err_tk = tok_copy(*tk);
 
                        err_tk = tok_copy(*tk);
-                       next.sym = TK_error;
-                       while (shift(&p, &next, err_tk, states) == 0
+                       while (shift(&p, TK_error, 0, 0,
+                                    err_tk, states) == 0
                               && p.tos > 0)
                                // discard this state
                               && p.tos > 0)
                                // discard this state
-                               pop(&p, 1, &next, do_free);
+                               indents += pop(&p, 1, &start_of_line, do_free);
                        if (p.tos == 0) {
                                free(err_tk);
                                // no state accepted TK_error
                        if (p.tos == 0) {
                                free(err_tk);
                                // no state accepted TK_error
@@ -2710,19 +2803,21 @@ since the last state which could have been at the start of a line.
                                free(tk);
                                tk = tok_copy(token_next(tokens));
                                if (tk->num == TK_in)
                                free(tk);
                                tk = tok_copy(token_next(tokens));
                                if (tk->num == TK_in)
-                                       next.indents += 1;
+                                       indents += 1;
                                if (tk->num == TK_out) {
                                if (tk->num == TK_out) {
-                                       if (next.indents == 0)
+                                       if (indents == 0)
                                                break;
                                                break;
-                                       next.indents -= 1;
+                                       indents -= 1;
+                                       // FIXME update since_indent here
                                }
                        }
                        if (p.tos == 0 && tk->num == TK_eof)
                                break;
                                }
                        }
                        if (p.tos == 0 && tk->num == TK_eof)
                                break;
+                       tos = &p.stack[p.tos-1];
+                       tos->indents += indents;
                }
                free(tk);
                }
                free(tk);
-               if (p.tos)
-                       pop(&p, p.tos, &next, do_free);
+               pop(&p, p.tos, NULL, do_free);
                free(p.asn_stack);
                free(p.stack);
                return ret;
                free(p.asn_stack);
                free(p.stack);
                return ret;
@@ -2768,15 +2863,17 @@ end inside square brackets.
                if (states[f->state].starts_line)
                        fprintf(trace, "s");
                if (f->newline_permitted)
                if (states[f->state].starts_line)
                        fprintf(trace, "s");
                if (f->newline_permitted)
-                       fprintf(trace, "n");
+                       fprintf(trace, "n%d", f->since_newline);
                fprintf(trace, ") ");
        }
 
                fprintf(trace, ") ");
        }
 
-       void parser_trace(FILE *trace, struct parser *p, struct frame *n,
+       void parser_trace(FILE *trace, struct parser *p,
                          struct token *tk, const struct state states[],
                          const char *non_term[], int knowns)
        {
                int i;
                          struct token *tk, const struct state states[],
                          const char *non_term[], int knowns)
        {
                int i;
+               if (!trace)
+                       return;
                for (i = 0; i < p->tos; i++) {
                        struct frame *f = &p->stack[i];
                        if (i) {
                for (i = 0; i < p->tos; i++) {
                        struct frame *f = &p->stack[i];
                        if (i) {
@@ -2791,8 +2888,9 @@ end inside square brackets.
                                        fputs(non_term[sym - TK_reserved - knowns],
                                              trace);
                                if (f->indents)
                                        fputs(non_term[sym - TK_reserved - knowns],
                                              trace);
                                if (f->indents)
-                                       fprintf(trace, "%c%d", f->starts_indented?':':'.',
-                                               f->indents);
+                                       fprintf(trace, ".%d", f->indents);
+                               if (f->since_newline == 0)
+                                       fputs("/", trace);
                                fputs(" ", trace);
                        }
                        parser_trace_state(trace, f, states);
                                fputs(" ", trace);
                        }
                        parser_trace_state(trace, f, states);
@@ -2803,10 +2901,13 @@ end inside square brackets.
                        fputs(reserved_words[tk->num], trace);
                else
                        text_dump(trace, tk->txt, 20);
                        fputs(reserved_words[tk->num], trace);
                else
                        text_dump(trace, tk->txt, 20);
-               if (n->indents)
-                       fprintf(trace, "%c%d", n->starts_indented?':':'.',
-                               n->indents);
-               fputs("]\n", trace);
+               fputs("]", trace);
+       }
+
+       void parser_trace_action(FILE *trace, char *action)
+       {
+               if (trace)
+                       fprintf(trace, " - %s\n", action);
        }
 
 # A Worked Example
        }
 
 # A Worked Example