ocean-lang.org Git - ocean/commitdiff
parsergen: remove tracking of indents and line starts etc.
author: NeilBrown <neil@brown.name>
Fri, 5 Mar 2021 10:01:17 +0000 (21:01 +1100)
committer: NeilBrown <neil@brown.name>
Wed, 10 Mar 2021 01:00:32 +0000 (12:00 +1100)
We will want some of this back, but in a very different form.
So remove it all for now.

Signed-off-by: NeilBrown <neil@brown.name>
csrc/parsergen.mdc

index fe129e6a335da426a579001d6e31bdb5286a5910..0f4bf1f4eb301b3ed9a5109b0307e8ee95d77871 100644 (file)
@@ -2617,19 +2617,6 @@ helps to know what type they are so we can call the right freeing
 function.  The symbol leads us to the right free function through
 `do_free`.
 
-The `indents` count tracks the line indents with-in the symbol or
-immediately follow it.  These are used to allow indent information to
-guide parsing and error recovery.
-
-`since_newline` tracks how many stack frames since the last
-start-of-line (whether indented or not).  So if `since_newline` is
-zero, then this symbol is at the start of a line.  Similarly
-`since_indent` counts the number of states since an indent, it is zero
-precisely when `indents` is not zero.
-
-`newline_permitted` keeps track of whether newlines should be ignored
-or not.
-
 The stack is most properly seen as alternating states and symbols -
 states, like the 'DOT' in items, are between symbols.  Each frame in
 our stack holds a state and the symbol that was before it.  The
@@ -2642,12 +2629,7 @@ to mark the beginning of the file as well as the end.
        struct parser {
                struct frame {
                        short state;
-                       short newline_permitted;
-
                        short sym;
-                       short indents;
-                       short since_newline;
-                       short since_indent;
                } *stack;
                void **asn_stack;
                int stack_size;
@@ -2662,11 +2644,8 @@ Shift applies not only to terminals but also to non-terminals.  When we
 reduce a production we will pop off frames corresponding to the body
 symbols, then push on a frame for the head of the production.  This last
 is exactly the same process as shifting in a terminal so we use the same
-function for both.  In both cases we provide the symbol, the number of
-indents the symbol contains (which will be zero for a terminal symbol)
-and a flag indicating the the symbol was at (or was reduced from a
-symbol which was at) the start of a line.  The state is deduced from the
-current top-of-stack state and the new symbol.
+function for both.  In both cases we provide the symbol.  The state is
+deduced from the current top-of-stack state and the new symbol.
 
 To simplify other code we arrange for `shift` to fail if there is no `goto`
 state for the symbol.  This is useful in basic parsing due to our design
@@ -2682,11 +2661,9 @@ allocations if needed and pushes all the information onto the stacks.
 ###### parser functions
 
        static int shift(struct parser *p,
-                        short sym, short indents, short start_of_line,
-                        void *asn,
+                        short sym, void *asn,
                         const struct state states[])
        {
-               // Push an entry onto the stack
                struct frame next = {0};
                int newstate = p->tos
                        ? search(&states[p->stack[p->tos-1].state],
@@ -2702,22 +2679,8 @@ allocations if needed and pushes all the information onto the stacks.
                                           * sizeof(p->asn_stack[0]));
                }
                next.sym = sym;
-               next.indents = indents;
                next.state = newstate;
 
-               if (!start_of_line) {
-                       if (p->tos)
-                               next.since_newline = p->stack[p->tos-1].since_newline + 1;
-                       else
-                               next.since_newline = 1;
-               }
-               if (indents)
-                       next.since_indent = 0;
-               else if (p->tos)
-                       next.since_indent = p->stack[p->tos-1].since_indent + 1;
-               else
-                       next.since_indent = 1;
-
                p->stack[p->tos] = next;
                p->asn_stack[p->tos] = asn;
                p->tos++;
@@ -2725,31 +2688,20 @@ allocations if needed and pushes all the information onto the stacks.
        }
 
 `pop` primarily moves the top of stack (`tos`) back down the required
-amount and frees any `asn` entries that need to be freed.  It also
-collects a summary of the indents and line starts in the symbols that
-are being removed. It is called _after_ we reduce a production, just
-before we `shift` the nonterminal in.
+amount and frees any `asn` entries that need to be freed.  It is called
+_after_ we reduce a production, just before we `shift` the nonterminal
+in.
 
 ###### parser functions
 
-       static int pop(struct parser *p, int num,
-                      short *start_of_line,
-                      void(*do_free)(short sym, void *asn))
+       static void pop(struct parser *p, int num,
+                       void(*do_free)(short sym, void *asn))
        {
                int i;
-               short indents = 0;
-               int sol = 0;
 
                p->tos -= num;
-               for (i = 0; i < num; i++) {
-                       sol |= !p->stack[p->tos+i].since_newline;
-                       indents += p->stack[p->tos+i].indents;
-                       do_free(p->stack[p->tos+i].sym,
-                               p->asn_stack[p->tos+i]);
-               }
-               if (start_of_line)
-                       *start_of_line = sol;
-               return indents;
+               for (i = 0; i < num; i++)
+                       do_free(p->stack[p->tos+i].sym, p->asn_stack[p->tos+i]);
        }
 
 ### The heart of the parser.
@@ -2849,7 +2801,7 @@ checks if a given token is in any of these look-ahead sets.
                int shift_since_err = 1;
                void *ret = NULL;
 
-               shift(&p, TK_eof, 0, 1, NULL, states);
+               shift(&p, TK_eof, NULL, states);
                while (!accepted && p.tos > 0) {
                        struct token *err_tk;
                        struct frame *tos = &p.stack[p.tos-1];
@@ -2859,36 +2811,15 @@ checks if a given token is in any of these look-ahead sets.
                                     tk, states, non_term, config->known_count);
 
                        if (tk->num == TK_in) {
-                               tos->indents += 1;
-                               tos->since_newline = 0;
-                               tos->since_indent = 0;
                                free(tk);
                                tk = NULL;
                                parser_trace_action(trace, "Record");
                                continue;
                        }
                        if (tk->num == TK_out) {
-                               if (states[tos->state].reduce_size >= 0 &&
-                                   states[tos->state].reduce_size <= tos->since_indent)
-                                       goto force_reduce;
                                if (1) {
                                        // OK to cancel
-                                       struct frame *in = tos - tos->since_indent;
-                                       in->indents -= 1;
-                                       if (in->indents == 0) {
-                                               /* Reassess since_indent and newline_permitted */
-                                               if (in > p.stack) {
-                                                       in->since_indent = in[-1].since_indent + 1;
-                                                       in->newline_permitted = in[-1].newline_permitted;
-                                               } else {
-                                                       in->since_indent = 0;
-                                                       in->newline_permitted = 0;
-                                               }
-                                               while (in < tos) {
-                                                       in += 1;
-                                                       in->since_indent = in[-1].since_indent + 1;
-                                               }
-                                       }
+
                                        free(tk);
                                        tk = NULL;
                                        parser_trace_action(trace, "Cancel");
@@ -2898,41 +2829,27 @@ checks if a given token is in any of these look-ahead sets.
                                // will fail.
                        }
                        if (tk->num == TK_newline) {
-                               if (!tos->newline_permitted) {
+                               if (1) {
                                        free(tk);
                                        tk = NULL;
                                        parser_trace_action(trace, "Discard");
                                        continue;
                                }
-                               if (tos->since_newline > 1 &&
-                                   states[tos->state].reduce_size >= 0 &&
-                                   states[tos->state].reduce_size <= tos->since_newline)
-                                       goto force_reduce;
                        }
-                       if (shift(&p, tk->num, 0, tk->num == TK_newline, tk, states)) {
+                       if (shift(&p, tk->num, tk, states)) {
                                shift_since_err = 1;
                                tk = NULL;
                                parser_trace_action(trace, "Shift");
                                continue;
                        }
-               force_reduce:
-                       if (states[tos->state].reduce_prod >= 0 &&
-                           !(tk->num == TK_newline ||
-                             tk->num == TK_eof ||
-                             tk->num == TK_out ||
-                             (tos->indents == 0 && tos->since_newline == 0))) {
-                               /* Anything other than newline or out or eof
-                                * in an error unless we are already at start
-                                * of line, as this production must end at EOL.
-                                */
-                       } else if (states[tos->state].reduce_prod >= 0) {
+
+                       if (states[tos->state].reduce_prod >= 0) {
                                void **body;
                                void *res;
                                const struct state *nextstate = &states[tos->state];
                                int prod = nextstate->reduce_prod;
                                int size = nextstate->reduce_size;
                                int res_size = nextstate->result_size;
-                               short indents, start_of_line;
 
                                body = p.asn_stack + (p.tos - size);
                                res = res_size ? calloc(1, res_size) : NULL;
@@ -2940,11 +2857,9 @@ checks if a given token is in any of these look-ahead sets.
                                if (res_size != nextstate->result_size)
                                        abort();
 
-                               indents = pop(&p, size, &start_of_line,
-                                             do_free);
+                               pop(&p, size, do_free);
 
                                if (!shift(&p, nextstate->reduce_sym,
-                                          indents, start_of_line,
                                           res, states)) {
                                        if (prod != 0) abort();
                                        accepted = 1;
@@ -2961,14 +2876,12 @@ checks if a given token is in any of these look-ahead sets.
                         * we find one that is acceptable.
                         */
                        parser_trace_action(trace, "ERROR");
-                       short indents = 0, start_of_line;
 
                        err_tk = tok_copy(*tk);
                        while (p.tos > 0 &&
-                              shift(&p, TK_error, 0, 0,
-                                    err_tk, states) == 0)
+                              shift(&p, TK_error, err_tk, states) == 0)
                                // discard this state
-                               indents += pop(&p, 1, &start_of_line, do_free);
+                               pop(&p, 1, do_free);
                        if (p.tos == 0) {
                                free(err_tk);
                                // no state accepted TK_error
@@ -2990,19 +2903,10 @@ checks if a given token is in any of these look-ahead sets.
                                free(tk);
                                tk = tok_copy(token_next(tokens));
                                shift_since_err = 1;
-                               if (tk->num == TK_in)
-                                       indents += 1;
-                               if (tk->num == TK_out) {
-                                       if (indents == 0)
-                                               break;
-                                       indents -= 1;
-                                       // FIXME update since_indent here
-                               }
                        }
-                       tos->indents += indents;
                }
                free(tk);
-               pop(&p, p.tos, NULL, do_free);
+               pop(&p, p.tos, do_free);
                free(p.asn_stack);
                free(p.stack);
                return ret;
@@ -3063,10 +2967,6 @@ end inside square brackets.
                                } else
                                        fputs(non_term[sym - TK_reserved - knowns],
                                              trace);
-                               if (f->indents)
-                                       fprintf(trace, ".%d", f->indents);
-                               if (f->since_newline == 0)
-                                       fputs("/", trace);
                                fputs(" ", trace);
                        }
                        fprintf(trace, "(%d) ", f->state);