]> ocean-lang.org Git - ocean/blobdiff - csrc/parsergen.mdc
parsergen: revise rule for NEWLINE forcing reduce
[ocean] / csrc / parsergen.mdc
index 45fb09e5228f26ce3240a8daa46f1d6d092ffa7d..48c4d62c6fe58f44164ac1382121806ea1255c20 100644 (file)
@@ -2437,9 +2437,8 @@ The `state` is the most important one and guides the parsing process.  The
 freeing function.  The symbol leads us to the right free function through
 `do_free`.
 
-The `indents` count and the `starts_indented` flag track the line
-indents in the symbol.  These are used to allow indent information to
-guide parsing and error recovery.
+The `indents` count tracks the line indents in the symbol.  This is
+used to allow indent information to guide parsing and error recovery.
 
 `since_newline` tracks how many stack frames since the last
 start-of-line (whether indented or not).  So if `since_newline` is
@@ -2462,9 +2461,9 @@ before the beginning.
                        short newline_permitted;
 
                        short sym;
-                       short starts_indented;
                        short indents;
                        short since_newline;
+                       short since_indent;
                } *stack;
                void **asn_stack;
                int stack_size;
@@ -2504,14 +2503,16 @@ So we walk down:
 
 ###### parser functions
 
-       static int shift(struct parser *p, struct frame *next,
+       static int shift(struct parser *p,
+                        short sym, short indents, short start_of_line,
                         void *asn,
                         const struct state states[])
        {
                // Push an entry onto the stack
+               struct frame next = {0};
                int newstate = p->tos
                        ? search(&states[p->stack[p->tos-1].state],
-                                next->sym)
+                                sym)
                        : 0;
                if (newstate < 0)
                        return 0;
@@ -2522,24 +2523,33 @@ So we walk down:
                        p->asn_stack = realloc(p->asn_stack, p->stack_size
                                           * sizeof(p->asn_stack[0]));
                }
-               next->state = newstate;
+               next.sym = sym;
+               next.indents = indents;
+               next.state = newstate;
                if (states[newstate].starts_line)
-                       next->newline_permitted = 1;
-               else if (next->indents)
-                       next->newline_permitted = 0;
+                       next.newline_permitted = 1;
+               else if (indents)
+                       next.newline_permitted = 0;
                else if (p->tos)
-                       next->newline_permitted =
+                       next.newline_permitted =
                                p->stack[p->tos-1].newline_permitted;
                else
-                       next->newline_permitted = 0;
+                       next.newline_permitted = 0;
 
-               if (next->since_newline) {
+               if (!start_of_line) {
                        if (p->tos)
-                               next->since_newline = p->stack[p->tos-1].since_newline + 1;
+                               next.since_newline = p->stack[p->tos-1].since_newline + 1;
                        else
-                               next->since_newline = 1;
+                               next.since_newline = 1;
                }
-               p->stack[p->tos] = *next;
+               if (indents)
+                       next.since_indent = 0;
+               else if (p->tos)
+                       next.since_indent = p->stack[p->tos-1].since_indent + 1;
+               else
+                       next.since_indent = 1;
+
+               p->stack[p->tos] = next;
                p->asn_stack[p->tos] = asn;
                p->tos++;
                return 1;
@@ -2555,21 +2565,24 @@ removed. It is called _after_ we reduce a production, just before we
 
 ###### parser functions
 
-       static void pop(struct parser *p, int num, struct frame *next,
-                       void(*do_free)(short sym, void *asn))
+       /*
+        * Remove the top `num` frames from the parse stack, passing each
+        * frame's symbol and attached value to `do_free`.
+        *
+        * Returns the sum of the `indents` counts of the removed frames.
+        * If `start_of_line` is not NULL, it is set non-zero when any of
+        * the removed frames had `since_newline == 0`, and zero otherwise.
+        */
+       static int pop(struct parser *p, int num,
+                      short *start_of_line,
+                      void(*do_free)(short sym, void *asn))
        {
                int i;
+               short indents = 0;
+               int sol = 0;
+
                p->tos -= num;
-               next->starts_indented =
-                       p->stack[p->tos].starts_indented;
-               next->since_newline =
-                       p->stack[p->tos].since_newline;
-               next->indents = 0;
                for (i = 0; i < num; i++) {
-                       next->indents += p->stack[p->tos+i].indents;
+                       // Index with 'i' so that every popped frame is
+                       // examined, not one fixed slot.
+                       sol |= !p->stack[p->tos+i].since_newline;
+                       indents += p->stack[p->tos+i].indents;
                        do_free(p->stack[p->tos+i].sym,
                                p->asn_stack[p->tos+i]);
                }
+               if (start_of_line)
+                       *start_of_line = sol;
+               return indents;
        }
 
 ### Memory allocation
@@ -2640,42 +2653,57 @@ since the last state which could have been at the start of a line.
                         struct token_config *config)
        {
                struct parser p = { 0 };
-               struct frame next = { 0 };
                struct token *tk = NULL;
                int accepted = 0;
                void *ret = NULL;
 
-               shift(&p, &next, NULL, states);
+               shift(&p, TK_eof, 0, 1, NULL, states);
                while (!accepted) {
                        struct token *err_tk;
                        struct frame *tos = &p.stack[p.tos-1];
                        if (!tk)
                                tk = tok_copy(token_next(tokens));
-                       next.sym = tk->num;
-                       parser_trace(trace, &p, &next, tk, states, non_term, config->known_count);
-
-                       if (next.sym == TK_in) {
-                               next.starts_indented = 1;
-                               next.indents = 1;
-                               next.since_newline = 0;
+                       parser_trace(trace, &p,
+                                    tk, states, non_term, config->known_count);
+
+                       if (tk->num == TK_in) {
+                               tos->indents += 1;
+                               tos->since_newline = 0;
+                               tos->since_indent = 0;
+                               if (!states[tos->state].starts_line)
+                                       tos->newline_permitted = 0;
                                free(tk);
                                tk = NULL;
                                parser_trace_action(trace, "Record");
                                continue;
                        }
-                       if (next.sym == TK_out) {
-                               if (tos->indents > tos->starts_indented ||
-                                   (tos->indents == 1 &&
-                                    states[tos->state].reduce_size != 1)) {
-                                       tos->indents -= 1;
-                                       if (tos->indents <= tos->starts_indented) {
-                                               // no internal indent any more, reassess 'newline_permitted'
-                                               if (states[tos->state].starts_line)
-                                                       tos->newline_permitted = 1;
-                                               else if (p.tos > 1)
-                                                       tos->newline_permitted = p.stack[p.tos-2].newline_permitted;
-                                               else
-                                                       tos->newline_permitted = 0;
+                       if (tk->num == TK_out) {
+                               if (states[tos->state].reduce_size >= 0 &&
+                                   states[tos->state].reduce_size <= tos->since_indent)
+                                       goto force_reduce;
+                               if (states[tos->state].min_prefix >= tos->since_indent) {
+                                       // OK to cancel
+                                       struct frame *in = tos - tos->since_indent;
+                                       in->indents -= 1;
+                                       if (in->indents == 0) {
+                                               /* Reassess since_indent and newline_permitted */
+                                               if (in > p.stack) {
+                                                       in->since_indent = in[-1].since_indent + 1;
+                                                       in->newline_permitted = in[-1].newline_permitted;
+                                               } else {
+                                                       in->since_indent = 0;
+                                                       in->newline_permitted = 0;
+                                               }
+                                               if (states[in->state].starts_line)
+                                                       in->newline_permitted = 1;
+                                               while (in < tos) {
+                                                       in += 1;
+                                                       in->since_indent = in[-1].since_indent + 1;
+                                                       if (states[in->state].starts_line)
+                                                               in->newline_permitted = 1;
+                                                       else
+                                                               in->newline_permitted = in[-1].newline_permitted;
+                                               }
                                        }
                                        free(tk);
                                        tk = NULL;
@@ -2685,21 +2713,19 @@ since the last state which could have been at the start of a line.
                                // fall through and force a REDUCE (as 'shift'
                                // will fail).
                        }
-                       if (next.sym == TK_newline) {
+                       if (tk->num == TK_newline) {
                                if (!tos->newline_permitted) {
                                        free(tk);
                                        tk = NULL;
                                        parser_trace_action(trace, "Discard");
                                        continue;
                                }
-                               if (states[tos->state].reduce_size > 0 &&
-                                   states[tos->state].reduce_size < tos->since_newline)
+                               if (tos->since_newline > 1 &&
+                                   states[tos->state].reduce_size >= 0 &&
+                                   states[tos->state].reduce_size <= tos->since_newline)
                                        goto force_reduce;
                        }
-                       if (shift(&p, &next, tk, states)) {
-                               next.since_newline = !(tk->num == TK_newline);
-                               next.starts_indented = 0;
-                               next.indents = 0;
+                       if (shift(&p, tk->num, 0, tk->num == TK_newline, tk, states)) {
                                tk = NULL;
                                parser_trace_action(trace, "Shift");
                                continue;
@@ -2713,25 +2739,24 @@ since the last state which could have been at the start of a line.
                                int size = nextstate->reduce_size;
                                int bufsize;
                                static char buf[16*1024];
-                               struct frame frame;
-                               frame.sym = nextstate->reduce_sym;
+                               short indents, start_of_line;
 
                                body = p.asn_stack + (p.tos - size);
 
                                bufsize = do_reduce(prod, body, config, buf);
 
                                if (size)
-                                       pop(&p, size, &frame, do_free);
+                                       indents = pop(&p, size, &start_of_line,
+                                                     do_free);
                                else {
-                                       frame.indents = next.indents;
-                                       frame.starts_indented = frame.indents;
-                                       frame.since_newline = 1;
-                                       next.indents = 0;
-                                       next.starts_indented = 0;
+                                       indents = 0;
+                                       start_of_line = 0;
                                }
                                res = memdup(buf, bufsize);
                                memset(buf, 0, bufsize);
-                               if (!shift(&p, &frame, res, states)) {
+                               if (!shift(&p, nextstate->reduce_sym,
+                                          indents, start_of_line,
+                                          res, states)) {
                                        if (prod != 0) abort();
                                        accepted = 1;
                                        ret = res;
@@ -2742,11 +2767,9 @@ since the last state which could have been at the start of a line.
                        if (tk->num == TK_out) {
                                // Indent problem - synthesise tokens to get us
                                // out of here.
-                               struct frame frame = { 0 };
                                fprintf(stderr, "Synthesize %d to handle indent problem\n", states[tos->state].shift_sym);
-                               frame.sym = states[tos->state].shift_sym;
-                               frame.since_newline = 1;
-                               shift(&p, &frame, tok_copy(*tk), states);
+                               shift(&p, states[tos->state].shift_sym,
+                                     0, 1, tok_copy(*tk), states);
                                // FIXME need to report this error somehow
                                parser_trace_action(trace, "Synthesize");
                                continue;
@@ -2759,13 +2782,14 @@ since the last state which could have been at the start of a line.
                         * we find one that is acceptable.
                         */
                        parser_trace_action(trace, "ERROR");
+                       short indents = 0, start_of_line;
 
                        err_tk = tok_copy(*tk);
-                       next.sym = TK_error;
-                       while (shift(&p, &next, err_tk, states) == 0
+                       while (shift(&p, TK_error, 0, 0,
+                                    err_tk, states) == 0
                               && p.tos > 0)
                                // discard this state
-                               pop(&p, 1, &next, do_free);
+                               indents += pop(&p, 1, &start_of_line, do_free);
                        if (p.tos == 0) {
                                free(err_tk);
                                // no state accepted TK_error
@@ -2777,19 +2801,22 @@ since the last state which could have been at the start of a line.
                                free(tk);
                                tk = tok_copy(token_next(tokens));
                                if (tk->num == TK_in)
-                                       next.indents += 1;
+                                       indents += 1;
                                if (tk->num == TK_out) {
-                                       if (next.indents == 0)
+                                       if (indents == 0)
                                                break;
-                                       next.indents -= 1;
+                                       indents -= 1;
+                                       // FIXME update since_indent here
                                }
                        }
                        if (p.tos == 0 && tk->num == TK_eof)
                                break;
+                       tos = &p.stack[p.tos-1];
+                       tos->indents += indents;
                }
                free(tk);
                if (p.tos)
-                       pop(&p, p.tos, &next, do_free);
+                       pop(&p, p.tos, NULL, do_free);
                free(p.asn_stack);
                free(p.stack);
                return ret;
@@ -2839,7 +2866,7 @@ end inside square brackets.
                fprintf(trace, ") ");
        }
 
-       void parser_trace(FILE *trace, struct parser *p, struct frame *n,
+       void parser_trace(FILE *trace, struct parser *p,
                          struct token *tk, const struct state states[],
                          const char *non_term[], int knowns)
        {
@@ -2860,8 +2887,7 @@ end inside square brackets.
                                        fputs(non_term[sym - TK_reserved - knowns],
                                              trace);
                                if (f->indents)
-                                       fprintf(trace, "%c%d", f->starts_indented?':':'.',
-                                               f->indents);
+                                       fprintf(trace, ".%d", f->indents);
                                if (f->since_newline == 0)
                                        fputs("/", trace);
                                fputs(" ", trace);
@@ -2874,11 +2900,6 @@ end inside square brackets.
                        fputs(reserved_words[tk->num], trace);
                else
                        text_dump(trace, tk->txt, 20);
-               if (n->indents)
-                       fprintf(trace, "%c%d", n->starts_indented?':':'.',
-                               n->indents);
-               if (n->since_newline == 0)
-                       fputs("/", trace);
                fputs("]", trace);
        }