]> ocean-lang.org Git - ocean/blobdiff - csrc/parsergen.mdc
parsergen: revise "newline_permitted" definition.
[ocean] / csrc / parsergen.mdc
index 2aa296d9508d3ffe44e2d035f0c1898f68cb6db5..573f6def52c12c3889fdcc97eab140850b884dac 100644 (file)
@@ -869,29 +869,32 @@ changes happen.
                }
        }
 
-### Setting `can_eol` and `starts_line`
+### Setting `can_eol` and `line_like`
 
 In order to be able to ignore newline tokens when not relevant, but
 still include them in the parse when needed, we will need to know
 which states can start a "line-like" section of code.  We ignore
 newlines when there is an indent since the most recent start of a
-line-like section.
+line-like symbol.
 
-To know what is line-like, we first need to know which symbols can end
-a line-like section, which is precisely those which can end with a
-newline token.  These symbols don't necessarily alway end with a
-newline, but they can.  Hence they are not described as "lines" but
-only "line-like".
+To know which symbols are line-like, we first need to know which
+symbols start with a NEWLINE token.  Any symbol which is followed by a
+NEWLINE, or anything that starts with a NEWLINE, is deemed to be a line-like symbol.
+Certainly when trying to parse one of these we must take note of NEWLINEs.
 
-Clearly the `TK_newline` token can end with a newline.  Any symbol
-which is the head of a production that contains a line-ending symbol
-followed only by nullable symbols is also a line-ending symbol.  We
-use a new field `can_eol` to record this attribute of symbols, and
-compute it in a repetitive manner similar to `set_nullable`.
+Clearly the `TK_newline` token can start with a NEWLINE.  Any symbol
+which is the head of a production that contains a starts-with-NEWLINE
+symbol preceded only by nullable symbols is also a
+starts-with-NEWLINE symbol.  We use a new field `can_eol` to record
+this attribute of symbols, and compute it in a repetitive manner
+similar to `set_nullable`.
+
+Once we have that, we can determine which symbols are `line_like` by
+seeing which are followed by a `can_eol` symbol in any production.
 
 ###### symbol fields
        int can_eol;
-       int starts_line;
+       int line_like;
 
 ###### functions
        static void set_can_eol(struct grammar *g)
@@ -908,7 +911,7 @@ compute it in a repetitive manner similar to `set_nullable`.
                                if (pr->head->can_eol)
                                        continue;
 
-                               for (s = pr->body_size - 1; s >= 0; s--) {
+                               for (s = 0 ; s < pr->body_size; s++) {
                                        if (pr->body[s]->can_eol) {
                                                pr->head->can_eol = 1;
                                                check_again = 1;
@@ -921,16 +924,16 @@ compute it in a repetitive manner similar to `set_nullable`.
                }
        }
 
-       static void set_starts_line(struct grammar *g)
+       static void set_line_like(struct grammar *g)
        {
                int p;
                for (p = 0; p < g->production_count; p++) {
                        struct production *pr = g->productions[p];
                        int s;
 
-                       for (s = 0; s < pr->body_size - 1; s++)
+                       for (s = 1; s < pr->body_size; s++)
                                if (pr->body[s]->can_eol)
-                                       pr->body[s+1]->starts_line = 1;
+                                       pr->body[s-1]->line_like = 1;
                }
        }
 
@@ -1286,7 +1289,7 @@ though.
                s = pr->body[bs];
                if (symset_find(&done, s->num) < 0) {
                        symset_add(&done, s->num, 0);
-                       if (s->starts_line)
+                       if (s->line_like)
                                is->starts_line = 1;
                }
                if (s->type != Nonterminal)
@@ -1464,7 +1467,7 @@ changeover point in `first_nonterm`.
 
                set_nullable(g);
                set_can_eol(g);
-               set_starts_line(g);
+               set_line_like(g);
                if (type >= SLR)
                        build_first(g);
 
@@ -1517,7 +1520,7 @@ line (`<`), or if it is nullable (`.`).
                        printf(" %c%c%c%3d%c: ",
                               s->nullable ? '.':' ',
                               s->can_eol ? '>':' ',
-                              s->starts_line ? '<':' ',
+                              s->line_like ? '<':' ',
                               s->num, symtypes[s->type]);
                        prtxt(s->name);
                        if (s->precedence)
@@ -2455,7 +2458,7 @@ before the beginning.
                        short sym;
                        short starts_indented;
                        short indents;
-                       short starts_newline;
+                       short since_newline;
                } *stack;
                void **asn_stack;
                int stack_size;
@@ -2514,16 +2517,22 @@ So we walk down:
                                           * sizeof(p->asn_stack[0]));
                }
                next->state = newstate;
-               next->newline_permitted = 0;
-               if (p->tos)
-                       next->newline_permitted =
-                               (p->stack[p->tos-1].newline_permitted?:-1)+1;
-               if (next->indents > next->starts_indented)
-                       next->newline_permitted = 0;
-               if (next->indents && next->newline_permitted > 2)
-                       next->newline_permitted = 0;
                if (states[newstate].starts_line)
                        next->newline_permitted = 1;
+               else if (next->indents)
+                       next->newline_permitted = 0;
+               else if (p->tos)
+                       next->newline_permitted =
+                               p->stack[p->tos-1].newline_permitted;
+               else
+                       next->newline_permitted = 0;
+
+               if (next->since_newline) {
+                       if (p->tos)
+                               next->since_newline = p->stack[p->tos-1].since_newline + 1;
+                       else
+                               next->since_newline = 1;
+               }
                p->stack[p->tos] = *next;
                p->asn_stack[p->tos] = asn;
                p->tos++;
@@ -2547,8 +2556,8 @@ removed. It is called _after_ we reduce a production, just before we
                p->tos -= num;
                next->starts_indented =
                        p->stack[p->tos].starts_indented;
-               next->starts_newline =
-                       p->stack[p->tos].starts_newline;
+               next->since_newline =
+                       p->stack[p->tos].since_newline;
                next->indents = 0;
                for (i = 0; i < num; i++) {
                        next->indents += p->stack[p->tos+i].indents;
@@ -2630,7 +2639,6 @@ since the last state which could have been at the start of a line.
                int accepted = 0;
                void *ret = NULL;
 
-               next.starts_newline = 1;
                shift(&p, &next, NULL, states);
                while (!accepted) {
                        struct token *err_tk;
@@ -2643,7 +2651,7 @@ since the last state which could have been at the start of a line.
                        if (next.sym == TK_in) {
                                next.starts_indented = 1;
                                next.indents = 1;
-                               next.starts_newline = 1;
+                               next.since_newline = 0;
                                free(tk);
                                tk = NULL;
                                parser_trace_action(trace, "Record");
@@ -2659,7 +2667,9 @@ since the last state which could have been at the start of a line.
                                                if (states[tos->state].starts_line)
                                                        tos->newline_permitted = 1;
                                                else if (p.tos > 1)
-                                                       tos->newline_permitted = (p.stack[p.tos-2].newline_permitted ?:-1)+1;
+                                                       tos->newline_permitted = p.stack[p.tos-2].newline_permitted;
+                                               else
+                                                       tos->newline_permitted = 0;
                                        }
                                        free(tk);
                                        tk = NULL;
@@ -2670,22 +2680,25 @@ since the last state which could have been at the start of a line.
                                // will fail).
                        }
                        if (next.sym == TK_newline) {
-                               if (! tos->newline_permitted) {
+                               if (!tos->newline_permitted) {
                                        free(tk);
                                        tk = NULL;
                                        parser_trace_action(trace, "Discard");
                                        continue;
                                }
+                               if (states[tos->state].reduce_size > 0 &&
+                                   states[tos->state].reduce_size < tos->since_newline)
+                                       goto force_reduce;
                        }
                        if (shift(&p, &next, tk, states)) {
-                               next.starts_newline =
-                                       tk->num == TK_newline;
+                               next.since_newline = !(tk->num == TK_newline);
                                next.starts_indented = 0;
                                next.indents = 0;
                                tk = NULL;
                                parser_trace_action(trace, "Shift");
                                continue;
                        }
+               force_reduce:
                        if (states[tos->state].reduce_prod >= 0) {
                                void **body;
                                void *res;
@@ -2706,7 +2719,7 @@ since the last state which could have been at the start of a line.
                                else {
                                        frame.indents = next.indents;
                                        frame.starts_indented = frame.indents;
-                                       frame.starts_newline = 0;
+                                       frame.since_newline = 1;
                                        next.indents = 0;
                                        next.starts_indented = 0;
                                }
@@ -2726,6 +2739,7 @@ since the last state which could have been at the start of a line.
                                struct frame frame = { 0 };
                                fprintf(stderr, "Synthesize %d to handle indent problem\n", states[tos->state].shift_sym);
                                frame.sym = states[tos->state].shift_sym;
+                               frame.since_newline = 1;
                                shift(&p, &frame, tok_copy(*tk), states);
                                // FIXME need to report this error somehow
                                parser_trace_action(trace, "Synthesize");
@@ -2842,7 +2856,7 @@ end inside square brackets.
                                if (f->indents)
                                        fprintf(trace, "%c%d", f->starts_indented?':':'.',
                                                f->indents);
-                               if (f->starts_newline)
+                               if (f->since_newline == 0)
                                        fputs("/", trace);
                                fputs(" ", trace);
                        }
@@ -2857,7 +2871,7 @@ end inside square brackets.
                if (n->indents)
                        fprintf(trace, "%c%d", n->starts_indented?':':'.',
                                n->indents);
-               if (n->starts_newline)
+               if (n->since_newline == 0)
                        fputs("/", trace);
                fputs("]", trace);
        }