]> ocean-lang.org Git - ocean/commitdiff
parsergen: introduce $$NEWLINE pseudo-precedence.
authorNeilBrown <neil@brown.name>
Sat, 15 Jun 2019 22:29:16 +0000 (08:29 +1000)
committerNeilBrown <neil@brown.name>
Sat, 15 Jun 2019 23:04:44 +0000 (09:04 +1000)
Sometimes we need a production to be terminated by a newline, but we
don't want to consume the newline with a "shift".
Case in point is:
   Block -> : StatementList

Which can be used with
   Statement -> if Expression Block
   StatementList -> Statement

I want this to parse:

   if something: if otherthing: action

which might seem a little odd, but is syntactically sensible.
The NEWLINE at the end is required, and must close both nested Statements.
The NEWLINE will already cause a REDUCE, but if we don't have
   Block -> : StatementList NEWLINE
then something else could force a reduce, and we don't want that.
So introduce a marking "$$NEWLINE" which is similar to imposing a precedence
on a production.  Now

   Block -> : StatementList $$NEWLINE

means that a NEWLINE is required to end a Block, but it isn't
shifted.  If anything else is found here, it is an error.

We also allow $eof and OUT to reduce this production.

Signed-off-by: NeilBrown <neil@brown.name>
csrc/parsergen.mdc

index d30275e4e4d383a22905acd1e415c72d89fc56de..82d09b3ed1b497362b77ff7a264dff837499c39a 100644 (file)
@@ -100,6 +100,7 @@ symbol.
        struct production {
                unsigned short precedence;
                enum assoc assoc;
+               char line_like;
                ## production fields
        };
        struct grammar {
@@ -496,12 +497,15 @@ Now we have all the bits we need to parse a full production.
                                goto abort;
                        }
                        vs = sym_find(g, tk.txt);
-                       if (vs->type != Virtual) {
+                       if (vs->num == TK_newline)
+                               p.line_like = 1;
+                       else if (vs->type != Virtual) {
                                err = "symbol after $$ must be virtual";
                                goto abort;
+                       } else {
+                               p.precedence = vs->precedence;
+                               p.assoc = vs->assoc;
                        }
-                       p.precedence = vs->precedence;
-                       p.assoc = vs->assoc;
                        tk = token_next(state);
                }
                if (tk.num == TK_open) {
@@ -1337,8 +1341,12 @@ into the go to set, so the item is ineffective.
                        int to_end;
                        add_first(pr, bs+1, &LA, g, &to_end);
                        if (to_end) {
-                               struct symset ss = set_find(g, is->items.data[i]);
-                               symset_union(&LA, &ss);
+                               if (pr->line_like)
+                                       symset_add(&LA, TK_newline, 0);
+                               else {
+                                       struct symset ss = set_find(g, is->items.data[i]);
+                                       symset_union(&LA, &ss);
+                               }
                        }
                        sn = save_set(g, LA);
                        LA = set_find(g, sn);
@@ -1635,6 +1643,8 @@ it up a bit.  First the items, with production number and associativity.
                        printf(" [%d%s]", s->precedence,
                               assoc_names[s->assoc]);
                }
+               if (pr->line_like)
+                       printf(" $$NEWLINE");
                printf("\n");
        }
 
@@ -1833,7 +1843,7 @@ counted, and are reported as non-critical.  This will not affect a
                                int k;
                                for (k = 0; k < la.cnt; k++) {
                                        int pos = symset_find(&shifts, la.syms[k]);
-                                       if (pos >= 0) {
+                                       if (pos >= 0 && la.syms[k] != TK_newline) {
                                                if (symset_find(&la, TK_newline) < 0) {
                                                        printf("  State %d has SHIFT/REDUCE conflict on ", i);
                                                        cnt++;
@@ -1964,7 +1974,8 @@ The go to table is stored in a simple array of `sym` and corresponding
                short reduce_prod;
                short reduce_size;
                short reduce_sym;
-               short starts_line;
+               char starts_line;
+               char newline_only;
                short min_prefix;
        };
 
@@ -2013,13 +2024,15 @@ The go to table is stored in a simple array of `sym` and corresponding
                        }
 
                        if (prod >= 0)
-                               fprintf(f, "\t[%d] = { %d, goto_%d, %d, %d, %d, %d, %d },\n",
+                               fprintf(f, "\t[%d] = { %d, goto_%d, %d, %d, %d, %d, %d, %d },\n",
                                        i, is->go_to.cnt, i, prod,
                                        g->productions[prod]->body_size,
                                        g->productions[prod]->head->num,
-                                       is->starts_line, is->min_prefix);
+                                       is->starts_line,
+                                       g->productions[prod]->line_like,
+                                       is->min_prefix);
                        else
-                               fprintf(f, "\t[%d] = { %d, goto_%d, -1, -1, -1, %d, %d },\n",
+                               fprintf(f, "\t[%d] = { %d, goto_%d, -1, -1, -1, %d, 0, %d },\n",
                                        i, is->go_to.cnt, i,
                                        is->starts_line, is->min_prefix);
                }
@@ -2851,7 +2864,13 @@ checks if a given token is in any of these look-ahead sets.
                                continue;
                        }
                force_reduce:
-                       if (states[tos->state].reduce_prod >= 0) {
+                       if (states[tos->state].reduce_prod >= 0 &&
+                           states[tos->state].newline_only &&
+                           tk->num != TK_newline && tk->num != TK_eof && tk->num != TK_out) {
+                               /* Anything other than newline is an error as this
+                                * production must end at EOL
+                                */
+                       } else if (states[tos->state].reduce_prod >= 0) {
                                void **body;
                                void *res;
                                const struct state *nextstate = &states[tos->state];