X-Git-Url: https://ocean-lang.org/code/?p=ocean;a=blobdiff_plain;f=csrc%2Fparsergen.mdc;h=e74054d7996a883d59c2907e9f17ea04164e38e7;hp=a140e37f8c000eac4cb94278d1528c309da88e2b;hb=abd08191347dfb83ac7e1aacf6a6aa3f8f796fcd;hpb=0b4b292a5e6744bcf7f35e012466c969c115c1b5 diff --git a/csrc/parsergen.mdc b/csrc/parsergen.mdc index a140e37..e74054d 100644 --- a/csrc/parsergen.mdc +++ b/csrc/parsergen.mdc @@ -105,7 +105,6 @@ symbol. struct production { unsigned short precedence; enum assoc assoc; - char line_like; ## production fields }; struct grammar { @@ -527,11 +526,7 @@ Now we have all the bits we need to parse a full production. goto abort; } vs = sym_find(g, tk.txt); - if (vs->num == TK_newline) - p.line_like = 1; - else if (vs->num == TK_out) - p.line_like = 2; - else if (vs->precedence == 0) { + if (vs->precedence == 0) { err = "symbol after $$ must have precedence"; goto abort; } else { @@ -940,54 +935,6 @@ changes happen. } } -### Setting `line_like` - -In order to be able to ignore newline tokens when not relevant, but -still include them in the parse when needed, we will need to know -which states can start a "line-like" section of code. We ignore -newlines when there is an indent since the most recent start of a -line-like symbol. - -A "line_like" symbol is simply any symbol that can derive a NEWLINE. -If a symbol cannot derive a NEWLINE, then it is only part of a line - -so is word-like. If it can derive a NEWLINE, then we consider it to -be like a line. - -Clearly the `TK_newline` token can derive a NEWLINE. Any symbol which -is the head of a production that contains a line_like symbol is also a -line-like symbol. We use a new field `line_like` to record this -attribute of symbols, and compute it in a repetitive manner similar to -`set_nullable`. - -###### symbol fields - int line_like; - -###### functions - static void set_line_like(struct grammar *g) - { - int check_again = 1; - g->symtab[TK_newline]->line_like = 1; - while (check_again) { - int p; - check_again = 0; - for (p = 0; p < g->production_count; p++) { - struct production *pr = g->productions[p]; - int s; - - if (pr->head->line_like) - continue; - - for (s = 0 ; s < pr->body_size; s++) { - if (pr->body[s]->line_like) { - pr->head->line_like = 1; - check_again = 1; - break; - } - } - } - } - } - ### Building the `first` sets When calculating what can follow a particular non-terminal, we will need @@ -1230,11 +1177,6 @@ particularly for LALR where itemsets get merged, at which point they need to be consider for completion again. So a `completed` flag is needed. -For correct handling of `TK_newline` when parsing, we will need to -know which states (itemsets) can occur at the start of a line, so we -will record a `starts_line` flag too whenever DOT is at the start of a -`line_like` symbol. - Finally, for handling `TK_out` we need to know whether productions in the current state started *before* the most recent indent. A state doesn't usually keep details of individual productions, so we need to @@ -1357,9 +1299,7 @@ may be supplemented by the LA set for the item which produced the new item. We also collect a set of all symbols which follow "DOT" (in `done`) as -this is used in the next stage. If any of these symbols are flagged as -`line_like`, then this state must be a `starts_line` state so now is a -good time to record that. +this is used in the next stage. When itemsets are created we assign a precedence to the itemset from the complete item, if there is one. We ignore the possibility of there @@ -1380,8 +1320,6 @@ so the item is ineffective. struct symbol *s; struct symset LA = INIT_SYMSET; unsigned short sn = 0; - struct symset LAnl = INIT_SYMSET; - unsigned short snnl = 0; if (is->min_prefix == 0 || (bs > 0 && bs < is->min_prefix)) @@ -1401,13 +1339,11 @@ so the item is ineffective. * not Right-associative, so we mustn't shift it. */ continue; - if (symset_find(&done, s->num) < 0) { + if (symset_find(&done, s->num) < 0) symset_add(&done, s->num, 0); - } + if (s->type != Nonterminal) continue; - if (s->line_like) - is->starts_line = 1; check_again = 1; if (type >= LALR) { // Need the LA set. @@ -1419,10 +1355,6 @@ so the item is ineffective. } sn = save_set(g, LA); LA = set_find(g, sn); - if (symset_find(&LA, TK_newline)) - symset_add(&LAnl, TK_newline, 0); - snnl = save_set(g, LAnl); - LAnl = set_find(g, snnl); } /* Add productions for this symbol */ @@ -1433,10 +1365,7 @@ so the item is ineffective. int itm = item_num(p2, 0); int pos = symset_find(&is->items, itm); if (pos < 0) { - if (g->productions[p2]->line_like) - symset_add(&is->items, itm, snnl); - else - symset_add(&is->items, itm, sn); + symset_add(&is->items, itm, sn); /* Will have re-ordered, so start * from beginning again */ i = -1; @@ -1445,8 +1374,6 @@ so the item is ineffective. struct symset tmp = INIT_SYMSET; struct symset *la = &LA; - if (g->productions[p2]->line_like) - la = &LAnl; symset_union(&tmp, &ss); if (symset_union(&tmp, la)) { is->items.data[pos] = save_set(g, tmp); @@ -1607,7 +1534,6 @@ and we record the changeover point in `first_nonterm`. g->symtab[s->num] = s; set_nullable(g); - set_line_like(g); if (type >= SLR) build_first(g); @@ -1639,7 +1565,7 @@ all the tables that have been generated, plus a description of any conflicts. Firstly we have the complete list of symbols, together with the "FIRST" set if that was generated. We add a mark to each symbol to -show if it is considered to be "line-like" (`<`), or if it is nullable (`.`). +show if it is nullable (`.`). ###### functions @@ -1656,9 +1582,8 @@ show if it is considered to be "line-like" (`<`), or if it is nullable (`.`). if (!s) continue; - printf(" %c%c%3d%c: ", + printf(" %c%3d%c: ", s->nullable ? '.':' ', - s->line_like ? '<':' ', s->num, symtypes[s->type]); prtxt(s->name); if (s->precedence) @@ -1729,10 +1654,6 @@ it up a bit. First the items, with production number and associativity. printf(" [%d%s]", s->precedence, assoc_names[s->assoc]); } - if (pr->line_like == 1) - printf(" $$NEWLINE"); - else if (pr->line_like) - printf(" $$OUT"); printf("\n"); } @@ -2104,7 +2025,7 @@ The go to table is stored in a simple array of `sym` and corresponding } } if (is->go_to.cnt) - fprintf(f, "\t[%d] = { %d, goto_%d, ", + fprintf(f, "\t[%d] = { %d, goto_", i, is->go_to.cnt, i); else fprintf(f, "\t[%d] = { 0, NULL, ", i); @@ -2115,7 +2036,6 @@ The go to table is stored in a simple array of `sym` and corresponding pr->body_size, pr->head->num, is->starts_line, - pr->line_like, is->min_prefix); if (hd->struct_name.txt == NULL) fprintf(f, "0 },\n"); @@ -2125,7 +2045,7 @@ The go to table is stored in a simple array of `sym` and corresponding hd->struct_name.txt, hd->isref ? "*" : ""); } else - fprintf(f, "-1, -1, -1, %d, 0, %d, -1 },\n", + fprintf(f, "-1, -1, -1, %d, %d, -1 },\n", is->starts_line, is->min_prefix); } fprintf(f, "};\n\n");