X-Git-Url: https://ocean-lang.org/code/?a=blobdiff_plain;f=csrc%2Fparsergen.mdc;h=3f36df9a6873c3c94f2f127d8a0ca6419e50603c;hb=850a39a0a761e0af89c15253f075ecd9e9ecc6ee;hp=1981c901ac013afe1a2ab932dbc2d8742666b964;hpb=cbbbc8ec0230cf82765da33ecff7be9824006519;p=ocean diff --git a/csrc/parsergen.mdc b/csrc/parsergen.mdc index 1981c90..3f36df9 100644 --- a/csrc/parsergen.mdc +++ b/csrc/parsergen.mdc @@ -499,6 +499,8 @@ Now we have all the bits we need to parse a full production. vs = sym_find(g, tk.txt); if (vs->num == TK_newline) p.line_like = 1; + else if (vs->num == TK_out) + p.line_like = 2; else if (vs->precedence == 0) { err = "symbol after $$ must have precedence"; goto abort; @@ -634,6 +636,11 @@ to produce errors that the parser is better positioned to handle. } else if (tk.num == TK_mark && text_is(tk.txt, "$*")) { err = dollar_line(state, g, 1); + } else if (tk.num == TK_mark + && text_is(tk.txt, "//")) { + while (tk.num != TK_newline && + tk.num != TK_eof) + tk = token_next(state); } else { err = "Unrecognised token at start of line."; } @@ -1332,11 +1339,11 @@ into the go to set, so the item is ineffective. continue; if (symset_find(&done, s->num) < 0) { symset_add(&done, s->num, 0); - if (s->line_like) - is->starts_line = 1; } if (s->type != Nonterminal) continue; + if (s->line_like) + is->starts_line = 1; again = 1; if (type >= LALR) { // Need the LA set. @@ -1348,7 +1355,8 @@ into the go to set, so the item is ineffective. } sn = save_set(g, LA); LA = set_find(g, sn); - symset_add(&LAnl, TK_newline, 0); + if (symset_find(&LA, TK_newline) >= 0) /* only when LA has NEWLINE; other callers treat a negative result as "absent" */ + symset_add(&LAnl, TK_newline, 0); snnl = save_set(g, LAnl); LAnl = set_find(g, snnl); } @@ -1650,8 +1658,10 @@ it up a bit. First the items, with production number and associativity. 
 printf(" [%d%s]", s->precedence, assoc_names[s->assoc]); } - if (pr->line_like) + if (pr->line_like == 1) printf(" $$NEWLINE"); + else if (pr->line_like) + printf(" $$OUT"); printf("\n"); } @@ -1790,14 +1800,10 @@ terminals to items where that terminal could be shifted and another which maps terminals to items that could be reduced when the terminal is in look-ahead. We report when we get conflicts between the two. -As a special case, if we find a SHIFT/REDUCE conflict, where a -terminal that could be shifted is in the lookahead set of some -reducable item, then set check if the reducable item also have -`TK_newline` in its lookahead set. If it does, then a newline will -force the reduction, but anything else can reasonably be shifted, so -that isn't really a conflict. Such apparent conflicts do not get -counted, and are reported as non-critical. This will not affect a -"traditional" grammar that does not include newlines as token. +As a special case, if we find a SHIFT/REDUCE conflict on the NEWLINE +terminal, we ignore it. NEWLINE is handled specially, with its own +rules for when to shift and when to reduce. Such conflicts are expected, +but are handled internally. static int conflicts_slr(struct grammar *g, enum grammar_type type) { @@ -1851,12 +1857,9 @@ counted, and are reported as non-critical. This will not affect a for (k = 0; k < la.cnt; k++) { int pos = symset_find(&shifts, la.syms[k]); if (pos >= 0 && la.syms[k] != TK_newline) { - if (symset_find(&la, TK_newline) < 0) { - printf(" State %d has SHIFT/REDUCE conflict on ", i); - cnt++; - } else - printf(" State %d has non-critical SHIFT/REDUCE conflict on ", i); - prtxt(g->symtab[la.syms[k]]->name); + printf(" State %d has SHIFT/REDUCE conflict on ", i); + cnt++; + prtxt(g->symtab[la.syms[k]]->name); printf(":\n"); report_item(g, shifts.data[pos]); report_item(g, itm); @@ -1913,7 +1916,6 @@ pieces of code provided in the grammar file, so they are generated first. 
 fprintf(f, "\tstruct token_state *tokens;\n"); fprintf(f, "\tconfig->words_marks = known;\n"); fprintf(f, "\tconfig->known_count = sizeof(known)/sizeof(known[0]);\n"); - fprintf(f, "\tconfig->ignored |= (1 << TK_line_comment) | (1 << TK_block_comment);\n"); fprintf(f, "\ttokens = token_open(code, config);\n"); fprintf(f, "\tvoid *rv = parser_run(tokens, states, do_reduce, do_free, trace, non_term, config);\n"); fprintf(f, "\ttoken_close(tokens);\n"); @@ -2873,9 +2875,13 @@ checks if a given token is in any of these look-ahead sets. force_reduce: if (states[tos->state].reduce_prod >= 0 && states[tos->state].newline_only && - tk->num != TK_newline && tk->num != TK_eof && tk->num != TK_out) { - /* Anything other than newline in an error as this - * production must end at EOL + !(tk->num == TK_newline || + tk->num == TK_eof || + tk->num == TK_out || + (tos->indents == 0 && tos->since_newline == 0))) { + /* Anything other than newline or out or eof + * is an error unless we are already at start + * of line, as this production must end at EOL. */ } else if (states[tos->state].reduce_prod >= 0) { void **body; @@ -3027,7 +3033,7 @@ end inside square brackets. fputs(reserved_words[tk->num], trace); else text_dump(trace, tk->txt, 20); - fputs("]", trace); + fprintf(trace, ":%d:%d]", tk->line, tk->col); } void parser_trace_action(FILE *trace, char *action) @@ -3058,7 +3064,7 @@ an error. # calc: header - #include "number.h" + #include "parse_number.h" // what do we use for a demo-grammar? A calculator of course. struct number { mpq_t val; @@ -3078,7 +3084,6 @@ an error. #include #include "mdcode.h" #include "scanner.h" - #include "number.h" #include "parser.h" #include "calc.h" @@ -3104,7 +3109,6 @@ an error. struct section *s; struct token_config config = { .ignored = (1 << TK_line_comment) - | (1 << TK_block_comment) | (1 << TK_in) | (1 << TK_out), .number_chars = ".,_+-", @@ -3126,7 +3130,7 @@ an error. 
# calc: grammar $LEFT + - - $LEFT * / + $LEFT * / // Session -> Session Line | Line @@ -3154,6 +3158,16 @@ an error. | Expression - Expression ${ mpq_init($0.val); mpq_sub($0.val, $1.val, $3.val); }$ | Expression * Expression ${ mpq_init($0.val); mpq_mul($0.val, $1.val, $3.val); }$ | Expression / Expression ${ mpq_init($0.val); mpq_div($0.val, $1.val, $3.val); }$ + | Expression // Expression ${ { + mpz_t z0, z1, z2; + mpq_init($0.val); + mpz_init(z0); mpz_init(z1); mpz_init(z2); + mpz_tdiv_q(z1, mpq_numref($1.val), mpq_denref($1.val)); + mpz_tdiv_q(z2, mpq_numref($3.val), mpq_denref($3.val)); + mpz_tdiv_q(z0, z1, z2); + mpq_set_z($0.val, z0); + mpz_clear(z0); mpz_clear(z1); mpz_clear(z2); + } }$ | NUMBER ${ if (number_parse($0.val, $0.tail, $1.txt) == 0) mpq_init($0.val); }$ | ( Expression ) ${ mpq_init($0.val); mpq_set($0.val, $2.val); }$ @@ -3166,4 +3180,6 @@ an error. 10 * 9 / 2 1 * 1000 + 2 * 100 + 3 * 10 + 4 * 1 + 355//113 + error