marking discussed later, and sometimes we won't know what type a symbol
is yet.
+To help with code safety it is possible to declare the terminal symbols.
+If this is done, then any symbol used in a production that does not
+appear in a head and is not declared is treated as an error.
+
###### forward declarations
enum symtype { Unknown, Virtual, Terminal, Nonterminal };
char *symtypes = "UVTN";
###### symbol fields
enum symtype type;
+###### grammar fields
+ int terminals_declared;
Symbols can be either `TK_ident` or `TK_mark`. They are saved in a
table of known symbols and the resulting parser will report them as
### Data types and precedence.
-Data type specification and precedence specification are both
-introduced by a dollar sign at the start of the line. If the next
-word is `LEFT`, `RIGHT` or `NON`, then the line specifies a
+Data type specification, precedence specification, and declaration of
+terminals are all introduced by a dollar sign at the start of the line.
+If the next word is `LEFT`, `RIGHT` or `NON`, then the line specifies a
+precedence, if it is `TERM` then the line declares terminals without
precedence, otherwise it specifies a data type.
The data type name is simply stored and applied to the head of all
struct token t = token_next(ts);
char *err;
enum assoc assoc;
+ int term = 0;
int found;
if (t.num != TK_ident) {
assoc = Right;
else if (text_is(t.txt, "NON"))
assoc = Non;
- else {
+ else if (text_is(t.txt, "TERM")) {
+ term = 1;
+ g->terminals_declared = 1;
+ } else {
g->current_type = t.txt;
g->type_isref = isref;
if (text_is(t.txt, "void"))
goto abort;
}
- // This is a precedence line, need some symbols.
+ // This is a precedence or TERM line, need some symbols.
found = 0;
g->prec_levels += 1;
t = token_next(ts);
err = "$$ must be followed by a word";
goto abort;
}
+ if (term) {
+ err = "Virtual symbols not permitted on $TERM line";
+ goto abort;
+ }
} else if (t.num != TK_ident &&
t.num != TK_mark) {
err = "Illegal token in precedence line";
}
s = sym_find(g, t.txt);
if (s->type != Unknown) {
- err = "Symbols in precedence line must not already be known.";
+ err = "Symbols in precedence/TERM line must not already be known.";
goto abort;
}
s->type = type;
- s->precedence = g->prec_levels;
- s->assoc = assoc;
+ if (!term) {
+ s->precedence = g->prec_levels;
+ s->assoc = assoc;
+ }
found += 1;
t = token_next(ts);
}
if (found == 0)
- err = "No symbols given on precedence line";
+ err = "No symbols given on precedence/TERM line";
goto abort;
return NULL;
abort:
tk = token_next(state);
while (tk.num == TK_ident || tk.num == TK_mark) {
struct symbol *bs = sym_find(g, tk.txt);
- if (bs->type == Unknown)
- bs->type = Terminal;
+ if (bs->type == Unknown) {
+ if (!g->terminals_declared)
+ bs->type = Terminal;
+ }
if (bs->type == Virtual) {
err = "Virtual symbol not permitted in production";
goto abort;
goto abort;
}
token_close(state);
+ if (g->terminals_declared) {
+ struct symbol *s;
+ int errs = 0;
+ for (s = g->syms; s; s = s->next) {
+ if (s->type != Unknown)
+ continue;
+ errs += 1;
+ fprintf(stderr, "Token %.*s not declared\n",
+ s->name.len, s->name.txt);
+ }
+ if (errs) {
+ free(g);
+ g = NULL;
+ }
+ }
return g;
abort:
fprintf(stderr, "Error at line %d: %s\n",