different token types that `scanner` can report.
###### declarations
- static char *reserved_words[] = {
- [TK_error] = "ERROR",
- [TK_number] = "NUMBER",
- [TK_ident] = "IDENTIFIER",
- [TK_mark] = "MARK",
- [TK_string] = "STRING",
- [TK_multi_string] = "MULTI_STRING",
- [TK_in] = "IN",
- [TK_out] = "OUT",
- [TK_newline] = "NEWLINE",
- [TK_eof] = "$eof",
+
+ static struct { int num; char *name; } reserved_words[] = {
+ { TK_error, "ERROR" },
+ { TK_number, "NUMBER" },
+ { TK_ident, "IDENTIFIER" },
+ { TK_mark, "MARK" },
+ { TK_string, "STRING" },
+ { TK_multi_string, "MULTI_STRING" },
+ { TK_in, "IN" },
+ { TK_out, "OUT" },
+ { TK_newline, "NEWLINE" },
+ { TK_eof, "$eof" },
};
###### symbol fields
short num;
for (i = 0; i < entries; i++) {
struct text t;
struct symbol *s;
- t.txt = reserved_words[i];
- if (!t.txt)
- continue;
+ t.txt = reserved_words[i].name;
t.len = strlen(t.txt);
s = sym_find(g, t);
s->type = Terminal;
- s->num = i;
+ s->num = reserved_words[i].num;
}
}
will not automatically be freed. The effect of using '<' is that the
variable is cleareed to all-zeros.
-Symbols that are left-recursive are a little special. These are symbols
-that both the head of a production and the first body symbol of the same
-production. They are problematic when they appear in other productions
-elsewhere than at the end, and when indenting is used to describe
-structure. In this case there is no way for a smaller indent to ensure
-the left-recursive symbol cannot be extended. When it appears at the
-end of a production, that production can be reduced to ensure the symbol
-isn't extended. So we record left-recursive symbols while reading the
-grammar, and produce a warning when reporting the grammar if they are
-found in an unsuitable place.
-
-A symbol that is only left recursive in a production where it is
-followed by newline does not cause these problems because the newline
-will effectively terminate it.
-
While building productions we will need to add to an array which needs to
grow dynamically.
###### symbol fields
int first_production;
- int left_recursive;
###### functions
static char *parse_production(struct grammar *g,
}
tk = token_next(state);
}
- if (p.body_size >= 2 &&
- p.body[0] == p.head &&
- p.body[1]->num != TK_newline)
- p.head->left_recursive = 1;
if (tk.num != TK_newline && tk.num != TK_eof) {
err = "stray tokens at end of line";
the first "0".
###### declarations
- static inline unsigned short item_num(int production, int index)
+ static inline unsigned short item_num(int production, int dot)
{
- return production | ((31-index) << 11);
+ return production | ((31-dot) << 11);
}
static inline int item_prod(unsigned short item)
{
return item & 0x7ff;
}
- static inline int item_index(unsigned short item)
+ static inline int item_dot(unsigned short item)
{
return (31-(item >> 11)) & 0x1f;
}
for (i = 0;
i < a.cnt && i < b.cnt &&
- item_index(a.syms[i]) > 0 &&
- item_index(b.syms[i]) > 0;
+ item_dot(a.syms[i]) > 0 &&
+ item_dot(b.syms[i]) > 0;
i++) {
int diff = a.syms[i] - b.syms[i];
if (diff)
return diff;
}
}
- if (i == a.cnt || item_index(a.syms[i]) == 0)
+ if (i == a.cnt || item_dot(a.syms[i]) == 0)
av = -1;
else
av = a.syms[i];
- if (i == b.cnt || item_index(b.syms[i]) == 0)
+ if (i == b.cnt || item_dot(b.syms[i]) == 0)
bv = -1;
else
bv = b.syms[i];
###### complete itemset
for (i = 0; i < is->items.cnt; i++) {
int p = item_prod(is->items.syms[i]);
- int bs = item_index(is->items.syms[i]);
+ int bs = item_dot(is->items.syms[i]);
struct production *pr = g->productions[p];
int p2;
struct symbol *s;
for (j = 0; j < is->items.cnt; j++) {
int itm = is->items.syms[j];
int p = item_prod(itm);
- int bp = item_index(itm);
+ int bp = item_dot(itm);
struct production *pr = g->productions[p];
unsigned short la = 0;
int pos;
if (g->follow)
report_follow(g);
report_itemsets(g);
- return report_conflicts(g, type) + report_left_recursive(g);
+ return report_conflicts(g, type);
}
Firstly we have the complete list of symbols, together with the
static void report_item(struct grammar *g, int itm)
{
int p = item_prod(itm);
- int dot = item_index(itm);
+ int dot = item_dot(itm);
struct production *pr = g->productions[p];
int i;
for (j = 0; j < is->items.cnt; j++) {
int itm = is->items.syms[j];
int p = item_prod(itm);
- int bp = item_index(itm);
+ int bp = item_dot(itm);
struct production *pr = g->productions[p];
if (bp == pr->body_size) {
for (j = 0; j < is->items.cnt; j++) {
unsigned short itm = is->items.syms[j];
int p = item_prod(itm);
- int bp = item_index(itm);
+ int bp = item_dot(itm);
struct production *pr = g->productions[p];
struct symbol *s;
for (j = 0; j < is->items.cnt; j++) {
unsigned short itm = is->items.syms[j];
int p = item_prod(itm);
- int bp = item_index(itm);
+ int bp = item_dot(itm);
struct production *pr = g->productions[p];
if (bp < pr->body_size)
}
-### Reporting non-final left-recursive symbols.
-
-Left recursive symbols are a problem for parses that honour indentation
-when they appear other than at the end of the production. So we need to
-report these when asked.
-
-###### functions
-
- static int report_left_recursive(struct grammar *g)
- {
- int p;
- int bad_left_recursive = 0;
-
- for (p = 0; p < g->production_count; p++) {
- struct production *pr = g->productions[p];
- int sn;
-
- for (sn = 0; sn < pr->body_size - 1; sn++) {
- struct symbol *s = pr->body[sn];
-
- if (s->left_recursive == 1 &&
- s != pr->head) {
- if (!bad_left_recursive) {
- bad_left_recursive = 1;
- printf("Misplaced left recursive symbols.\n");
- }
- printf(" ");
- prtxt(s->name);
- printf(" in production [%d]\n", p);
- s->left_recursive = 2;
- }
- }
- }
- return bad_left_recursive;
- }
-
## Generating the parser
The exported part of the parser is the `parse_XX` function, where the name
for (j = 0; j < is->items.cnt; j++) {
int itm = is->items.syms[j];
int p = item_prod(itm);
- int bp = item_index(itm);
+ int bp = item_dot(itm);
struct production *pr = g->productions[p];
if (bp < pr->body_size)
void *ret = NULL;
shift(&p, TK_eof, 0, 1, NULL, states);
- while (!accepted) {
+ while (!accepted && p.tos > 0) {
struct token *err_tk;
struct frame *tos = &p.stack[p.tos-1];
if (!tk)