at the end of a line.
Text in the code fragment will undergo substitutions where `$N` or
-`$<N`,for some numeric `N`, will be replaced with a variable holding
-the parse information for the particular symbol in the production.
-`$0` is the head of the production, `$1` is the first symbol of the
-body, etc. The type of `$N` for a terminal symbol is `struct token`.
-For a non-terminal, it is whatever has been declared for that symbol.
-The `<` may be included for symbols declared as storing a reference
-(not a structure) and means that the reference is being moved out, so
-it will not automatically be freed.
+`$<N`, for some numeric `N` (or non-numeric indicator as described
+later), will be replaced with a variable holding the parse information
+for the particular symbol in the production. `$0` is the head of the
+production, `$1` is the first symbol of the body, etc. The type of `$N`
+for a terminal symbol is `struct token`. For a non-terminal, it is
+whatever has been declared for that symbol. The `<` may be included and
+means that the value (usually a reference) is being moved out, so it
+will not automatically be freed. The effect of using `<` is that the
+variable is cleared to all zeros.
Symbols that are left-recursive are a little special. These are symbols
that both the head of a production and the first body symbol of the same
the first "0".
###### declarations
+ /*
+  * Pack a production number and a dot position into one item value:
+  * the production goes in the low bits and (31 - dot) is shifted up
+  * by 11 bits.  The production is not masked here, so it is expected
+  * to fit in 11 bits (see the 0x7ff mask in item_prod()).
+  */
- static inline unsigned short item_num(int production, int index)
+ static inline unsigned short item_num(int production, int dot)
{
- return production | ((31-index) << 11);
+ return production | ((31-dot) << 11);
}
+ /* Extract the production number: the low 11 bits (0x7ff) of an item. */
static inline int item_prod(unsigned short item)
{
return item & 0x7ff;
}
+ /*
+  * Recover the dot position from an item: undo the (31 - dot) encoding
+  * stored above the low 11 bits, keeping 5 bits of result.
+  */
- static inline int item_index(unsigned short item)
+ static inline int item_dot(unsigned short item)
{
return (31-(item >> 11)) & 0x1f;
}
for (i = 0;
i < a.cnt && i < b.cnt &&
- item_index(a.syms[i]) > 0 &&
- item_index(b.syms[i]) > 0;
+ item_dot(a.syms[i]) > 0 &&
+ item_dot(b.syms[i]) > 0;
i++) {
int diff = a.syms[i] - b.syms[i];
if (diff)
return diff;
}
}
- if (i == a.cnt || item_index(a.syms[i]) == 0)
+ if (i == a.cnt || item_dot(a.syms[i]) == 0)
av = -1;
else
av = a.syms[i];
- if (i == b.cnt || item_index(b.syms[i]) == 0)
+ if (i == b.cnt || item_dot(b.syms[i]) == 0)
bv = -1;
else
bv = b.syms[i];
###### complete itemset
for (i = 0; i < is->items.cnt; i++) {
int p = item_prod(is->items.syms[i]);
- int bs = item_index(is->items.syms[i]);
+ int bs = item_dot(is->items.syms[i]);
struct production *pr = g->productions[p];
int p2;
struct symbol *s;
for (j = 0; j < is->items.cnt; j++) {
int itm = is->items.syms[j];
int p = item_prod(itm);
- int bp = item_index(itm);
+ int bp = item_dot(itm);
struct production *pr = g->productions[p];
unsigned short la = 0;
int pos;
static void report_item(struct grammar *g, int itm)
{
int p = item_prod(itm);
- int dot = item_index(itm);
+ int dot = item_dot(itm);
struct production *pr = g->productions[p];
int i;
for (j = 0; j < is->items.cnt; j++) {
int itm = is->items.syms[j];
int p = item_prod(itm);
- int bp = item_index(itm);
+ int bp = item_dot(itm);
struct production *pr = g->productions[p];
if (bp == pr->body_size) {
for (j = 0; j < is->items.cnt; j++) {
unsigned short itm = is->items.syms[j];
int p = item_prod(itm);
- int bp = item_index(itm);
+ int bp = item_dot(itm);
struct production *pr = g->productions[p];
struct symbol *s;
for (j = 0; j < is->items.cnt; j++) {
unsigned short itm = is->items.syms[j];
int p = item_prod(itm);
- int bp = item_index(itm);
+ int bp = item_dot(itm);
struct production *pr = g->productions[p];
if (bp < pr->body_size)
for (j = 0; j < is->items.cnt; j++) {
int itm = is->items.syms[j];
int p = item_prod(itm);
- int bp = item_index(itm);
+ int bp = item_dot(itm);
struct production *pr = g->productions[p];
if (bp < pr->body_size)
to the appropriate type for each access. All this is handled in
`gen_code`.
-`gen_code` also allows symbol references to contain a '`<`' as in '`$<2`'.
-This applied only to symbols with references (or pointers), not those with structures.
-The `<` implies that the reference it being moved out, so the object will not be
-automatically freed. This is equivalent to assigning `NULL` to the pointer.
+`gen_code` also allows symbol references to contain a '`<`' as in
+'`$<2`'. This is particularly useful for references (or pointers), but
+can be used with structures too. The `<` implies that the value is
+being moved out, so the object will not be automatically freed. It is
+equivalent to assigning `NULL` to the pointer or filling a structure
+with zeros.
+
+Instead of a number `N`, the `$` or `$<` can be followed by some letters
+and possibly a number. A number by itself (other than zero) selects a
+symbol from the body of the production. A sequence of letters selects
+the shortest symbol in the body which contains those letters in the given
+order. If a number follows the letters, then a later occurrence of
+that symbol is chosen. So "`$AB2`" will refer to the structure attached
+to the second occurrence of the shortest symbol which contains an `A`
+followed by a `B`. If there is no unique shortest symbol, or if the
+number given is too large, then the symbol reference is not transformed,
+and will cause an error when the code is compiled.
###### functions
+	/*
+	 * Return the index of the first occurrence of 'c' in 't' at or
+	 * after position 's', or -1 if there is none.
+	 */
+	static int textchr(struct text t, char c, int s)
+	{
+		int pos = s;
+
+		while (pos < t.len) {
+			if (t.txt[pos] == c)
+				return pos;
+			pos += 1;
+		}
+		return -1;
+	}
+
+	/*
+	 * Return 1 if the 'slen' characters at 'seq' all appear in 'name',
+	 * in the same order (not necessarily adjacent); otherwise 0.
+	 * An empty sequence always matches.
+	 */
+	static int subseq_match(char *seq, int slen, struct text name)
+	{
+		int from = 0;
+		int k;
+
+		for (k = 0; k < slen; k++) {
+			int at = textchr(name, seq[k], from);
+
+			if (at < 0)
+				return 0;
+			/* the next character must be found after this one */
+			from = at + 1;
+		}
+		return 1;
+	}
+
+	/*
+	 * Work out which symbol a `$` reference in a code fragment selects.
+	 *
+	 * *namep points just after the `$` (or `$<`), and 'len' is the number
+	 * of characters of code text remaining from there.  A reference is
+	 * an optional run of ASCII letters followed by an optional decimal
+	 * number.
+	 *
+	 * Returns 0 for `$0`, or the 1-based position in the body of 'p' of
+	 * the selected symbol, and advances *namep past the reference.
+	 * Returns -1, leaving *namep untouched, when the reference cannot be
+	 * resolved: no letters or digits present, a number larger than the
+	 * body, no unique shortest symbol containing the letters in order,
+	 * or fewer occurrences of that symbol than the number asks for.
+	 */
+	static int choose_sym(char **namep, int len, struct production *p)
+	{
+		char *name = *namep;
+		char *nam = name;
+		int namlen;
+		int n = 0;
+		int i, s, slen;
+
+		/* 'len' is tested before each read so that we never look
+		 * beyond the end of the code text.
+		 */
+		while (len > 0 &&
+		       ((*name >= 'A' && *name <= 'Z') ||
+		        (*name >= 'a' && *name <= 'z'))) {
+			name += 1;
+			len -= 1;
+		}
+		namlen = name - nam;
+		while (len > 0 && *name >= '0' && *name <= '9') {
+			n = n * 10 + (*name - '0');
+			name += 1;
+			len -= 1;
+		}
+		if (namlen == 0) {
+			/* Purely numeric: reject "nothing at all" and
+			 * numbers that index past the end of the body.
+			 */
+			if (name == *namep || n > p->body_size)
+				return -1;
+			*namep = name;
+			return n;
+		}
+
+		/* Find the shortest body symbol whose name contains the
+		 * letters in order.  's' is reset to -1 whenever a different
+		 * symbol ties for the current shortest length.
+		 */
+		slen = 0; s = -1;
+		for (i = 0; i < p->body_size; i++) {
+			if (!subseq_match(nam, namlen, p->body[i]->name))
+				continue;
+			if (slen == 0 || p->body[i]->name.len < slen) {
+				s = i;
+				slen = p->body[i]->name.len;
+			}
+			if (s >= 0 && p->body[i] != p->body[s] &&
+			    p->body[i]->name.len == p->body[s]->name.len)
+				/* not unique, so s cannot be used */
+				s = -1;
+		}
+		if (s < 0)
+			return -1;
+
+		/* No number (or zero) means the first occurrence. */
+		if (n == 0)
+			n = 1;
+		for (i = 0; i < p->body_size; i++)
+			if (p->body[i] == p->body[s]) {
+				n -= 1;
+				if (n == 0)
+					break;
+			}
+		if (n > 0)
+			/* fewer occurrences than requested */
+			return -1;
+		*namep = name;
+		return i + 1;
+	}
+
static void gen_code(struct production *p, FILE *f, struct grammar *g)
{
char *c;
use = 1;
c++;
}
- if (*c < '0' || *c > '9') {
+ n = choose_sym(&c, p->code.txt + p->code.len - c, p);
+ if (n < 0) {
+ fputc('$', f);
if (use)
fputc('<', f);
fputc(*c, f);
continue;
}
- n = *c - '0';
- while (c[1] >= '0' && c[1] <= '9') {
- c += 1;
- n = n * 10 + *c - '0';
- }
if (n == 0)
fprintf(f, "(*(struct %.*s*%s)ret)",
p->head->struct_name.len,
p->head->struct_name.txt,
p->head->isref ? "*":"");
- else if (n > p->body_size)
- fprintf(f, "$%d", n);
else if (p->body[n-1]->type == Terminal)
fprintf(f, "(*(struct token *)body[%d])",
n-1);
p->body[n-1]->isref ? "*":"", n-1);
used[n-1] = use;
}
+ c -= 1;
}
fputs("\n", f);
for (i = 0; i < p->body_size; i++) {