$*statement
Newlines -> NEWLINE
| Newlines NEWLINE
- Statementlist -> Statements ${ $0 = $<1; }$
- | Newlines Statements ${ $0 = $<2; }$
+ Statementlist -> Statements ${ $0 = $<S; }$
+ | Newlines Statements ${ $0 = $<S1; }$
Statements -> Statements Statement ${
{
| Newlines {
Close -> }
| Newlines }
- Block -> Open Statementlist Close ${ $0 = $<2; }$
- | Open SimpleStatements } ${ $0 = $<2; }$
- | : SimpleStatements ${ $0 = $<2; }$
- | : StatementBlock ${ $0 = $<2; }$
- StatementBlock -> Statementlist $$OUT ${ $0 = $<1; }$
+ Block -> Open Statementlist Close ${ $0 = $<S; }$
+ | Open SimpleStatements } ${ $0 = $<S; }$
+ | : SimpleStatements ${ $0 = $<SS; }$
+ | : StatementBlock ${ $0 = $<SB; }$
+ StatementBlock -> Statementlist $$OUT ${ $0 = $<Sl; }$
SimpleStatements -> SimpleStatements ; SimpleStatement ${
{
struct statement **s;
- $0 = $<1;
+ $0 = $<SSs;
s = &$0;
while (*s)
s = &(*s)->next;
- *s = $<3;
+ *s = $<SS;
}
}$
| SimpleStatement ${ $0 = $<1; }$
at the end of a line.
Text in the code fragment will undergo substitutions where `$N` or
-`$<N`,for some numeric `N`, will be replaced with a variable holding the
-parse information for the particular symbol in the production. `$0` is
-the head of the production, `$1` is the first symbol of the body, etc.
-The type of `$N` for a terminal symbol is `struct token`. For a
-non-terminal, it is whatever has been declared for that symbol. The `<`
-may be included and means that the value (usually a reference) is being
-moved out, so it will not automatically be freed. The effect of using
-'<' is that the variable is cleareed to all-zeros.
+`$<N`, for some numeric `N` (or non-numeric indicator as described
+later), will be replaced with a variable holding the parse information
+for the particular symbol in the production. `$0` is the head of the
+production, `$1` is the first symbol of the body, etc. The type of `$N`
+for a terminal symbol is `struct token`. For a non-terminal, it is
+whatever has been declared for that symbol. The `<` may be included and
+means that the value (usually a reference) is being moved out, so it
+will not automatically be freed.  The effect of using '<' is that the
+variable is cleared to all-zeros.
Symbols that are left-recursive are a little special. These are symbols
that both the head of a production and the first body symbol of the same
equivalent to assigning `NULL` to the pointer or filling a structure
with zeros.
+Instead of a number `N`, the `$` or `$<` can be followed by some letters
+and possibly a number. A number by itself (other than zero) selects a
+symbol from the body of the production. A sequence of letters selects
+the shortest symbol in the body which contains those letters in the given
+order. If a number follows the letters, then a later occurrence of
+that symbol is chosen. So "`$AB2`" will refer to the structure attached
+to the second occurrence of the shortest symbol which contains an `A`
+followed by a `B`.  If there is no unique shortest symbol, or if the
+number given is too large, then the symbol reference is not transformed,
+and will cause an error when the code is compiled.
+
###### functions
+ /*
+  * Find the first occurrence of character `c` in text `t`, looking
+  * only at offsets `s` and later.  Returns the offset of the match,
+  * or -1 when `c` does not appear before `t.len`.
+  */
+ static int textchr(struct text t, char c, int s)
+ {
+ 	int pos = s;
+
+ 	while (pos < t.len) {
+ 		if (t.txt[pos] == c)
+ 			return pos;
+ 		pos += 1;
+ 	}
+ 	return -1;
+ }
+
+ /*
+  * Report whether the `slen` characters at `seq` occur in `name` in
+  * the same order, though not necessarily adjacent to each other.
+  * Returns 1 when they do (trivially so when `slen` is not positive)
+  * and 0 otherwise.
+  */
+ static int subseq_match(char *seq, int slen, struct text name)
+ {
+ 	int from = 0;
+ 	int k;
+
+ 	for (k = 0; k < slen; k++) {
+ 		int at = textchr(name, seq[k], from);
+
+ 		if (at < 0)
+ 			return 0;
+ 		/* The next character must be found after this one. */
+ 		from = at + 1;
+ 	}
+ 	return 1;
+ }
+
+ /*
+  * Decode the symbol reference that follows a `$` or `$<` in a code
+  * fragment.
+  *
+  * `*namep` points at the first character of the reference and `len`
+  * is the number of characters that may be examined.  The reference is
+  * an optional run of ASCII letters followed by an optional decimal
+  * number:
+  *
+  *  - digits only: the number is returned as-is (0 is a valid result),
+  *    provided it does not exceed p->body_size;
+  *  - letters: the shortest body symbol whose name contains those
+  *    letters in order is selected, and the trailing number (default 1)
+  *    picks which occurrence of that symbol in the body is meant.
+  *
+  * On success `*namep` is advanced past the reference and the 1-based
+  * body position (or the plain number) is returned.  On failure -1 is
+  * returned and `*namep` is left untouched.  Failure means the
+  * reference is empty, the shortest match is not unique, or the number
+  * is too large.
+  */
+ static int choose_sym(char **namep, int len, struct production *p)
+ {
+ 	char *name = *namep;
+ 	char *nam = name;
+ 	int namlen;
+ 	int n = 0;
+ 	int i, s, slen;
+
+ 	/* Test `len` before dereferencing so we never look past the text. */
+ 	while (len > 0 &&
+ 	       ((*name >= 'A' && *name <= 'Z') ||
+ 	        (*name >= 'a' && *name <= 'z'))) {
+ 		name += 1;
+ 		len -= 1;
+ 	}
+ 	namlen = name - nam;
+ 	while (len > 0 && *name >= '0' && *name <= '9') {
+ 		/* Saturate rather than overflow: any value beyond
+ 		 * body_size is rejected below in either mode.
+ 		 */
+ 		if (n <= p->body_size)
+ 			n = n * 10 + (*name - '0');
+ 		name += 1;
+ 		len -= 1;
+ 	}
+ 	if (namlen == 0) {
+ 		/* No letters: either nothing at all, or a plain number. */
+ 		if (name == *namep || n > p->body_size)
+ 			return -1;
+ 		*namep = name;
+ 		return n;
+ 	}
+
+ 	/* Find the shortest matching symbol; s is -1 while that is ambiguous. */
+ 	slen = 0; s = -1;
+ 	for (i = 0; i < p->body_size; i++) {
+ 		if (!subseq_match(nam, namlen, p->body[i]->name))
+ 			continue;
+ 		if (slen == 0 || p->body[i]->name.len < slen) {
+ 			s = i;
+ 			slen = p->body[i]->name.len;
+ 		}
+ 		if (s >= 0 && p->body[i] != p->body[s] &&
+ 		    p->body[i]->name.len == slen)
+ 			/* not unique, so s cannot be used */
+ 			s = -1;
+ 	}
+ 	if (s < 0)
+ 		return -1;
+
+ 	/* Locate the n'th occurrence (default: the first) of that symbol. */
+ 	if (n == 0)
+ 		n = 1;
+ 	for (i = 0; i < p->body_size; i++)
+ 		if (p->body[i] == p->body[s]) {
+ 			n -= 1;
+ 			if (n == 0)
+ 				break;
+ 		}
+ 	if (n > 0)
+ 		/* Fewer occurrences than requested. */
+ 		return -1;
+ 	*namep = name;
+ 	return i + 1;
+ }
+
static void gen_code(struct production *p, FILE *f, struct grammar *g)
{
char *c;
use = 1;
c++;
}
- if (*c < '0' || *c > '9') {
+ n = choose_sym(&c, p->code.txt + p->code.len - c, p);
+ if (n < 0) {
+ fputc('$', f);
if (use)
fputc('<', f);
fputc(*c, f);
continue;
}
- n = *c - '0';
- while (c[1] >= '0' && c[1] <= '9') {
- c += 1;
- n = n * 10 + *c - '0';
- }
if (n == 0)
fprintf(f, "(*(struct %.*s*%s)ret)",
p->head->struct_name.len,
p->head->struct_name.txt,
p->head->isref ? "*":"");
- else if (n > p->body_size)
- fprintf(f, "$%d", n);
else if (p->body[n-1]->type == Terminal)
fprintf(f, "(*(struct token *)body[%d])",
n-1);
p->body[n-1]->isref ? "*":"", n-1);
used[n-1] = use;
}
+ c -= 1;
}
fputs("\n", f);
for (i = 0; i < p->body_size; i++) {