From d7f2c9af259a43cbdb8def0ebe8040deed480848 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sun, 11 Oct 2020 14:49:07 +1100 Subject: [PATCH] parsergen: add more power to symbol references in generated code As well as symbol references like "$2", you can now use references with letters like "$Ss". This will find the shortest symbol in the production that contains all the given letters in the given order. There must be a unique shortest symbol. If that same symbol occurs multiple times, later instances can be given with a numeric suffix such as "$Ss2". Signed-off-by: NeilBrown --- csrc/indent_test.mdc | 18 +++---- csrc/parsergen.mdc | 119 +++++++++++++++++++++++++++++++++++++------ 2 files changed, 112 insertions(+), 25 deletions(-) diff --git a/csrc/indent_test.mdc b/csrc/indent_test.mdc index 087df32..fddb64b 100644 --- a/csrc/indent_test.mdc +++ b/csrc/indent_test.mdc @@ -139,8 +139,8 @@ Program -> Statementlist ${ print_statement($1, 0); }$ $*statement Newlines -> NEWLINE | Newlines NEWLINE - Statementlist -> Statements ${ $0 = $<1; }$ - | Newlines Statements ${ $0 = $<2; }$ + Statementlist -> Statements ${ $0 = $ Statements Statement ${ { @@ -159,20 +159,20 @@ $*statement | Newlines { Close -> } | Newlines } - Block -> Open Statementlist Close ${ $0 = $<2; }$ - | Open SimpleStatements } ${ $0 = $<2; }$ - | : SimpleStatements ${ $0 = $<2; }$ - | : StatementBlock ${ $0 = $<2; }$ - StatementBlock -> Statementlist $$OUT ${ $0 = $<1; }$ + Block -> Open Statementlist Close ${ $0 = $ Statementlist $$OUT ${ $0 = $ SimpleStatements ; SimpleStatement ${ { struct statement **s; - $0 = $<1; + $0 = $next; - *s = $<3; + *s = $ 0) { + st = textchr(name, *seq, st); + if (st < 0) + return 0; + slen -= 1; + seq += 1; + st += 1; + } + return 1; + } + + /* Resolve the symbol reference that starts at *namep (at most len bytes): + * a run of letters followed by an optional run of digits. + * With no letters the digits are a plain index (0 is returned for "$0"). + * With letters, pick the shortest body symbol that subseq_match() accepts; + * it must be the only symbol of that length which matches, and the digits + * (default 1) select which occurrence of that symbol is meant. + * Returns the 1-based position in p->body (or 0), and advances *namep past + * the reference; returns -1 and leaves *namep alone if nothing valid is named. + */ + static int choose_sym(char **namep, int len, struct production *p) + { + char *name = *namep; + char *nam = name; + int namlen; + int n = 0; + int i, s, slen; + char c; + + c = *name; + while (len > 0 && + ((c >= 'A' && c <= 'Z')
|| (c >= 'a' && c <= 'z'))) { + name += 1; + len -= 1; + c = *name; + } + namlen = name-nam; + while (len > 0 && (c >= '0' && c <= '9')) { + name += 1; + len -= 1; + n = n * 10 + (c - '0'); + c = *name; + } + if (namlen == 0) { + /* nothing consumed, or an index past the body: not a usable reference */ + if (name == *namep || n > p->body_size) + return -1; + *namep = name; + return n; + } + slen = 0; s = -1; + for (i = 0; i < p->body_size; i++) { + if (!subseq_match(nam, namlen, p->body[i]->name)) + continue; + if (slen == 0 || p->body[i]->name.len < slen) { + /* new shortest match: remember its length too */ + s = i; + slen = p->body[i]->name.len; + } + if (s >= 0 && p->body[i] != p->body[s] && + p->body[i]->name.len == p->body[s]->name.len) + /* not unique, so s cannot be used */ + s = -1; + } + if (s < 0) + return -1; + /* no numeric suffix means the first occurrence */ + if (n == 0) + n = 1; + for (i = 0; i < p->body_size; i++) + if (p->body[i] == p->body[s]) { + n -= 1; + if (n == 0) + break; + } + /* fewer than the requested number of occurrences exist */ + if (n > 0) + return -1; + *namep = name; + return i + 1; + } + static void gen_code(struct production *p, FILE *f, struct grammar *g) { char *c; @@ -2199,24 +2290,19 @@ with zeros. use = 1; c++; } - if (*c < '0' || *c > '9') { + n = choose_sym(&c, p->code.txt + p->code.len - c, p); + if (n < 0) { + fputc('$', f); if (use) fputc('<', f); fputc(*c, f); continue; } - n = *c - '0'; - while (c[1] >= '0' && c[1] <= '9') { - c += 1; - n = n * 10 + *c - '0'; - } if (n == 0) fprintf(f, "(*(struct %.*s*%s)ret)", p->head->struct_name.len, p->head->struct_name.txt, p->head->isref ? "*":""); - else if (n > p->body_size) - fprintf(f, "$%d", n); else if (p->body[n-1]->type == Terminal) fprintf(f, "(*(struct token *)body[%d])", n-1); @@ -2229,6 +2315,7 @@ with zeros. p->body[n-1]->isref ? "*":"", n-1); used[n-1] = use; } + c -= 1; } fputs("\n", f); for (i = 0; i < p->body_size; i++) { -- 2.43.0