From 57ba38b1b121bc615ea1bbafcb0ac04ae8d4f99f Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sun, 11 May 2014 15:30:53 +1000 Subject: [PATCH] parsergen: allow pointers as well as struct to be associated with nonterminals. This makes it a lot easier when building up an AST. Signed-off-by: NeilBrown --- csrc/parsergen.mdc | 68 +++++++++++++++++++++++++++++++--------------- 1 file changed, 46 insertions(+), 22 deletions(-) diff --git a/csrc/parsergen.mdc b/csrc/parsergen.mdc index 8193965..19d1b5a 100644 --- a/csrc/parsergen.mdc +++ b/csrc/parsergen.mdc @@ -81,9 +81,10 @@ declarations, and data type declarations. These are all parsed with _ad hoc_ parsing as we don't have a parser generator yet. The precedence and associativity can be set for each production, but -can be inherited from symbols. The data type is potentially defined -for each non-terminal and describes what C structure is used to store -information about each symbol. +can be inherited from symbols. The data type (either a structure or a +reference to a structure) is potentially defined for each non-terminal +and describes what C structure is used to store information about each +symbol. ###### declarations enum assoc {Left, Right, Non}; @@ -91,6 +92,7 @@ information about each symbol. struct symbol { struct text struct_name; + int isref; enum assoc assoc; unsigned short precedence; ## symbol fields @@ -256,14 +258,17 @@ word is `LEFT`, `RIGHT` or `NON`, then the line specifies a precedence, otherwise it specifies a data type. The data type name is simply stored and applied to the head of all -subsequent productions. It must be the name of a structure, so `$expr` -maps to `struct expr`. - -Any productions given before the first data type will have no data type -and can carry no information. In order to allow other non-terminals to -have no type, the data type `$void` can be given. This does *not* mean -that `struct void` will be used, but rather than no type will be -associated with future non-terminals. 
+subsequent productions. It must be the name of a structure optionally +preceded by an asterisk which means a reference or "pointer". So +`$expression` maps to `struct expression` and `$*statement` maps to +`struct statement *`. + +Any productions given before the first data type declaration will have +no data type associated with them and can carry no information. In +order to allow other non-terminals to have no type, the data type +`$void` can be given. This does *not* mean that `struct void` will be +used, but rather that no type will be associated with future +non-terminals. The precedence line must contain a list of symbols - typically terminal symbols, but not necessarily. It can only contain symbols @@ -286,6 +291,7 @@ production inherits from the last symbol which has a precedence. ###### grammar fields struct text current_type; + int type_isref; int prec_levels; ###### declarations @@ -293,7 +299,7 @@ production inherits from the last symbol which has a precedence. static const char *known[] = { "$$", "${", "}$" }; ###### functions - static char *dollar_line(struct token_state *ts, struct grammar *g) + static char *dollar_line(struct token_state *ts, struct grammar *g, int isref) { struct token t = token_next(ts); char *err; @@ -312,6 +318,7 @@ production inherits from the last symbol which has a precedence. assoc = Non; else { g->current_type = t.txt; + g->type_isref = isref; if (text_is(t.txt, "void")) g->current_type.txt = NULL; t = token_next(ts); @@ -322,6 +329,11 @@ production inherits from the last symbol which has a precedence. return NULL; } + if (isref) { + err = "$* cannot be followed by a precedence"; + goto abort; + } + // This is a precedence line, need some symbols. found = 0; g->prec_levels += 1; @@ -587,6 +599,7 @@ Now we are ready to read in the grammar. 
else { head->type = Nonterminal; head->struct_name = g->current_type; + head->isref = g->type_isref; if (g->production_count == 0) { ## create production zero } @@ -607,7 +620,10 @@ Now we are ready to read in the grammar. err = "First production must have a head"; } else if (tk.num == TK_mark && text_is(tk.txt, "$")) { - err = dollar_line(state, g); + err = dollar_line(state, g, 0); + } else if (tk.num == TK_mark + && text_is(tk.txt, "$*")) { + err = dollar_line(state, g, 1); } else { err = "Unrecognised token at start of line."; } @@ -1973,9 +1989,10 @@ to the appropriate type for each access. All this is handling in n = n * 10 + *c - '0'; } if (n == 0) - fprintf(f, "(*(struct %.*s*)ret)", + fprintf(f, "(*(struct %.*s*%s)ret)", p->head->struct_name.len, - p->head->struct_name.txt); + p->head->struct_name.txt, + p->head->isref ? "*":""); else if (n > p->body_size) fprintf(f, "$%d", n); else if (p->body[n-1]->type == Terminal) @@ -1984,9 +2001,10 @@ to the appropriate type for each access. All this is handling in else if (p->body[n-1]->struct_name.txt == NULL) fprintf(f, "$%d", n); else - fprintf(f, "(*(struct %.*s*)body[%d])", + fprintf(f, "(*(struct %.*s*%s)body[%d])", p->body[n-1]->struct_name.len, - p->body[n-1]->struct_name.txt, n-1); + p->body[n-1]->struct_name.txt, + p->body[n-1]->isref ? "*":"", n-1); } fputs("\n", f); } @@ -2010,9 +2028,10 @@ to the appropriate type for each access. All this is handling in gen_code(p, f, g); if (p->head->struct_name.txt) - fprintf(f, "\t\tret_size = sizeof(struct %.*s);\n", + fprintf(f, "\t\tret_size = sizeof(struct %.*s%s);\n", p->head->struct_name.len, - p->head->struct_name.txt); + p->head->struct_name.txt, + p->head->isref ? "*":""); fprintf(f, "\t\tbreak;\n"); } @@ -2055,9 +2074,14 @@ appropriate for tokens` on any terminal symbol. 
continue; fprintf(f, "\tcase %d:\n", s->num); - fprintf(f, "\t\tfree_%.*s(asn);\n", - s->struct_name.len, - s->struct_name.txt); + if (s->isref) + fprintf(f, "\t\tfree_%.*s(*(void**)asn);\n", + s->struct_name.len, + s->struct_name.txt); + else + fprintf(f, "\t\tfree_%.*s(asn);\n", + s->struct_name.len, + s->struct_name.txt); fprintf(f, "\t\tbreak;\n"); } fprintf(f, "\t}\n}\n\n"); -- 2.43.0