ocean-lang.org Git - ocean/commitdiff
parsergen: allow pointers as well as struct to be associated with nonterminals.
author NeilBrown <neilb@suse.de>
Sun, 11 May 2014 05:30:53 +0000 (15:30 +1000)
committer NeilBrown <neilb@suse.de>
Sun, 15 Jun 2014 07:13:19 +0000 (17:13 +1000)
This makes it a lot easier when building up an AST.

Signed-off-by: NeilBrown <neilb@suse.de>
csrc/parsergen.mdc

index 819396551cf265c1a97293f018d6ff4f873df55e..19d1b5a4e94f6c894853202b8108419668663664 100644 (file)
@@ -81,9 +81,10 @@ declarations, and data type declarations.  These are all parsed with
 _ad hoc_ parsing as we don't have a parser generator yet.
 
 The precedence and associativity can be set for each production, but
-can be inherited from symbols.  The data type is potentially defined
-for each non-terminal and describes what C structure is used to store
-information about each symbol.
+can be inherited from symbols.  The data type (either a structure or a
+reference to a structure) is potentially defined for each non-terminal
+and describes what C structure is used to store information about each
+symbol.
 
 ###### declarations
        enum assoc {Left, Right, Non};
@@ -91,6 +92,7 @@ information about each symbol.
 
        struct symbol {
                struct text struct_name;
+               int isref;
                enum assoc assoc;
                unsigned short precedence;
                ## symbol fields
@@ -256,14 +258,17 @@ word is `LEFT`, `RIGHT` or `NON`, then the line specifies a
 precedence, otherwise it specifies a data type.
 
 The data type name is simply stored and applied to the head of all
-subsequent productions.  It must be the name of a structure, so `$expr`
-maps to `struct expr`.
-
-Any productions given before the first data type will have no data type
-and can carry no information.  In order to allow other non-terminals to
-have no type, the data type `$void` can be given.  This does *not* mean
-that `struct void` will be used, but rather than no type will be
-associated with future non-terminals.
+subsequent productions.  It must be the name of a structure optionally
+preceded by an asterisk which means a reference or "pointer".  So
+`$expression` maps to `struct expression` and `$*statement` maps to
+`struct statement *`.
+
+Any productions given before the first data type declaration will have
+no data type associated with them and can carry no information.  In
+order to allow other non-terminals to have no type, the data type
+`$void` can be given.  This does *not* mean that `struct void` will be
+used, but rather that no type will be associated with future
+non-terminals.
 
 The precedence line must contain a list of symbols - typically
 terminal symbols, but not necessarily.  It can only contain symbols
@@ -286,6 +291,7 @@ production inherits from the last symbol which has a precedence.
 
 ###### grammar fields
        struct text current_type;
+       int type_isref;
        int prec_levels;
 
 ###### declarations
@@ -293,7 +299,7 @@ production inherits from the last symbol which has a precedence.
        static const char *known[] = { "$$", "${", "}$" };
 
 ###### functions
-       static char *dollar_line(struct token_state *ts, struct grammar *g)
+       static char *dollar_line(struct token_state *ts, struct grammar *g, int isref)
        {
                struct token t = token_next(ts);
                char *err;
@@ -312,6 +318,7 @@ production inherits from the last symbol which has a precedence.
                        assoc = Non;
                else {
                        g->current_type = t.txt;
+                       g->type_isref = isref;
                        if (text_is(t.txt, "void"))
                                g->current_type.txt = NULL;
                        t = token_next(ts);
@@ -322,6 +329,11 @@ production inherits from the last symbol which has a precedence.
                        return NULL;
                }
 
+               if (isref) {
+                       err = "$* cannot be followed by a precedence";
+                       goto abort;
+               }
+
                // This is a precedence line, need some symbols.
                found = 0;
                g->prec_levels += 1;
@@ -587,6 +599,7 @@ Now we are ready to read in the grammar.
                                else {
                                        head->type = Nonterminal;
                                        head->struct_name = g->current_type;
+                                       head->isref = g->type_isref;
                                        if (g->production_count == 0) {
                                                ## create production zero
                                        }
@@ -607,7 +620,10 @@ Now we are ready to read in the grammar.
                                        err = "First production must have a head";
                        } else if (tk.num == TK_mark
                                   && text_is(tk.txt, "$")) {
-                               err = dollar_line(state, g);
+                               err = dollar_line(state, g, 0);
+                       } else if (tk.num == TK_mark
+                                  && text_is(tk.txt, "$*")) {
+                               err = dollar_line(state, g, 1);
                        } else {
                                err = "Unrecognised token at start of line.";
                        }
@@ -1973,9 +1989,10 @@ to the appropriate type for each access.  All this is handling in
                                n = n * 10 + *c - '0';
                        }
                        if (n == 0)
-                               fprintf(f, "(*(struct %.*s*)ret)",
+                               fprintf(f, "(*(struct %.*s*%s)ret)",
                                        p->head->struct_name.len,
-                                       p->head->struct_name.txt);
+                                       p->head->struct_name.txt,
+                                       p->head->isref ? "*":"");
                        else if (n > p->body_size)
                                fprintf(f, "$%d", n);
                        else if (p->body[n-1]->type == Terminal)
@@ -1984,9 +2001,10 @@ to the appropriate type for each access.  All this is handling in
                        else if (p->body[n-1]->struct_name.txt == NULL)
                                fprintf(f, "$%d", n);
                        else
-                               fprintf(f, "(*(struct %.*s*)body[%d])",
+                               fprintf(f, "(*(struct %.*s*%s)body[%d])",
                                        p->body[n-1]->struct_name.len,
-                                       p->body[n-1]->struct_name.txt, n-1);
+                                       p->body[n-1]->struct_name.txt,
+                                       p->body[n-1]->isref ? "*":"", n-1);
                }
                fputs("\n", f);
        }
@@ -2010,9 +2028,10 @@ to the appropriate type for each access.  All this is handling in
                                gen_code(p, f, g);
 
                        if (p->head->struct_name.txt)
-                               fprintf(f, "\t\tret_size = sizeof(struct %.*s);\n",
+                               fprintf(f, "\t\tret_size = sizeof(struct %.*s%s);\n",
                                        p->head->struct_name.len,
-                                       p->head->struct_name.txt);
+                                       p->head->struct_name.txt,
+                                       p->head->isref ? "*":"");
 
                        fprintf(f, "\t\tbreak;\n");
                }
@@ -2055,9 +2074,14 @@ appropriate for tokens` on any terminal symbol.
                                continue;
 
                        fprintf(f, "\tcase %d:\n", s->num);
-                       fprintf(f, "\t\tfree_%.*s(asn);\n",
-                               s->struct_name.len,
-                               s->struct_name.txt);
+                       if (s->isref)
+                               fprintf(f, "\t\tfree_%.*s(*(void**)asn);\n",
+                                       s->struct_name.len,
+                                       s->struct_name.txt);
+                       else
+                               fprintf(f, "\t\tfree_%.*s(asn);\n",
+                                       s->struct_name.len,
+                                       s->struct_name.txt);
                        fprintf(f, "\t\tbreak;\n");
                }
                fprintf(f, "\t}\n}\n\n");