ocean-lang.org Git - ocean/blobdiff - csrc/parsergen.mdc
parsergen: work-around for indent parsing problem.
[ocean] / csrc / parsergen.mdc
index b3d1cf638b3d47a4ff84add7b05c89eef59d86de..b94bd3bbc8ff823180d0030a07191c82f8f72462 100644 (file)
@@ -201,18 +201,6 @@ symbol, but its type might be `Unknown`.
        int num_syms;
 
 ###### functions
-       static int text_cmp(struct text a, struct text b)
-       {
-               int len = a.len;
-               if (a.len > b.len)
-                       len = b.len;
-               int cmp = strncmp(a.txt, b.txt, len);
-               if (cmp)
-                       return cmp;
-               else
-                       return a.len - b.len;
-       }
-
        static struct symbol *sym_find(struct grammar *g, struct text s)
        {
                struct symbol **l = &g->syms;
@@ -552,8 +540,13 @@ where `START` is the first non-terminal given.
        struct production *p = calloc(1,sizeof(*p));
        struct text start = {"$start",6};
        struct text eof = {"$eof",4};
+       struct text code = {"$0 = $<1;", 9};
        p->head = sym_find(g, start);
        p->head->type = Nonterminal;
+       p->head->struct_name = g->current_type;
+       p->head->isref = g->type_isref;
+       if (g->current_type.txt)
+               p->code = code;
        array_add(&p->body, &p->body_size, head);
        array_add(&p->body, &p->body_size, sym_find(g, eof));
        p->head->first_production = g->production_count;
@@ -1814,7 +1807,7 @@ pieces of code provided in the grammar file, so they are generated first.
                fprintf(f, "\tconfig->known_count = sizeof(known)/sizeof(known[0]);\n");
                fprintf(f, "\tconfig->ignored |= (1 << TK_line_comment) | (1 << TK_block_comment);\n");
                fprintf(f, "\ttokens = token_open(code, config);\n");
-               fprintf(f, "\tvoid *rv = parser_run(tokens, states, do_reduce, do_free, trace, non_term, config->known_count);\n");
+               fprintf(f, "\tvoid *rv = parser_run(tokens, states, do_reduce, do_free, trace, non_term, config);\n");
                fprintf(f, "\ttoken_close(tokens);\n");
                fprintf(f, "\treturn rv;\n");
                fprintf(f, "}\n\n");
@@ -1962,6 +1955,11 @@ This code needs to be able to store data somewhere.  Rather than requiring
 `do_reduce` to `malloc` that "somewhere", we pass in a large buffer and have
 `do_reduce` return the size to be saved.
 
+In order for the code to access "global" context, we pass in the
+"config" pointer that was passed to the parser function.  If the `struct
+token_config` is embedded in some larger structure, the reducing code
+can access the larger structure using pointer manipulation.
+
 The code fragment requires translation when written out.  Any `$N` needs to
 be converted to a reference either to that buffer (if `$0`) or to the
 structure returned by a previous reduction.  These pointers need to be cast
@@ -2044,7 +2042,7 @@ automatically freed.  This is equivalent to assigning `NULL` to the pointer.
        {
                int i;
                fprintf(f, "#line 0 \"gen_reduce\"\n");
-               fprintf(f, "static int do_reduce(int prod, void **body, void *ret)\n");
+               fprintf(f, "static int do_reduce(int prod, void **body, struct token_config *config, void *ret)\n");
                fprintf(f, "{\n");
                fprintf(f, "\tint ret_size = 0;\n");
 
@@ -2103,11 +2101,12 @@ appropriate for tokens on any terminal symbol.
                                continue;
 
                        fprintf(f, "\tcase %d:\n", s->num);
-                       if (s->isref)
+                       if (s->isref) {
                                fprintf(f, "\t\tfree_%.*s(*(void**)asn);\n",
                                        s->struct_name.len,
                                        s->struct_name.txt);
-                       else
+                               fprintf(f, "\t\tfree(asn);\n");
+                       } else
                                fprintf(f, "\t\tfree_%.*s(asn);\n",
                                        s->struct_name.len,
                                        s->struct_name.txt);
@@ -2572,14 +2571,15 @@ since the last state which could have been at the start of a line.
 ###### parser_run
        void *parser_run(struct token_state *tokens,
                         const struct state states[],
-                        int (*do_reduce)(int, void**, void*),
+                        int (*do_reduce)(int, void**, struct token_config*, void*),
                         void (*do_free)(short, void*),
-                        FILE *trace, const char *non_term[], int knowns)
+                        FILE *trace, const char *non_term[],
+                        struct token_config *config)
        {
                struct parser p = { 0 };
                struct token *tk = NULL;
                int accepted = 0;
-               void *ret;
+               void *ret = NULL;
 
                p.next.newline_permitted = states[0].starts_line;
                while (!accepted) {
@@ -2588,7 +2588,7 @@ since the last state which could have been at the start of a line.
                                tk = tok_copy(token_next(tokens));
                        p.next.sym = tk->num;
                        if (trace)
-                               parser_trace(trace, &p, tk, states, non_term, knowns);
+                               parser_trace(trace, &p, tk, states, non_term, config->known_count);
 
                        if (p.next.sym == TK_in) {
                                p.next.starts_indented = 1;
@@ -2600,7 +2600,7 @@ since the last state which could have been at the start of a line.
                        if (p.next.sym == TK_out) {
                                if (p.stack[p.tos-1].indents > p.stack[p.tos-1].starts_indented ||
                                    (p.stack[p.tos-1].indents == 1 &&
-                                    states[p.next.state].reduce_size > 1)) {
+                                    states[p.next.state].reduce_size != 1)) {
                                        p.stack[p.tos-1].indents -= 1;
                                        if (p.stack[p.tos-1].indents == p.stack[p.tos-1].starts_indented) {
                                                // no internal indent any more, reassess 'newline_permitted'
@@ -2629,6 +2629,7 @@ since the last state which could have been at the start of a line.
                        }
                        if (states[p.next.state].reduce_prod >= 0) {
                                void **body;
+                               void *res;
                                int prod = states[p.next.state].reduce_prod;
                                int size = states[p.next.state].reduce_size;
                                int bufsize;
@@ -2638,12 +2639,16 @@ since the last state which could have been at the start of a line.
                                body = p.asn_stack +
                                        (p.tos - states[p.next.state].reduce_size);
 
-                               bufsize = do_reduce(prod, body, buf);
+                               bufsize = do_reduce(prod, body, config, buf);
 
                                pop(&p, size, do_free);
-                               shift(&p, memdup(buf, bufsize), states);
-                               if (prod == 0)
+                               res = memdup(buf, bufsize);
+                               memset(buf, 0, bufsize);
+                               if (!shift(&p, res, states)) {
+                                       if (prod != 0) abort();
                                        accepted = 1;
+                                       ret = res;
+                               }
                                continue;
                        }
                        if (tk->num == TK_out) {
@@ -2690,10 +2695,7 @@ since the last state which could have been at the start of a line.
                                break;
                }
                free(tk);
-               if (accepted)
-                       ret = p.asn_stack[0];
-               else
-                       pop(&p, p.tos, do_free);
+               pop(&p, p.tos, do_free);
                free(p.asn_stack);
                free(p.stack);
                return ret;
@@ -2702,9 +2704,10 @@ since the last state which could have been at the start of a line.
 ###### exported functions
        void *parser_run(struct token_state *tokens,
                         const struct state states[],
-                        int (*do_reduce)(int, void**, void*),
+                        int (*do_reduce)(int, void**, struct token_config*, void*),
                         void (*do_free)(short, void*),
-                        FILE *trace, const char *non_term[], int knowns);
+                        FILE *trace, const char *non_term[],
+                        struct token_config *config);
 
 ### Tracing
 
@@ -2839,6 +2842,12 @@ an error.
                        .word_cont = "",
                };
                parse_calc(s->code, &config, argc > 2 ? stderr : NULL);
+               while (s) {
+                       struct section *t = s->next;
+                       code_free(s->code);
+                       free(s);
+                       s = t;
+               }
                exit(0);
        }