parsergen: fix some problems with choose_sym()

[ocean] / csrc / parsergen.mdc
diff --git a/csrc/parsergen.mdc b/csrc/parsergen.mdc

index f2bbe0151d75977dd2b2ae9f16031f4c171a5949..ad20195f51b8f355bc0bca3eebe021eb495106e0 100644 (file)
--- a/csrc/parsergen.mdc
+++ b/csrc/parsergen.mdc
@@ -1072,6 +1072,9 @@ and we find the set of possible "first" symbols after there.  This is
  done using `add_first` above and only needs to be done once as the
  "first" sets are now stable and will not change.
  
+###### grammar fields
+       struct symset *follow;
+
  ###### follow code
  
         for (p = 0; p < g->production_count; p++) {
@@ -1121,9 +1124,6 @@ combine these two functions into a single loop.
  We now just need to create and initialise the `follow` list to get a
  complete function.
  
-###### grammar fields
-       struct symset *follow;
-
  ###### functions
         static void build_follow(struct grammar *g)
         {
@@ -2073,8 +2073,6 @@ The go to table is stored in a simple array of `sym` and corresponding
                 }
         }
  
-###### functions
-
         static void gen_states(FILE *f, struct grammar *g)
         {
                 int i;
@@ -2206,7 +2204,7 @@ transformed, and will cause an error when the code is compiled.
                         c = *name;
                 }
                 if (namlen == 0) {
-                       if (name == *namep)
+                       if (name == *namep || n > p->body_size)
                                 return -1;
                         *namep = name;
                         return n;
@@ -2215,8 +2213,10 @@ transformed, and will cause an error when the code is compiled.
                 for (i = 0; i < p->body_size; i++) {
                         if (!subseq_match(nam, namlen, p->body[i]->name))
                                 continue;
-                       if (slen == 0 || p->body[i]->name.len < slen)
+                       if (slen == 0 || p->body[i]->name.len < slen) {
                                 s = i;
+                               slen = p->body[i]->name.len;
+                       }
                         if (s >= 0 && p->body[i] != p->body[s] &&
                             p->body[i]->name.len == p->body[s]->name.len)
                                 /* not unique, so s cannot be used */
@@ -2224,7 +2224,7 @@ transformed, and will cause an error when the code is compiled.
                 }
                 if (s < 0)
                         return -1;
-               if (n == 0);
+               if (n == 0)
                         n = 1;
                 for (i = 0; i < p->body_size; i++)
                         if (p->body[i] == p->body[s]) {
@@ -2232,7 +2232,7 @@ transformed, and will cause an error when the code is compiled.
                                 if (n == 0)
                                         break;
                         }
-               if (n > 1)
+               if (n > 0 || i == p->body_size)
                         return -1;
                 *namep = name;
                 return i + 1;
@@ -2662,6 +2662,35 @@ table.
                         return -1;
         }
  
+### Memory allocation
+
+The `scanner` returns tokens in a local variable - we want them in allocated
+memory so they can live in the `asn_stack`.  Similarly the `asn` produced by
+a reduce is in a large buffer.  Both of these require some allocation and
+copying, hence `memdup` and `tok_copy`.
+
+###### parser includes
+       #include <memory.h>
+
+###### parser functions
+
+       void *memdup(void *m, int len) /* return a malloc()ed copy of the len bytes at m */
+       {
+               void *ret;
+               if (len == 0) /* nothing to copy: no allocation is made */
+                       return NULL;
+               ret = malloc(len);
+               memcpy(ret, m, len); /* NOTE(review): ret is not checked for NULL before this copy */
+               return ret;
+       }
+
+       static struct token *tok_copy(struct token tk) /* return a malloc()ed copy of tk */
+       {
+               struct token *new = malloc(sizeof(*new)); /* NOTE(review): not checked for NULL */
+               *new = tk; /* whole-struct copy by assignment */
+               return new;
+       }
+
  ### The state stack.
  
  The core data structure for the parser is the stack.  This tracks all
@@ -2834,35 +2863,6 @@ before we `shift` the nonterminal in.
                 return indents;
         }
  
-### Memory allocation
-
-The `scanner` returns tokens in a local variable - we want them in allocated
-memory so they can live in the `asn_stack`.  Similarly the `asn` produced by
-a reduce is in a large buffer.  Both of these require some allocation and
-copying, hence `memdup` and `tokcopy`.
-
-###### parser includes
-       #include <memory.h>
-
-###### parser functions
-
-       void *memdup(void *m, int len)
-       {
-               void *ret;
-               if (len == 0)
-                       return NULL;
-               ret = malloc(len);
-               memcpy(ret, m, len);
-               return ret;
-       }
-
-       static struct token *tok_copy(struct token tk)
-       {
-               struct token *new = malloc(sizeof(*new));
-               *new = tk;
-               return new;
-       }
-
  ### The heart of the parser.
  
  Now we have the parser.  For each token we might shift it, trigger a