parsergen: various cosmetic fixes

author NeilBrown <neilb@suse.de>

Sun, 24 Nov 2013 05:53:34 +0000 (16:53 +1100)

committer NeilBrown <neilb@suse.de>

Sun, 24 Nov 2013 05:53:34 +0000 (16:53 +1100)
author NeilBrown <neilb@suse.de>
Sun, 24 Nov 2013 05:53:34 +0000 (16:53 +1100)
committer NeilBrown <neilb@suse.de>
Sun, 24 Nov 2013 05:53:34 +0000 (16:53 +1100)
diff --git a/csrc/parsergen.mdc b/csrc/parsergen.mdc

index eac325584db3fc58a67d9b0a0f9ca5bb3af7961a..7b6664d9cb250125a1f508e9c99402e9e49542cd 100644 (file)
--- a/csrc/parsergen.mdc
+++ b/csrc/parsergen.mdc
@@ -41,7 +41,7 @@ There are several distinct sections.
         #include <stdio.h>
         ## parser includes
         ## parser functions
-       ## parser
+       ## parser_run
  ###### File: calc.cgm
         ## demo grammar
  ###### File: parsergen.mk
@@ -98,7 +98,7 @@ information about each symbol.
         };
  
  The strings reported by `mdcode` and `scanner` are `struct text` which have
-length rather than being null terminated.  To help with printing an
+length rather than being null terminated.  To help with printing and
  comparing we define `text_is` and `prtxt`, which should possibly go in
  `mdcode`.  `scanner` does provide `text_dump` which is useful for strings
  which might contain control characters.
@@ -352,7 +352,7 @@ precedence of the production is that for the virtual symbol.  If none
  is given, the precedence is inherited from the last symbol in the
  production which has a precedence specified.
  
-After the optional precedence may come  the `${` mark.  This indicates
+After the optional precedence may come the `${` mark.  This indicates
  the start of a code fragment.  If present, this must be on the same
  line as the start of the production.
  
@@ -366,7 +366,7 @@ some numeric `N` will be replaced with a variable holding the parse
  information for the particular symbol in the production.  `$0` is the
  head of the production, `$1` is the first symbol of the body, etc.
  The type of `$N` for a terminal symbol is `struct token`.  For
-non-terminal, it is whatever has been declared for that symbol.
+a non-terminal, it is whatever has been declared for that symbol.
  
  While building productions we will need to add to an array which needs to
  grow dynamically.
@@ -498,15 +498,16 @@ With the ability to parse production and dollar-lines, we have nearly all
  that we need to parse a grammar from a `code_node`.
  
  The head of the first production will effectively be the `start` symbol of
-the grammar.  However it wont _actually_ be so.  Processing the grammar is
+the grammar.  However it won't _actually_ be so.  Processing the grammar is
  greatly simplified if the real start symbol only has a single production,
-and expect `$eof` as the final terminal.  So when we find the first explicit
-production we insert an extra production as production zero which looks like
+and expects `$eof` as the final terminal.  So when we find the first
+explicit production we insert an extra production as production zero which
+looks like
  
  ###### Example: production 0
         $start -> START $eof
  
-where `START` is the first non-terminal give.
+where `START` is the first non-terminal given.
  
  ###### create production zero
         struct production *p = calloc(1,sizeof(*p));
@@ -516,7 +517,7 @@ where `START` is the first non-terminal give.
         p->head->type = Nonterminal;
         array_add(&p->body, &p->body_size, head);
         array_add(&p->body, &p->body_size, sym_find(g, eof));
-       g->start  = p->head->num;
+       g->start = p->head->num;
         p->head->first_production = g->production_count;
         array_add(&g->productions, &g->production_count, p);
  
@@ -585,7 +586,7 @@ Now we are ready to read in the grammar.
                                 else
                                         err = "First production must have a head";
                         } else if (tk.num == TK_mark
-                                  &&  text_is(tk.txt, "$")) {
+                                  && text_is(tk.txt, "$")) {
                                 err = dollar_line(state, g);
                         } else {
                                 err = "Unrecognised token at start of line.";
@@ -984,7 +985,7 @@ building the itemsets and states for the LR grammar.  They are:
  1. LR(0) or SLR(1), where no look-ahead is considered.
  2. LALR(1) where we build look-ahead sets with each item and merge
     the LA sets when we find two paths to the same "kernel" set of items.
-3. LR(1) where different look-ahead for any item in the code means
+3. LR(1) where different look-ahead for any item in the set means
     a different state must be created.
  
  ###### forward declarations
@@ -1006,8 +1007,9 @@ as we want to do the lookup after generating the "kernel" of an
  itemset, so we need to ignore the offset=zero items which are added during
  completion.
  
-To facilitate this, we modify the "DOT" number so that "0" sorts to the end of
-the list in the symset, and then only compare items before the first "0".
+To facilitate this, we modify the "DOT" number so that "0" sorts to
+the end of the list in the symset, and then only compare items before
+the first "0".
  
  ###### declarations
         static inline unsigned short item_num(int production, int index)
@@ -1294,7 +1296,7 @@ with `TK_eof` as the LA set.
                         la = save_set(g, eof);
                         first = INIT_DATASET;
                 }
-               // production 0, offset 0  (with no data)
+               // production 0, offset 0 (with no data)
                 symset_add(&first, item_num(0, 0), la);
                 add_itemset(g, first, type);
                 for (again = 0, is = g->items;
@@ -1687,7 +1689,7 @@ known words added and then is used with the `code_node` to initialize the
  scanner.
  
  `parse_XX` then calls the library function `parser_run` to actually complete
-the parse,  This needs the `states` table and function to call the various
+the parse.  This needs the `states` table and function to call the various
  pieces of code provided in the grammar file, so they are generated first.
  
  ###### parser_generate
@@ -2229,7 +2231,7 @@ table.
  
  ### The state stack.
  
-The core data structure for the parser is the stack.  This track all the
+The core data structure for the parser is the stack.  This tracks all the
  symbols that have been recognised or partially recognised.
  
  The stack usually won't grow very large - maybe a few tens of entries.  So
@@ -2242,7 +2244,7 @@ We keep the stack as two separate allocations.  One, `asn_stack` stores the
  production, and by keeping a separate `asn` stack, we can just pass a
  pointer into this stack.
  
-The other allocation store all other stack fields of which there are two.
+The other allocation stores all other stack fields of which there are two.
  The `state` is the most important one and guides the parsing process.  The
  `sym` is nearly unnecessary.  However when we want to free entries from the
  `asn_stack`, it helps to know what type they are so we can call the right
@@ -2264,9 +2266,9 @@ freeing function.  The symbol leads us to the right free function through
  
  #### Shift and pop
  
-The operations are needed on the stack - shift (which is like push) and pop.
+Two operations are needed on the stack - shift (which is like push) and pop.
  
-Shift applies no only to terminals but also to non-terminals.  When we
+Shift applies not only to terminals but also to non-terminals.  When we
  reduce a production we will pop off entries corresponding to the body
  symbols, then push on an item for the head of the production.  This last is
  exactly the same process as shifting in a terminal so we use the same
@@ -2306,7 +2308,7 @@ if needed and pushed all the information onto the stacks.
         }
  
  `pop` simply moves the top of stack (`tos`) back down the required amount
-and frees and `asn` entries that need to be freed.  It is called _after_ we
+and frees any `asn` entries that need to be freed.  It is called _after_ we
  reduce a production, just before we `shift` the nonterminal in.
  
  ###### parser functions
@@ -2326,7 +2328,7 @@ reduce a production, just before we `shift` the nonterminal in.
  ### Memory allocation
  
  The `scanner` returns tokens in a local variable - we want them in allocated
-memory so they can live in the `asn_stack`.  Similarly the `asn` produce by
+memory so they can live in the `asn_stack`.  Similarly the `asn` produced by
  a reduce is in a large buffer.  Both of these require some allocation and
  copying, hence `memdup` and `tokcopy`.
  
@@ -2358,15 +2360,16 @@ Now we have the parser.  If we can shift, we do.  If not and we can reduce
  we do.  If the production we reduced was production zero, then we have
  accepted the input and can finish.
  
+We return whatever `asn` was returned by reducing production zero.
+
  If we can neither shift nor reduce we have an error to handle.  We pop
-single entries off the stack until we can shift the `TK_error` symbol, the
+single entries off the stack until we can shift the `TK_error` symbol, then
  drop input tokens until we find one we can shift into the new error state.
  
-We return whatever `asn` was returned by reducing production zero.
  
  ###### parser includes
         #include "parser.h"
-###### parser
+###### parser_run
         void *parser_run(struct token_state *tokens,
                          const struct state states[],
                          int (*do_reduce)(int, void**, void*),
@@ -2406,7 +2409,7 @@ We return whatever `asn` was returned by reducing production zero.
                                         accepted = 1;
                                 continue;
                         }
-                       /* Error. we walk up the stack until we
+                       /* Error. We walk up the stack until we
                          * find a state which will accept TK_error.
                          * We then shift in TK_error and see what state
                          * that takes us to.
@@ -2596,12 +2599,12 @@ something like this.
                                 exit(1);
                         }
                 }$
-               |  NEWLINE ${ printf("Blank line\n"); }$
+               | NEWLINE ${ printf("Blank line\n"); }$
                 | ERROR NEWLINE ${ printf("Skipped a bad line\n"); }$
  
         $number
-       Expression -> Expression +  Term ${ mpq_init($0.val); mpq_add($0.val, $1.val, $3.val); }$
-               | Expression -  Term ${ mpq_init($0.val); mpq_sub($0.val, $1.val, $3.val); }$
+       Expression -> Expression + Term ${ mpq_init($0.val); mpq_add($0.val, $1.val, $3.val); }$
+               | Expression - Term ${ mpq_init($0.val); mpq_sub($0.val, $1.val, $3.val); }$
                 | Term ${ mpq_init($0.val); mpq_set($0.val, $1.val); }$
  
         Term -> Term * Factor ${ mpq_init($0.val); mpq_mul($0.val, $1.val, $3.val); }$
@@ -2609,4 +2612,4 @@ something like this.
                 | Factor ${ mpq_init($0.val); mpq_set($0.val, $1.val); }$
  
         Factor -> NUMBER ${ if (number_parse($0.val, $0.tail, $1.txt) == 0) mpq_init($0.val); }$
-               | ( Expression  ) ${ mpq_init($0.val); mpq_set($0.val, $2.val); }$
+               | ( Expression ) ${ mpq_init($0.val); mpq_set($0.val, $2.val); }$
author	NeilBrown <neilb@suse.de>
	Sun, 24 Nov 2013 05:53:34 +0000 (16:53 +1100)
committer	NeilBrown <neilb@suse.de>
	Sun, 24 Nov 2013 05:53:34 +0000 (16:53 +1100)