Remove excess blank lines

[ocean] / csrc / parsergen.mdc
diff --git a/csrc/parsergen.mdc b/csrc/parsergen.mdc

index 1d9d611e7f4f2c60134e54a72b01239b7db6016e..94c29df1d2c7665f61903d413918d0cbee44e720 100644 (file)
--- a/csrc/parsergen.mdc
+++ b/csrc/parsergen.mdc
@@ -21,7 +21,6 @@ There are several distinct sections.
     `parsergen` program built from the C code in this file can extract
     that grammar directly from this file and process it.
  
-
  ###### File: parsergen.c
         #include <unistd.h>
         #include <stdlib.h>
@@ -839,7 +838,6 @@ array like the productions.
                 return sl->ss;
         }
  
-
  ### Setting `nullable`
  
  We set `nullable` on the head symbol for any production for which all
@@ -877,7 +875,7 @@ changes happen.
                 }
         }
  
-### Setting `can_eol` and `line_like`
+### Setting `line_like`
  
  In order to be able to ignore newline tokens when not relevant, but
  still include them in the parse when needed, we will need to know
@@ -885,30 +883,25 @@ which states can start a "line-like" section of code.  We ignore
  newlines when there is an indent since the most recent start of a
  line-like symbol.
  
-To know which symbols are line-like, we first need to know which
-symbols start with a NEWLINE token.  Any symbol which is followed by a
-NEWLINE, or anything that starts with a NEWLINE, is deemed to be a line-like symbol.
-Certainly when trying to parse one of these we must take note of NEWLINEs.
+A `line_like` symbol is simply any symbol that can derive a NEWLINE.
+If a symbol cannot derive a NEWLINE, then it is only part of a line,
+and so is word-like.  If it can derive a NEWLINE, then we consider it
+to be like a line.
  
-Clearly the `TK_newline` token can start with a NEWLINE.  Any symbol
-which is the head of a production that contains a starts-with-NEWLINE
-symbol preceeded only by nullable symbols is also a
-starts-with-NEWLINE symbol.  We use a new field `can_eol` to record
-this attribute of symbols, and compute it in a repetitive manner
-similar to `set_nullable`.
-
-Once we have that, we can determine which symbols are `line_like` by
-seeing which are followed by a `can_eol` symbol in any production.
+Clearly the `TK_newline` token can derive a NEWLINE.  Any symbol which
+is the head of a production that contains a `line_like` symbol is also
+a `line_like` symbol.  We use a new field `line_like` to record this
+attribute of symbols, and compute it in a repetitive manner similar to
+`set_nullable`.
  
  ###### symbol fields
-       int can_eol;
         int line_like;
  
  ###### functions
-       static void set_can_eol(struct grammar *g)
+       static void set_line_like(struct grammar *g)
         {
                 int check_again = 1;
-               g->symtab[TK_newline]->can_eol = 1;
+               g->symtab[TK_newline]->line_like = 1;
                 while (check_again) {
                         int p;
                         check_again = 0;
@@ -916,35 +909,20 @@ seeing which are followed by a `can_eol` symbol in any production.
                                 struct production *pr = g->productions[p];
                                 int s;
  
-                               if (pr->head->can_eol)
+                               if (pr->head->line_like)
                                         continue;
  
                                 for (s = 0 ; s < pr->body_size; s++) {
-                                       if (pr->body[s]->can_eol) {
-                                               pr->head->can_eol = 1;
+                                       if (pr->body[s]->line_like) {
+                                               pr->head->line_like = 1;
                                                 check_again = 1;
                                                 break;
                                         }
-                                       if (!pr->body[s]->nullable)
-                                               break;
                                 }
                         }
                 }
         }
  
-       static void set_line_like(struct grammar *g)
-       {
-               int p;
-               for (p = 0; p < g->production_count; p++) {
-                       struct production *pr = g->productions[p];
-                       int s;
-
-                       for (s = 1; s < pr->body_size; s++)
-                               if (pr->body[s]->can_eol)
-                                       pr->body[s-1]->line_like = 1;
-               }
-       }
-
  ### Building the `first` sets
  
  When calculating what can follow a particular non-terminal, we will need to
@@ -1180,9 +1158,10 @@ need to be consider for completion again.  So  a `completed` flag is needed.
  
  For correct handling of `TK_newline` when parsing, we will need to
  know which states (itemsets) can occur at the start of a line, so we
-will record a `starts_line` flag too.
+will record a `starts_line` flag too, set whenever DOT is immediately
+before a `line_like` symbol.
  
-Finally, for handling `TK_out` we need to know where production in the
+Finally, for handling `TK_out` we need to know whether productions in the
  current state started *before* the most recent indent.  A state
  doesn't usually keep details of individual productions, so we need to
  add one extra detail. `min_prefix` is the smallest non-zero number of
@@ -1301,7 +1280,7 @@ be supplemented by the LA set for the item which produce the new item.
  
  We also collect a set of all symbols which follow "DOT" (in `done`) as this
  is used in the next stage.
-If any of these symbols are flagged as starting a line, then this
+If any of these symbols are flagged as `line_like`, then this
  state must be a `starts_line` state so now is a good time to record that.
  
  When itemsets are created we assign a precedence to the itemset from
@@ -1532,7 +1511,6 @@ changeover point in `first_nonterm`.
                         g->symtab[s->num] = s;
  
                 set_nullable(g);
-               set_can_eol(g);
                 set_line_like(g);
                 if (type >= SLR)
                         build_first(g);
@@ -1583,9 +1561,8 @@ show if it can end in a newline (`>`), if it is considered to be
                         if (!s)
                                 continue;
  
-                       printf(" %c%c%c%3d%c: ",
+                       printf(" %c%c%3d%c: ",
                                s->nullable ? '.':' ',
-                              s->can_eol ? '>':' ',
                                s->line_like ? '<':' ',
                                s->num, symtypes[s->type]);
                         prtxt(s->name);
@@ -1679,7 +1656,6 @@ The LA sets which are (possibly) reported with each item:
  
  Then the go to sets:
  
-
         static void report_goto(struct grammar *g, struct symset gt)
         {
                 int i;
@@ -1877,7 +1853,6 @@ include newlines as token.
                 return cnt;
         }
  
-
  ## Generating the parser
  
  The exported part of the parser is the `parse_XX` function, where the name
@@ -1980,7 +1955,6 @@ The go to table is stored in a simple array of `sym` and corresponding
                 short min_prefix;
         };
  
-
  ###### functions
  
         static void gen_goto(FILE *f, struct grammar *g)
@@ -3011,7 +2985,7 @@ an error.
                 $(CC) $(CFLAGS) -o calc calc.o libparser.o libscanner.o libmdcode.o libnumber.o -licuuc -lgmp
         calctest : calc
                 ./calc parsergen.mdc
-       tests :: calctest
+       demos :: calctest
  
  # calc: header