parsergen - fix newline parsing (again)

author NeilBrown <neil@brown.name>

Sun, 26 May 2019 05:04:43 +0000 (15:04 +1000)

committer NeilBrown <neil@brown.name>

Sun, 26 May 2019 05:04:43 +0000 (15:04 +1000)
author NeilBrown <neil@brown.name>
Sun, 26 May 2019 05:04:43 +0000 (15:04 +1000)
committer NeilBrown <neil@brown.name>
Sun, 26 May 2019 05:04:43 +0000 (15:04 +1000)
diff --git a/csrc/oceani-tests.mdc b/csrc/oceani-tests.mdc

index dfb753df6b900abb4fb5e78df710169b8ab6c879..04b9458dbf422cc4659fcc8596cba089a66043c9 100644 (file)
--- a/csrc/oceani-tests.mdc
+++ b/csrc/oceani-tests.mdc
@@ -115,6 +115,13 @@ calculations on them.
  
                 aconst :: string = "unchanging"
  
  
                 aconst :: string = "unchanging"
  
+               // Check wrapping
+               print
+                 a + b
+                 + (a*2)
+                 + b1
+                 + b
+
  ###### output: valvar
  
         23 12 35 11 276 1.91667 11
  ###### output: valvar
  
         23 12 35 11 276 1.91667 11
@@ -122,6 +129,7 @@ calculations on them.
         23 12 12 -23 -12 12
         False True True False False False
         This is a string  field theory This is a string field theory
         23 12 12 -23 -12 12
         False True True False False False
         This is a string  field theory This is a string field theory
+       81
  
  Next we change the value of variables
  
  
  Next we change the value of variables
  
diff --git a/csrc/parsergen.mdc b/csrc/parsergen.mdc

index 1d9d611e7f4f2c60134e54a72b01239b7db6016e..78ff5435b8086fcd720674cfcb72c13b3a99d919 100644 (file)
--- a/csrc/parsergen.mdc
+++ b/csrc/parsergen.mdc
@@ -877,7 +877,7 @@ changes happen.
                 }
         }
  
                 }
         }
  
-### Setting `can_eol` and `line_like`
+### Setting `line_like`
  
  In order to be able to ignore newline tokens when not relevant, but
  still include them in the parse when needed, we will need to know
  
  In order to be able to ignore newline tokens when not relevant, but
  still include them in the parse when needed, we will need to know
@@ -885,30 +885,26 @@ which states can start a "line-like" section of code.  We ignore
  newlines when there is an indent since the most recent start of a
  line-like symbol.
  
  newlines when there is an indent since the most recent start of a
  line-like symbol.
  
-To know which symbols are line-like, we first need to know which
-symbols start with a NEWLINE token.  Any symbol which is followed by a
-NEWLINE, or anything that starts with a NEWLINE, is deemed to be a line-like symbol.
-Certainly when trying to parse one of these we must take note of NEWLINEs.
+A "line_like" symbol is simply any symbol that can derive a NEWLINE.
+If a symbol cannot derive a NEWLINE, then it is only part of a line -
+so is word-like.  If it can derive a NEWLINE, then we consider it to
+be like a line.
  
  
-Clearly the `TK_newline` token can start with a NEWLINE.  Any symbol
-which is the head of a production that contains a starts-with-NEWLINE
-symbol preceeded only by nullable symbols is also a
-starts-with-NEWLINE symbol.  We use a new field `can_eol` to record
-this attribute of symbols, and compute it in a repetitive manner
-similar to `set_nullable`.
  
  
-Once we have that, we can determine which symbols are `line_like` by
-seeing which are followed by a `can_eol` symbol in any production.
+Clearly the `TK_newline` token can derive a NEWLINE.  Any symbol which
+is the head of a production that contains a `line_like` symbol is also
+a `line_like` symbol.  We use a new field `line_like` to record this
+attribute of symbols, and compute it by making repeated passes over the
+productions until nothing changes, in a manner similar to `set_nullable`.
  
  ###### symbol fields
  
  ###### symbol fields
-       int can_eol;
         int line_like;
  
  ###### functions
         int line_like;
  
  ###### functions
-       static void set_can_eol(struct grammar *g)
+       static void set_line_like(struct grammar *g)
         {
                 int check_again = 1;
         {
                 int check_again = 1;
-               g->symtab[TK_newline]->can_eol = 1;
+               g->symtab[TK_newline]->line_like = 1;
                 while (check_again) {
                         int p;
                         check_again = 0;
                 while (check_again) {
                         int p;
                         check_again = 0;
@@ -916,35 +912,20 @@ seeing which are followed by a `can_eol` symbol in any production.
                                 struct production *pr = g->productions[p];
                                 int s;
  
                                 struct production *pr = g->productions[p];
                                 int s;
  
-                               if (pr->head->can_eol)
+                               if (pr->head->line_like)
                                         continue;
  
                                 for (s = 0 ; s < pr->body_size; s++) {
                                         continue;
  
                                 for (s = 0 ; s < pr->body_size; s++) {
-                                       if (pr->body[s]->can_eol) {
-                                               pr->head->can_eol = 1;
+                                       if (pr->body[s]->line_like) {
+                                               pr->head->line_like = 1;
                                                 check_again = 1;
                                                 break;
                                         }
                                                 check_again = 1;
                                                 break;
                                         }
-                                       if (!pr->body[s]->nullable)
-                                               break;
                                 }
                         }
                 }
         }
  
                                 }
                         }
                 }
         }
  
-       static void set_line_like(struct grammar *g)
-       {
-               int p;
-               for (p = 0; p < g->production_count; p++) {
-                       struct production *pr = g->productions[p];
-                       int s;
-
-                       for (s = 1; s < pr->body_size; s++)
-                               if (pr->body[s]->can_eol)
-                                       pr->body[s-1]->line_like = 1;
-               }
-       }
-
  ### Building the `first` sets
  
  When calculating what can follow a particular non-terminal, we will need to
  ### Building the `first` sets
  
  When calculating what can follow a particular non-terminal, we will need to
@@ -1180,9 +1161,10 @@ need to be consider for completion again.  So  a `completed` flag is needed.
  
  For correct handling of `TK_newline` when parsing, we will need to
  know which states (itemsets) can occur at the start of a line, so we
  
  For correct handling of `TK_newline` when parsing, we will need to
  know which states (itemsets) can occur at the start of a line, so we
-will record a `starts_line` flag too.
+will record a `starts_line` flag too.  It is set whenever DOT is
+immediately followed by a `line_like` symbol.
  
  
-Finally, for handling `TK_out` we need to know where production in the
+Finally, for handling `TK_out` we need to know whether productions in the
  current state started *before* the most recent indent.  A state
  doesn't usually keep details of individual productions, so we need to
  add one extra detail. `min_prefix` is the smallest non-zero number of
  current state started *before* the most recent indent.  A state
  doesn't usually keep details of individual productions, so we need to
  add one extra detail. `min_prefix` is the smallest non-zero number of
@@ -1301,7 +1283,7 @@ be supplemented by the LA set for the item which produce the new item.
  
  We also collect a set of all symbols which follow "DOT" (in `done`) as this
  is used in the next stage.
  
  We also collect a set of all symbols which follow "DOT" (in `done`) as this
  is used in the next stage.
-If any of these symbols are flagged as starting a line, then this
+If any of these symbols are flagged as `line_like`, then this
  state must be a `starts_line` state so now is a good time to record that.
  
  When itemsets are created we assign a precedence to the itemset from
  state must be a `starts_line` state so now is a good time to record that.
  
  When itemsets are created we assign a precedence to the itemset from
@@ -1532,7 +1514,6 @@ changeover point in `first_nonterm`.
                         g->symtab[s->num] = s;
  
                 set_nullable(g);
                         g->symtab[s->num] = s;
  
                 set_nullable(g);
-               set_can_eol(g);
                 set_line_like(g);
                 if (type >= SLR)
                         build_first(g);
                 set_line_like(g);
                 if (type >= SLR)
                         build_first(g);
@@ -1583,9 +1564,8 @@ show if it can end in a newline (`>`), if it is considered to be
                         if (!s)
                                 continue;
  
                         if (!s)
                                 continue;
  
-                       printf(" %c%c%c%3d%c: ",
+                       printf(" %c%c%3d%c: ",
                                s->nullable ? '.':' ',
                                s->nullable ? '.':' ',
-                              s->can_eol ? '>':' ',
                                s->line_like ? '<':' ',
                                s->num, symtypes[s->type]);
                         prtxt(s->name);
                                s->line_like ? '<':' ',
                                s->num, symtypes[s->type]);
                         prtxt(s->name);
author	NeilBrown <neil@brown.name>
	Sun, 26 May 2019 05:04:43 +0000 (15:04 +1000)
committer	NeilBrown <neil@brown.name>
	Sun, 26 May 2019 05:04:43 +0000 (15:04 +1000)
csrc/oceani-tests.mdc		patch \| blob \| history
csrc/parsergen.mdc		patch \| blob \| history