From 357e56af7fb7655f8de2a6de134c17f548dfb594 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 2 Oct 2014 20:49:59 +1000 Subject: [PATCH] parsergen: adjust for new definition of line_like symbols. A symbol is line-like if it is followed by a NEWLINE, or any symbol which starts with a NEWLINE. Signed-off-by: NeilBrown --- csrc/parsergen.mdc | 43 +++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/csrc/parsergen.mdc b/csrc/parsergen.mdc index 497b3cb..7859a98 100644 --- a/csrc/parsergen.mdc +++ b/csrc/parsergen.mdc @@ -869,29 +869,32 @@ changes happen. } } -### Setting `can_eol` and `starts_line` +### Setting `can_eol` and `line_like` In order to be able to ignore newline tokens when not relevant, but still include them in the parse when needed, we will need to know which states can start a "line-like" section of code. We ignore newlines when there is an indent since the most recent start of a -line-like section. +line-like symbol. -To know what is line-like, we first need to know which symbols can end -a line-like section, which is precisely those which can end with a -newline token. These symbols don't necessarily alway end with a -newline, but they can. Hence they are not described as "lines" but -only "line-like". +To know which symbols are line-like, we first need to know which +symbols start with a NEWLINE token. Any symbol which is followed by a +NEWLINE, or anything that starts with a NEWLINE, is deemed to be a line-like symbol. +Certainly when trying to parse one of these we must take note of NEWLINEs. -Clearly the `TK_newline` token can end with a newline. Any symbol -which is the head of a production that contains a line-ending symbol -followed only by nullable symbols is also a line-ending symbol. We -use a new field `can_eol` to record this attribute of symbols, and -compute it in a repetitive manner similar to `set_nullable`. +Clearly the `TK_newline` token can start with a NEWLINE. 
Any symbol +which is the head of a production that contains a starts-with-NEWLINE +symbol preceded only by nullable symbols is also a +starts-with-NEWLINE symbol. We use a new field `can_eol` to record +this attribute of symbols, and compute it in a repetitive manner +similar to `set_nullable`. + +Once we have that, we can determine which symbols are `line_like` by +seeing which are followed by a `can_eol` symbol in any production. ###### symbol fields int can_eol; - int starts_line; + int line_like; ###### functions static void set_can_eol(struct grammar *g) @@ -908,7 +911,7 @@ compute it in a repetitive manner similar to `set_nullable`. if (pr->head->can_eol) continue; - for (s = pr->body_size - 1; s >= 0; s--) { + for (s = 0 ; s < pr->body_size; s++) { if (pr->body[s]->can_eol) { pr->head->can_eol = 1; check_again = 1; @@ -921,16 +924,16 @@ compute it in a repetitive manner similar to `set_nullable`. } } - static void set_starts_line(struct grammar *g) + static void set_line_like(struct grammar *g) { int p; for (p = 0; p < g->production_count; p++) { struct production *pr = g->productions[p]; int s; - for (s = 0; s < pr->body_size - 1; s++) + for (s = 1; s < pr->body_size; s++) if (pr->body[s]->can_eol) - pr->body[s+1]->starts_line = 1; + pr->body[s-1]->line_like = 1; } } @@ -1286,7 +1289,7 @@ though. s = pr->body[bs]; if (symset_find(&done, s->num) < 0) { symset_add(&done, s->num, 0); - if (s->starts_line) + if (s->line_like) is->starts_line = 1; } if (s->type != Nonterminal) @@ -1464,7 +1467,7 @@ changeover point in `first_nonterm`. set_nullable(g); set_can_eol(g); - set_starts_line(g); + set_line_like(g); if (type >= SLR) build_first(g); @@ -1517,7 +1520,7 @@ line (`<`), or if it is nullable (`.`). printf(" %c%c%c%3d%c: ", s->nullable ? '.':' ', s->can_eol ? '>':' ', - s->starts_line ? '<':' ', + s->line_like ? '<':' ', s->num, symtypes[s->type]); prtxt(s->name); if (s->precedence) -- 2.43.0