From 151e984bb8300e4eb3f7cdf41135f9a3b0261fd5 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sat, 8 Jun 2019 14:35:34 +1000 Subject: [PATCH] scanner: fix handling of indents in sub-nodes I seem to have confused ->indent_sizes[] and ->col. ->col is used for the reported location of a token so must be the actual column in the file, with no adjustment. ->indent_sizes[] is indents, which must include any inherited from parent nodes. So this is a completely different value. So change mdcode to store the local node indent in ->needs_strip - this is the number of text columns that are stripped off. This, subtracted from ->indent, is the text offset of the physical start-of-line. Adding the measured ->col then gives us the indent in the composed file, the indent that must be used for detecting TK_in and TK_out. Introduce a new function state_indent() which determines that indent, and use it instead of ->col. Signed-off-by: NeilBrown --- csrc/mdcode.mdc | 15 ++++++++++++--- csrc/scanner.mdc | 33 ++++++++++++++++++++------------- 2 files changed, 32 insertions(+), 16 deletions(-) diff --git a/csrc/mdcode.mdc b/csrc/mdcode.mdc index 2789671..cf6887d 100644 --- a/csrc/mdcode.mdc +++ b/csrc/mdcode.mdc @@ -161,8 +161,11 @@ the root. Finally we need to know if the `code_node` was recognised by being indented or not. If it was, the client of this data will want to -strip of the leading tab or 4 spaces. Hence a `needs_strip` flag is -needed. +strip off the leading tab or 4 spaces. Hence a `needs_strip` flag is +needed. This will be set to 8 if a tab is found and 4 if four spaces are found. +This means the relative indent of text in the node +is `node->indent - node->needs_strip`. +The relative indent is needed for detecting indents in the overall file. ##### exported types @@ -287,7 +290,13 @@ in a new node. 
n->code = txt; n->indent = 0; n->line_no = line_no; - n->needs_strip = needs_strip; + if (needs_strip) { + if (txt.txt[0] == '\t') + n->needs_strip = 8; + else + n->needs_strip = 4; + } else + n->needs_strip = 0; n->next = NULL; n->child = NULL; if (where->last) diff --git a/csrc/scanner.mdc b/csrc/scanner.mdc index fa27a89..21ec828 100644 --- a/csrc/scanner.mdc +++ b/csrc/scanner.mdc @@ -716,6 +716,8 @@ ignored. int indent_level; int indent_sizes[20]; +`indent_sizes[0]` will always be zero - this simplifies some code. + #### Newlines Newlines can optionally be reported. Newlines within a block comment @@ -796,6 +798,14 @@ Separately we need, on each call to `token_next`, to check if there are some delayed tokens and if so we need to advance the state information and return one token. +###### internal functions + static int state_indent(struct token_state *state) + { + if (state->node == NULL) + return state->col; + return state->node->indent - state->node->needs_strip + state->col; + } + ###### white space if (is_newline(ch) || (at_son(state) && ch <= ' ')) { int newlines = 0; @@ -817,11 +827,6 @@ information and return one token. } if (at_eon(state)) { newlines += 1; - if (state->node->next && - state->node->next->indent > state->node->indent) - state->col = state->node->next->indent; - else - state->col = state->node->indent; } else unget_char(state); state->delayed_lines = newlines; @@ -833,7 +838,7 @@ information and return one token. ###### delayed tokens if (state->check_indent || state->delayed_lines) { - if (state->col < state->indent_sizes[state->indent_level]) { + if (state_indent(state) < state->indent_sizes[state->indent_level]) { if (!state->out_next && !(ignored & (1<<TK_newline))) { state->out_next = 1; @@ -845,10 +850,10 @@ information and return one token. 
tk.num = TK_out; return tk; } - if (state->col > state->indent_sizes[state->indent_level] && + if (state_indent(state) > state->indent_sizes[state->indent_level] && state->indent_level < sizeof(state->indent_sizes)-1) { state->indent_level += 1; - state->indent_sizes[state->indent_level] = state->col; + state->indent_sizes[state->indent_level] = state_indent(state); state->delayed_lines -= 1; tk.num = TK_in; return tk; @@ -938,7 +943,7 @@ a flag that tells us whether or not we need to strip. ###### internal functions - static int do_strip(struct token_state *state) + static void do_strip(struct token_state *state) { int indent = 0; if (state->node->needs_strip) { @@ -954,7 +959,6 @@ a flag that tells us whether or not we need to strip. n -= 4; } } - return indent; } static wint_t get_char(struct token_state *state) @@ -973,7 +977,8 @@ a flag that tells us whether or not we need to strip. if (state->node == NULL) return WEOF; state->line = state->node->line_no; - state->col = do_strip(state); + do_strip(state); + state->col = state->node->needs_strip; state->strip_offset = state->offset; } @@ -999,7 +1004,8 @@ a flag that tells us whether or not we need to strip. state->col += 1; } else if (is_newline(next)) { state->line += 1; - state->col = do_strip(state); + do_strip(state); + state->col = state->node->needs_strip; } else if (next == '\t') { state->col = indent_tab(state->col); } @@ -1211,7 +1217,8 @@ As well as getting tokens, we need to be able to create the memset(state, 0, sizeof(*state)); state->node = code; state->line = code->line_no; - state->col = do_strip(state); + do_strip(state); + state->col = state->node->needs_strip; state->strip_offset = state->offset; state->conf = conf; return state; -- 2.43.0