From 151e984bb8300e4eb3f7cdf41135f9a3b0261fd5 Mon Sep 17 00:00:00 2001
From: NeilBrown <neil@brown.name>
Date: Sat, 8 Jun 2019 14:35:34 +1000
Subject: [PATCH] scanner: fix handling of indents in sub-nodes

I seem to have confused ->indent_sizes[] and ->col.

->col is used for the reported location of a token so
must be the actual column in the file, with no adjustment.

->indent_sizes[] is indents, which must include any inherited from
parent nodes.  So this is a completely different value.

So change mdcode to store the local node indent in ->needs_strip -
this is the number of text columns that are stripped off.
This, subtracted from ->indent, is the text offset of the physical
start-of-line.  Adding the measured ->col then gives us
the indent in the composed file, which is the indent that must be
used for detecting TK_in and TK_out.

Introduce a new function state_indent() which determines that indent,
and use it instead of ->col.

Signed-off-by: NeilBrown <neil@brown.name>
---
 csrc/mdcode.mdc  | 15 ++++++++++++---
 csrc/scanner.mdc | 33 ++++++++++++++++++++-------------
 2 files changed, 32 insertions(+), 16 deletions(-)

diff --git a/csrc/mdcode.mdc b/csrc/mdcode.mdc
index 2789671..cf6887d 100644
--- a/csrc/mdcode.mdc
+++ b/csrc/mdcode.mdc
@@ -161,8 +161,11 @@ the root.
 
 Finally we need to know if the `code_node` was recognised by being
 indented or not.  If it was, the client of this data will want to
-strip of the leading tab or 4 spaces.  Hence a `needs_strip` flag is
-needed.
+strip off the leading tab or 4 spaces.  Hence a `needs_strip` field is
+needed.  It is set to 8 if a tab is found, 4 if four spaces are found,
+and 0 if nothing needs to be stripped.  This means the relative indent
+of text in the node is `node->indent - node->needs_strip`.
+The relative indent is needed for detecting indents in the overall file.
 
 ##### exported types
 
@@ -287,7 +290,13 @@ in a new node.
 		n->code = txt;
 		n->indent = 0;
 		n->line_no = line_no;
-		n->needs_strip = needs_strip;
+		if (needs_strip) {
+			if (txt.txt[0] == '\t')
+				n->needs_strip = 8;
+			else
+				n->needs_strip = 4;
+		} else
+			n->needs_strip = 0;
 		n->next = NULL;
 		n->child = NULL;
 		if (where->last)
diff --git a/csrc/scanner.mdc b/csrc/scanner.mdc
index fa27a89..21ec828 100644
--- a/csrc/scanner.mdc
+++ b/csrc/scanner.mdc
@@ -716,6 +716,8 @@ ignored.
 	int	indent_level;
 	int	indent_sizes[20];
 
+`indent_sizes[0]` will always be zero - this simplifies some code.
+
 #### Newlines
 
 Newlines can optionally be reported.  Newlines within a block comment
@@ -796,6 +798,14 @@ Separately we need, on each call to `token_next`, to check if
 there are some delayed tokens and if so we need to advance the state
 information and return one token.
 
+###### internal functions
+	static int state_indent(struct token_state *state)
+	{
+		if (state->node == NULL)
+			return state->col;	/* no current node: just the file column */
+		return state->node->indent - state->node->needs_strip + state->col;	/* indent in the composed file */
+	}
+
 ###### white space
 	if (is_newline(ch) || (at_son(state) && ch <= ' ')) {
 		int newlines = 0;
@@ -817,11 +827,6 @@ information and return one token.
 		}
 		if (at_eon(state)) {
 			newlines += 1;
-			if (state->node->next &&
-			    state->node->next->indent > state->node->indent)
-				state->col = state->node->next->indent;
-			else
-				state->col = state->node->indent;
 		} else
 			unget_char(state);
 		state->delayed_lines = newlines;
@@ -833,7 +838,7 @@ information and return one token.
 ###### delayed tokens
 
 	if (state->check_indent || state->delayed_lines) {
-		if (state->col < state->indent_sizes[state->indent_level]) {
+		if (state_indent(state) < state->indent_sizes[state->indent_level]) {
 			if (!state->out_next &&
 			    !(ignored & (1<<TK_newline))) {
 				state->out_next = 1;
@@ -845,10 +850,10 @@ information and return one token.
 			tk.num = TK_out;
 			return tk;
 		}
-		if (state->col > state->indent_sizes[state->indent_level] &&
+		if (state_indent(state) > state->indent_sizes[state->indent_level] &&
 		    state->indent_level < sizeof(state->indent_sizes)-1) {
 			state->indent_level += 1;
-			state->indent_sizes[state->indent_level] = state->col;
+			state->indent_sizes[state->indent_level] = state_indent(state);
 			state->delayed_lines -= 1;
 			tk.num = TK_in;
 			return tk;
@@ -938,7 +943,7 @@ a flag that tells us whether or not we need to strip.
 
 ###### internal functions
 
-	static int do_strip(struct token_state *state)
+	static void do_strip(struct token_state *state)
 	{
 		int indent = 0;
 		if (state->node->needs_strip) {
@@ -954,7 +959,6 @@ a flag that tells us whether or not we need to strip.
 				n -= 4;
 			}
 		}
-		return indent;
 	}
 
 	static wint_t get_char(struct token_state *state)
@@ -973,7 +977,8 @@ a flag that tells us whether or not we need to strip.
 			if (state->node == NULL)
 				return WEOF;
 			state->line = state->node->line_no;
-			state->col = do_strip(state);
+			do_strip(state);
+			state->col = state->node->needs_strip;
 			state->strip_offset = state->offset;
 		}
 
@@ -999,7 +1004,8 @@ a flag that tells us whether or not we need to strip.
 			state->col += 1;
 		} else if (is_newline(next)) {
 			state->line += 1;
-			state->col = do_strip(state);
+			do_strip(state);
+			state->col = state->node->needs_strip;
 		} else if (next == '\t') {
 			state->col = indent_tab(state->col);
 		}
@@ -1211,7 +1217,8 @@ As well as getting tokens, we need to be able to create the
 		memset(state, 0, sizeof(*state));
 		state->node = code;
 		state->line = code->line_no;
-		state->col = do_strip(state);
+		do_strip(state);
+		state->col = state->node->needs_strip;
 		state->strip_offset = state->offset;
 		state->conf = conf;
 		return state;
-- 
2.43.0