echo "PASSED"; \
done
+ ## other tests
+
@gcov -o coverage scanner.c libscanner.c > /dev/null 2> /dev/null
@mv *.gcov coverage; [ -f .gcov ] && mv .gcov coverage || true
@awk '/NOTEST/ { next } /^ *[1-9]/ {ran+=1} /^ *###/ {skip+=1} \
END {printf "coverage: %6.2f%%\n", ran * 100 / (ran + skip); \
- if (ran < (ran + skip) *0.90) exit(1) }' \
+ if (ran < (ran + skip) *0.92) exit(1) }' \
coverage/scanner.mdc.gcov
@rm -f .tmp*
15:0 newline()
15:0 eof()
+## Nested tests.
+
+We need to test various aspects of tokenizing code that is stored
+in multiple nodes. For example, comments and multi-line strings mustn't
+cross a node boundary.
+
+For this we tell `scanner` to extract sections directly from this file.
+As the file changes, line numbers might change as well, so we need to factor
+that out when testing. A simple awk script can normalise the first line number
+to one.
+
+###### other tests
+	@# Run each section-extraction test; awk normalises the first reported
+	@# line number to 1 so the expected output is stable as this file grows.
+	@for T in $(scanner_section_tests); do \
+	    echo -n "Test $$T ... "; \
+	    i="$$IFS"; IFS=,; set $$T; IFS="$$i"; section="$$1"; shift; \
+	    ./md2c scanner-tests.mdc "output: $$T" | grep -v '^#' > .tmp.want; \
+	    ./coverage_scanner --file scanner-tests.mdc --section "test: $$section" \
+	         $${1+"$$@"} | awk -F: ' BEGIN {OFS=":"} $$1 ~ /^[0-9]/ {if (!first) first = $$1 - 1; \
+	              $$1 = $$1 - first} { print } '> .tmp.have; \
+	    if ! cmp -s .tmp.want .tmp.have; then \
+	        echo "FAILED"; diff -u .tmp.want .tmp.have; exit 1; fi ; \
+	    echo "PASSED"; \
+	done
+
+###### test list
+ scanner_section_tests += section1
+
+###### test: section1
+
+ foreach s in sections:
+ ## section2
+ print done
+
+###### section2
+
+ This is another
+ section
+
+###### output: section1
+ Tokenizing: test: section1
+ 1:8 ident(foreach)
+ 1:16 ident(s)
+ 1:18 ident(in)
+ 1:21 ident(sections)
+ 1:29 mark(:)
+ 7:16 in()
+ 7:16 ident(This)
+ 7:21 ident(is)
+ 7:24 ident(another)
+ 8:8 newline()
+ 8:8 out()
+ 8:8 in()
+ 8:8 ident(section)
+ 3:16 newline()
+ 3:16 ident(print)
+ 3:22 ident(done)
+ 4:0 newline()
+ 4:0 out()
+ 4:0 newline()
+ 4:0 eof()
+
## Ad-hoc test
These tests cover bugs that were found in practice, and so prevent them from recurring.
}
###### white space
+ if (is_newline(ch))
+ state_check_node(state);
if (is_newline(ch) || (at_son(state) && ch <= ' ')) {
int newlines = 0;
- int was_son = at_son(state);
+ int was_nl = is_newline(ch);
if (ignored & (1<<TK_in)) {
if (!is_newline(ch))
continue;
return tk;
}
// Indents are needed, so check all white space.
- while (ch <= ' ' && !at_eon(state)) {
+ while (ch <= ' ' && ch != WEOF) {
if (is_newline(ch))
newlines += 1;
ch = get_char(state);
+ if (is_newline(ch))
+ state_check_node(state);
}
- if (at_eon(state)) {
- newlines += 1;
- } else
+ if (ch != WEOF)
unget_char(state);
state->delayed_lines = newlines;
- state->out_next = was_son;
+ state->out_next = !was_nl;
state->check_indent = 1;
continue;
}
}
}
+ /* If the current node's code has been fully consumed (offset has reached
+  * code.len), advance state->node to the next node that actually carries
+  * code text, and reset the per-node position fields.  A NULL node after
+  * advancing means end-of-input (get_char reports WEOF in that case).
+  */
+ static void state_check_node(struct token_state *state)
+ {
+	if (!state->node)
+		return;
+	if (state->node->code.len > state->offset)
+		return;
+
+	/* Skip nodes with no code text at all. */
+	do
+		state->node = state->node->next;
+	while (state->node && state->node->code.txt == NULL);
+	state->offset = 0;
+	state->prev_offset = 0;
+	state->strip_offset = 0;
+	state->col = 0;
+	if (state->node == NULL)
+		return;
+	/* New node: restart line tracking from the node's own line number,
+	 * then strip its leading indentation.  needs_strip presumably holds
+	 * the column width that do_strip removed — NOTE(review): confirm
+	 * against do_strip's definition (not visible here). */
+	state->line = state->node->line_no;
+	do_strip(state);
+	state->col = state->node->needs_strip;
+	state->strip_offset = state->offset;
+ }
+
static wint_t get_char(struct token_state *state)
{
wchar_t next;
size_t n;
mbstate_t mbstate;
+ state_check_node(state);
if (state->node == NULL)
return WEOF;
- if (state->node->code.len <= state->offset) {
- do
- state->node = state->node->next;
- while (state->node && state->node->code.txt == NULL);
- state->offset = 0;
- if (state->node == NULL)
- return WEOF;
- state->line = state->node->line_no;
- do_strip(state);
- state->col = state->node->needs_strip;
- state->strip_offset = state->offset;
- }
## before get_char