echo "PASSED"; \
done
+ ## other tests
+
@gcov -o coverage scanner.c libscanner.c > /dev/null 2> /dev/null
@mv *.gcov coverage; [ -f .gcov ] && mv .gcov coverage || true
@awk '/NOTEST/ { next } /^ *[1-9]/ {ran+=1} /^ *###/ {skip+=1} \
END {printf "coverage: %6.2f%%\n", ran * 100 / (ran + skip); \
- if (ran < (ran + skip) *0.85) exit(1) }' \
+ if (ran < (ran + skip) *0.95) exit(1) }' \
coverage/scanner.mdc.gcov
@rm -f .tmp*
- coverage_scanner: scanner.c libscanner.c
+ coverage_scanner: scanner.c libscanner.c libmdcode.o libnumber.o libstring.o
$(CC) $(CFLAGS) --coverage -fprofile-dir=coverage -o coverage_scanner \
scanner.c libscanner.c \
libmdcode.o libnumber.o libstring.o -licuuc -lgmp
-
## Basic tests
Some simple tests... maybe all tests are simple.
+Include a special test for numbers, as they are interesting.
###### test list
- scanner_tests += "test1,if,then,+,-"
- scanner_tests += "test1,if,then,+,-,/"
- scanner_tests += "test1,--ignore-indent,if,then,+,-,/"
+ scanner_tests += "test1,-r,if,then,+,-"
+ scanner_tests += "test1,-r,if,then,+,-,/"
+ scanner_tests += "test1,-r,--ignore-indent,if,then,+,-,/"
+ scanner_tests += "test1,-r,--ignore-indent,--ignore-newline,if,then,+,-,/"
scanner_tests += "test1,--ignore-indent,--ignore-newline,if,then,+,-,/"
- scanner_tests += "test1,-Cc,--ignore-indent,--ignore-newline,if,then,+,-,/"
- scanner_tests += "test1,-CcSz,--ignore-indent,--ignore-newline,if,then,+,-,/"
+ scanner_tests += "test1,-Sz,--ignore-indent,--ignore-newline,if,then,+,-,/"
###### test: test1
if else then fi while
1234, 1.234 -123.456e45
- 0x1234 + 0x543p3
+ 0x1234 + 0x543p+3
"This is a string" &"so is this"a
a = """
This is a multi-
lines */
divident /+ divisor
-###### output: test1,if,then,+,-
+###### output: test1,-r,if,then,+,-
Tokenizing:
2:0 ident(A)
2:2 ident(B)
18:0 newline()
18:0 number(1234) 1234
18:4 mark(,)
- 18:7 number(1.234 ) 617/500
+ 18:7 number(1.234) 617/500
18:13 -
18:14 number(123.456e45) 123456000000000000000000000000000000000000000000
19:0 newline()
- 19:0 number(0x1234 ) 4660
+ 19:0 number(0x1234) 4660
19:7 +
- 19:10 number(0x543p3) 10776
+ 19:10 number(0x543p+3) 10776
20:0 newline()
20:0 string("This is a string") This is a string
20:19 mark(&)
32:0 newline()
32:0 eof()
-###### output: test1,if,then,+,-,/
+###### output: test1,-r,if,then,+,-,/
Tokenizing:
2:0 ident(A)
2:2 ident(B)
18:0 newline()
18:0 number(1234) 1234
18:4 mark(,)
- 18:7 number(1.234 ) 617/500
+ 18:7 number(1.234) 617/500
18:13 -
18:14 number(123.456e45) 123456000000000000000000000000000000000000000000
19:0 newline()
- 19:0 number(0x1234 ) 4660
+ 19:0 number(0x1234) 4660
19:7 +
- 19:10 number(0x543p3) 10776
+ 19:10 number(0x543p+3) 10776
20:0 newline()
20:0 string("This is a string") This is a string
20:19 mark(&)
32:0 newline()
32:0 eof()
-###### output: test1,--ignore-indent,if,then,+,-,/
+###### output: test1,-r,--ignore-indent,if,then,+,-,/
Tokenizing:
2:0 ident(A)
2:2 ident(B)
17:21 newline()
18:0 number(1234) 1234
18:4 mark(,)
- 18:7 number(1.234 ) 617/500
+ 18:7 number(1.234) 617/500
18:13 -
18:14 number(123.456e45) 123456000000000000000000000000000000000000000000
18:24 newline()
- 19:0 number(0x1234 ) 4660
+ 19:0 number(0x1234) 4660
19:7 +
- 19:10 number(0x543p3) 10776
- 19:17 newline()
+ 19:10 number(0x543p+3) 10776
+ 19:18 newline()
20:0 string("This is a string") This is a string
20:19 mark(&)
20:20 string("so is this"a) a so is this
31:19 newline()
32:0 eof()
-###### output: test1,--ignore-indent,--ignore-newline,if,then,+,-,/
+###### output: test1,-r,--ignore-indent,--ignore-newline,if,then,+,-,/
Tokenizing:
2:0 ident(A)
2:2 ident(B)
17:16 ident(while)
18:0 number(1234) 1234
18:4 mark(,)
- 18:7 number(1.234 ) 617/500
+ 18:7 number(1.234) 617/500
18:13 -
18:14 number(123.456e45) 123456000000000000000000000000000000000000000000
- 19:0 number(0x1234 ) 4660
+ 19:0 number(0x1234) 4660
19:7 +
- 19:10 number(0x543p3) 10776
+ 19:10 number(0x543p+3) 10776
20:0 string("This is a string") This is a string
20:19 mark(&)
20:20 string("so is this"a) a so is this
31:12 ident(divisor)
32:0 eof()
-###### output: test1,-Cc,--ignore-indent,--ignore-newline,if,then,+,-,/
+###### output: test1,--ignore-indent,--ignore-newline,if,then,+,-,/
Tokenizing:
2:0 ident(A)
2:2 ident(B)
17:16 ident(while)
18:0 number(1234) 1234
18:4 mark(,)
- 18:7 number(1.234 ) 617/500
+ 18:7 number(1.234) 617/500
18:13 -
18:14 number(123.456e45) 123456000000000000000000000000000000000000000000
- 19:0 number(0x1234 ) 4660
+ 19:0 number(0x1234) 4660
19:7 +
- 19:10 number(0x543p3) 10776
+ 19:10 number(0x543p+3) 10776
20:0 string("This is a string") This is a string
20:19 mark(&)
20:20 string("so is this"a) a so is this
31:12 ident(divisor)
32:0 eof()
-###### output: test1,-CcSz,--ignore-indent,--ignore-newline,if,then,+,-,/
+###### output: test1,-Sz,--ignore-indent,--ignore-newline,if,then,+,-,/
Tokenizing:
2:0 ident(A)
2:2 ident(B)
17:16 ident(while)
18:0 number(1234) 1234
18:4 mark(,)
- 18:7 number(1.234 ) 617/500
+ 18:7 number(1.234) 617/500
18:13 -
18:14 number(123.456e45) 123456000000000000000000000000000000000000000000
- 19:0 number(0x1234 ) 4660
+ 19:0 number(0x1234) 4660
19:7 +
- 19:10 number(0x543p3) 10776
+ 19:10 number(0x543p+3) 10776
20:0 mark(")
20:1 ident(This)
20:6 ident(is)
31:10 +
31:12 ident(divisor)
32:0 eof()
+
+###### test list
+ scanner_tests += "testnum"
+
+###### test: testnum
+ 12345
+ 1234.56
+ 1234.56e7
+ 1234.56e-7
+ 0x1234
+ 0x123,456
+ 0o777
+ 0o111.111p4
+ 0b11011110p3
+
+ 123 456 789
+ 0x1234_5678_9abc
+
+ "Now for some non-number"
+ 1234p4
+ 12-34
+ 01234
+ 0c1234
+ 123.456e1a
+ 123.e4
+ 0x123 456
+ 0b1234
+ 123_345_.34
+ .75
+
+###### output: testnum
+ Tokenizing:
+ 2:0 number(12345) 12345
+ 3:0 newline()
+ 3:0 number(1234.56) 30864/25
+ 4:0 newline()
+ 4:0 number(1234.56e7) 12345600000
+ 5:0 newline()
+ 5:0 number(1234.56e-7) 1929/15625000
+ 6:0 newline()
+ 6:0 number(0x1234) 4660
+ 7:0 newline()
+ 7:0 number(0x123,456) 596523/2048
+ 8:0 newline()
+ 8:0 number(0o777) 511
+ 9:0 newline()
+ 9:0 number(0o111.111p4) 37449/32
+ 10:0 newline()
+ 10:0 number(0b11011110p3) 1776
+ 12:0 newline()
+ 12:0 newline()
+ 12:0 number(123 456 789) 123456789
+ 13:0 newline()
+ 13:0 number(0x1234_5678_9abc) 20015998343868
+ 15:0 newline()
+ 15:0 newline()
+ 15:0 string("Now for some non-..) Now for some non-n..
+ 16:0 newline()
+ 16:0 number(1234p4) BAD NUMBER
+ 17:0 newline()
+ 17:0 number(12) 12
+ 17:2 mark(-)
+ 17:3 number(34) 34
+ 18:0 newline()
+ 18:0 number(01234) BAD NUMBER
+ 19:0 newline()
+ 19:0 number(0c1234) BAD NUMBER
+ 20:0 newline()
+ 20:0 number(123.456e1a) a 30864/25
+ 21:0 newline()
+ 21:0 number(123.e4) 1230000
+ 22:0 newline()
+ 22:0 number(0x123 456) BAD NUMBER
+ 23:0 newline()
+ 23:0 number(0b1234) BAD NUMBER
+ 24:0 newline()
+ 24:0 number(123_345) 123345
+ 24:7 ident(_)
+ 24:8 mark(.)
+ 24:9 number(34) 34
+ 25:0 newline()
+ 25:0 mark(.)
+ 25:1 number(75) 75
+ 26:0 newline()
+ 26:0 eof()
+
+## Error tests
+
+Now to test for some errors ... though things I thought would be errors
+sometimes aren't.
+
+###### test list
+ scanner_tests += "errtest,-r,--ignore-ident,--ignore-mark,-W_,-w_,if,then,+,-"
+ scanner_tests += "errtest,-r,--ignore-ident,--ignore-mark,-N,if,then,+,-"
+
+###### test: errtest
+
+ multiple decimal pointer 3.141.59
+ "Check for decimal commas" 3,14159 = 3,141,59
+ """A multi-string must not have text here
+ """
+ "or after close" + """
+ no text ..
+ """ here
+
+ /* No embedded /* comments */ in comments */
+ /* or content after
+ * a multiline comment */ Error
+
+ " \\ \t \n special chars in strings"
+
+###### output: errtest,-r,--ignore-ident,--ignore-mark,-W_,-w_,if,then,+,-
+
+ Tokenizing:
+ 2:0 ERROR(multiple)
+ 2:9 ERROR(decimal)
+ 2:17 ERROR(pointer)
+ 2:25 number(3.141) 3141/1000
+ 2:30 ERROR(.)
+ 2:31 number(59) 59
+ 3:0 newline()
+ 3:0 string("Check for decimal..) Check for decimal ..
+ 3:27 number(3,14159) 314159/100000
+ 3:35 ERROR(=)
+ 3:37 number(3,141) 3141/1000
+ 3:42 ERROR(,)
+ 3:43 number(59) 59
+ 4:0 newline()
+ 4:0 string("")
+ 4:2 ERROR("A multi-string mu..)
+ 5:0 newline()
+ 5:0 ERROR("""\x0a"or after clos..)
+ 8:12 ERROR(here)
+ 10:0 newline()
+ 10:0 newline()
+ 10:0 ERROR(/* No embedded /*)
+ 10:15 bcomment(/* comments */)
+ 10:30 ERROR(in)
+ 10:33 ERROR(comments)
+ 10:42 ERROR(*/)
+ 11:0 newline()
+ 11:0 ERROR(/* or content afte..)
+ 12:26 ERROR(Error)
+ 14:0 newline()
+ 14:0 newline()
+ 14:0 string(" \\\\ \\t \\n specia..) \\ \x09 \x0a special ch..
+ 15:0 newline()
+ 15:0 eof()
+
+###### output: errtest,-r,--ignore-ident,--ignore-mark,-N,if,then,+,-
+ Tokenizing:
+ 2:0 ERROR(multiple)
+ 2:9 ERROR(decimal)
+ 2:17 ERROR(pointer)
+ 2:25 ERROR(3)
+ 2:26 ERROR(.)
+ 2:27 ERROR(1)
+ 2:28 ERROR(4)
+ 2:29 ERROR(1)
+ 2:30 ERROR(.)
+ 2:31 ERROR(5)
+ 2:32 ERROR(9)
+ 3:0 newline()
+ 3:0 string("Check for decimal..) Check for decimal ..
+ 3:27 ERROR(3)
+ 3:28 ERROR(,)
+ 3:29 ERROR(1)
+ 3:30 ERROR(4)
+ 3:31 ERROR(1)
+ 3:32 ERROR(5)
+ 3:33 ERROR(9)
+ 3:35 ERROR(=)
+ 3:37 ERROR(3)
+ 3:38 ERROR(,)
+ 3:39 ERROR(1)
+ 3:40 ERROR(4)
+ 3:41 ERROR(1)
+ 3:42 ERROR(,)
+ 3:43 ERROR(5)
+ 3:44 ERROR(9)
+ 4:0 newline()
+ 4:0 string("")
+ 4:2 ERROR("A multi-string mu..)
+ 5:0 newline()
+ 5:0 ERROR("""\x0a"or after clos..)
+ 8:12 ERROR(here)
+ 10:0 newline()
+ 10:0 newline()
+ 10:0 ERROR(/* No embedded /*)
+ 10:15 bcomment(/* comments */)
+ 10:30 ERROR(in)
+ 10:33 ERROR(comments)
+ 10:42 ERROR(*/)
+ 11:0 newline()
+ 11:0 ERROR(/* or content afte..)
+ 12:26 ERROR(Error)
+ 14:0 newline()
+ 14:0 newline()
+ 14:0 string(" \\\\ \\t \\n specia..) \\ \x09 \x0a special ch..
+ 15:0 newline()
+ 15:0 eof()
+
+## Nested tests.
+
+We need to test various aspects of tokenizing code that is stored
+in multiple nodes. For example, comments and multi-line strings mustn't
+cross a node boundary.
+
+For this we tell `scanner` to extract sections directly from this file.
+As the file changes, line numbers might change as well, so we need to factor
+that out when testing. A simple awk script can normalise the first line number
+to one.
+
+###### other tests
+ @# Run every section test: split the test name on commas, extract the expected output, renumber lines and compare.
+ @for T in $(scanner_section_tests); do \
+ echo -n "Test $$T ... "; \
+ i="$$IFS"; IFS=,; set $$T; IFS="$$i"; section="$$1"; shift; \
+ ./md2c scanner-tests.mdc "output: $$T" | grep -v '^#' > .tmp.want; \
+ ./coverage_scanner --file scanner-tests.mdc --section "test: $$section" \
+ $${1+"$$@"} | awk -F: ' BEGIN {OFS=":"} $$1 ~ /^[0-9]/ {if (!first) first = $$1 - 1; \
+ $$1 = $$1 - first} { print } '> .tmp.have; \
+ if ! cmp -s .tmp.want .tmp.have; then \
+ echo "FAILED"; diff -u .tmp.want .tmp.have; exit 1; fi ; \
+ echo "PASSED"; \
+ done
+
+###### test list
+ scanner_section_tests += section1 section_string section_comment
+
+###### test: section1
+
+ foreach s in sections:
+ ## section2
+ print done
+
+###### section2
+
+ This is another
+ section
+
+###### output: section1
+ Tokenizing: test: section1
+ 1:8 ident(foreach)
+ 1:16 ident(s)
+ 1:18 ident(in)
+ 1:21 ident(sections)
+ 1:29 mark(:)
+ 7:16 in()
+ 7:16 ident(This)
+ 7:21 ident(is)
+ 7:24 ident(another)
+ 8:8 newline()
+ 8:8 out()
+ 8:8 in()
+ 8:8 ident(section)
+ 3:16 newline()
+ 3:16 ident(print)
+ 3:22 ident(done)
+ 4:0 newline()
+ 4:0 out()
+ 4:0 newline()
+ 4:0 eof()
+
+###### test: section_string
+ a = '''
+ A sting mustn't cross
+ ## string B
+ skip
+
+###### string B
+ to a new node
+ '''
+
+###### output: section_string
+ Tokenizing: test: section_string
+ 1:8 ident(a)
+ 1:10 mark(=)
+ 1:12 ERROR('''\x0a\x09 A sting mus..)
+ 7:8 in()
+ 7:8 ident(to)
+ 7:11 ident(a)
+ 7:13 ident(new)
+ 7:17 ident(node)
+ 8:8 newline()
+ 8:8 ERROR(''')
+ 4:8 newline()
+ 4:8 out()
+ 4:8 newline()
+ 4:8 ident(skip)
+ 5:0 newline()
+ 5:0 eof()
+
+###### test: section_comment
+ /* Mult-line comment must stay within
+ ## comment B
+ */
+
+###### comment B
+ a single node, they cannot cross nodes.
+
+###### output: section_comment
+ Tokenizing: test: section_comment
+ 1:8 ERROR(/* Mult-line comme..)
+ 6:8 ident(a)
+ 6:10 ident(single)
+ 6:17 ident(node)
+ 6:21 mark(,)
+ 6:23 ident(they)
+ 6:28 ident(cannot)
+ 6:35 ident(cross)
+ 6:41 ident(nodes)
+ 6:46 mark(.)
+ 3:8 newline()
+ 3:8 mark(*/)
+ 4:0 newline()
+ 4:0 eof()
+
+## Ad-hoc test
+
+These tests cover bugs that were found in practice, and so prevent them from recurring.
+
+The "bad_indent" test was written because I was seeing a TK_in before the
+"program" instead of a TK_newline.
+
+###### test list
+ scanner_tests += "bad_indent"
+
+###### test: bad_indent
+
+ const:
+ foo : number = 45
+ bar := "string"
+ program:
+ foo := 4
+ print foo, bar
+
+###### output: bad_indent
+ Tokenizing:
+ 2:8 in()
+ 2:8 ident(const)
+ 2:13 mark(:)
+ 3:16 in()
+ 3:16 ident(foo)
+ 3:20 mark(:)
+ 3:22 ident(number)
+ 3:29 mark(=)
+ 3:31 number(45) 45
+ 4:16 newline()
+ 4:16 ident(bar)
+ 4:20 mark(:=)
+ 4:23 string("string") string
+ 5:8 newline()
+ 5:8 out()
+ 5:8 newline()
+ 5:8 ident(program)
+ 5:15 mark(:)
+ 6:16 in()
+ 6:16 ident(foo)
+ 6:20 mark(:=)
+ 6:23 number(4) 4
+ 7:16 newline()
+ 7:16 ident(print)
+ 7:22 ident(foo)
+ 7:25 mark(,)
+ 7:27 ident(bar)
+ 8:0 newline()
+ 8:0 out()
+ 8:0 newline()
+ 8:0 out()
+ 8:0 newline()
+ 8:0 eof()