X-Git-Url: https://ocean-lang.org/code/?a=blobdiff_plain;f=csrc%2Fscanner-tests.mdc;h=776b2533f98046109af0e8cc8938a33a67379ca5;hb=cb8d9032314bbb51b505a5100b0bc484056771e0;hp=3496f79eb645e1d2550aa8931fb6bdea927726d5;hpb=5107f51f1ba2b12dbffef31403ebfea30688b96b;p=ocean

diff --git a/csrc/scanner-tests.mdc b/csrc/scanner-tests.mdc
index 3496f79..776b253 100644
--- a/csrc/scanner-tests.mdc
+++ b/csrc/scanner-tests.mdc
@@ -32,15 +32,17 @@ about each test.
 		    echo "PASSED"; \
 		done
 
+		## other tests
+		@for i in coverage/#*.gcda; do mv $$i coverage/$${i##*#}; done
 		@gcov -o coverage scanner.c libscanner.c > /dev/null 2> /dev/null
 		@mv *.gcov coverage; [ -f .gcov ] && mv .gcov coverage || true
 		@awk '/NOTEST/ { next } /^ *[1-9]/ {ran+=1} /^ *###/ {skip+=1} \
 		    END {printf "coverage: %6.2f%%\n", ran * 100 / (ran + skip); \
-		         if (ran < (ran + skip) *0.90) exit(1) }' \
+		         if (ran < (ran + skip) *0.94) exit(1) }' \
 		        coverage/scanner.mdc.gcov
 		@rm -f .tmp*
 
-	coverage_scanner: scanner.c libscanner.c
+	coverage_scanner: scanner.c libscanner.c libmdcode.o libnumber.o libstring.o
 		$(CC) $(CFLAGS) --coverage -fprofile-dir=coverage -o coverage_scanner \
 			scanner.c libscanner.c \
 			libmdcode.o libnumber.o libstring.o -licuuc -lgmp
@@ -48,14 +50,15 @@ about each test.
 ## Basic tests
 
 Some simple tests... maybe all tests are simple.
+Include a special test for numbers, as they are interesting.
 
 ###### test list
-	scanner_tests += "test1,if,then,+,-"
-	scanner_tests += "test1,if,then,+,-,/"
-	scanner_tests += "test1,--ignore-indent,if,then,+,-,/"
+	scanner_tests += "test1,-r,if,then,+,-"
+	scanner_tests += "test1,-r,if,then,+,-,/"
+	scanner_tests += "test1,-r,--ignore-indent,if,then,+,-,/"
+	scanner_tests += "test1,-r,--ignore-indent,--ignore-newline,if,then,+,-,/"
 	scanner_tests += "test1,--ignore-indent,--ignore-newline,if,then,+,-,/"
-	scanner_tests += "test1,-Cc,--ignore-indent,--ignore-newline,if,then,+,-,/"
-	scanner_tests += "test1,-CcSz,--ignore-indent,--ignore-newline,if,then,+,-,/"
+	scanner_tests += "test1,-Sz,--ignore-indent,--ignore-newline,if,then,+,-,/"
 
 ###### test: test1
 
@@ -90,7 +93,7 @@ Some simple tests... maybe all tests are simple.
 	lines */
 	divident /+ divisor
 
-###### output: test1,if,then,+,-
+###### output: test1,-r,if,then,+,-
 	Tokenizing: 
 	2:0 ident(A)
 	2:2 ident(B)
@@ -144,11 +147,11 @@ Some simple tests... maybe all tests are simple.
 	18:0 newline()
 	18:0 number(1234)  1234
 	18:4 mark(,)
-	18:7 number(1.234 )  617/500
+	18:7 number(1.234)  617/500
 	18:13 -
 	18:14 number(123.456e45)  123456000000000000000000000000000000000000000000
 	19:0 newline()
-	19:0 number(0x1234 )  4660
+	19:0 number(0x1234)  4660
 	19:7 +
 	19:10 number(0x543p+3)  10776
 	20:0 newline()
@@ -177,7 +180,7 @@ Some simple tests... maybe all tests are simple.
 	32:0 newline()
 	32:0 eof()
 
-###### output: test1,if,then,+,-,/
+###### output: test1,-r,if,then,+,-,/
 	Tokenizing: 
 	2:0 ident(A)
 	2:2 ident(B)
@@ -231,11 +234,11 @@ Some simple tests... maybe all tests are simple.
 	18:0 newline()
 	18:0 number(1234)  1234
 	18:4 mark(,)
-	18:7 number(1.234 )  617/500
+	18:7 number(1.234)  617/500
 	18:13 -
 	18:14 number(123.456e45)  123456000000000000000000000000000000000000000000
 	19:0 newline()
-	19:0 number(0x1234 )  4660
+	19:0 number(0x1234)  4660
 	19:7 +
 	19:10 number(0x543p+3)  10776
 	20:0 newline()
@@ -265,7 +268,7 @@ Some simple tests... maybe all tests are simple.
 	32:0 newline()
 	32:0 eof()
 
-###### output: test1,--ignore-indent,if,then,+,-,/
+###### output: test1,-r,--ignore-indent,if,then,+,-,/
 	Tokenizing: 
 	2:0 ident(A)
 	2:2 ident(B)
@@ -307,11 +310,11 @@ Some simple tests... maybe all tests are simple.
 	17:21 newline()
 	18:0 number(1234)  1234
 	18:4 mark(,)
-	18:7 number(1.234 )  617/500
+	18:7 number(1.234)  617/500
 	18:13 -
 	18:14 number(123.456e45)  123456000000000000000000000000000000000000000000
 	18:24 newline()
-	19:0 number(0x1234 )  4660
+	19:0 number(0x1234)  4660
 	19:7 +
 	19:10 number(0x543p+3)  10776
 	19:18 newline()
@@ -341,7 +344,7 @@ Some simple tests... maybe all tests are simple.
 	31:19 newline()
 	32:0 eof()
 
-###### output: test1,--ignore-indent,--ignore-newline,if,then,+,-,/
+###### output: test1,-r,--ignore-indent,--ignore-newline,if,then,+,-,/
 	Tokenizing: 
 	2:0 ident(A)
 	2:2 ident(B)
@@ -367,10 +370,10 @@ Some simple tests... maybe all tests are simple.
 	17:16 ident(while)
 	18:0 number(1234)  1234
 	18:4 mark(,)
-	18:7 number(1.234 )  617/500
+	18:7 number(1.234)  617/500
 	18:13 -
 	18:14 number(123.456e45)  123456000000000000000000000000000000000000000000
-	19:0 number(0x1234 )  4660
+	19:0 number(0x1234)  4660
 	19:7 +
 	19:10 number(0x543p+3)  10776
 	20:0 string("This is a string")  This is a string
@@ -393,7 +396,7 @@ Some simple tests... maybe all tests are simple.
 	31:12 ident(divisor)
 	32:0 eof()
 
-###### output: test1,-Cc,--ignore-indent,--ignore-newline,if,then,+,-,/
+###### output: test1,--ignore-indent,--ignore-newline,if,then,+,-,/
 	Tokenizing: 
 	2:0 ident(A)
 	2:2 ident(B)
@@ -419,10 +422,10 @@ Some simple tests... maybe all tests are simple.
 	17:16 ident(while)
 	18:0 number(1234)  1234
 	18:4 mark(,)
-	18:7 number(1.234 )  617/500
+	18:7 number(1.234)  617/500
 	18:13 -
 	18:14 number(123.456e45)  123456000000000000000000000000000000000000000000
-	19:0 number(0x1234 )  4660
+	19:0 number(0x1234)  4660
 	19:7 +
 	19:10 number(0x543p+3)  10776
 	20:0 string("This is a string")  This is a string
@@ -442,7 +445,7 @@ Some simple tests... maybe all tests are simple.
 	31:12 ident(divisor)
 	32:0 eof()
 
-###### output: test1,-CcSz,--ignore-indent,--ignore-newline,if,then,+,-,/
+###### output: test1,-Sz,--ignore-indent,--ignore-newline,if,then,+,-,/
 	Tokenizing: 
 	2:0 ident(A)
 	2:2 ident(B)
@@ -468,10 +471,10 @@ Some simple tests... maybe all tests are simple.
 	17:16 ident(while)
 	18:0 number(1234)  1234
 	18:4 mark(,)
-	18:7 number(1.234 )  617/500
+	18:7 number(1.234)  617/500
 	18:13 -
 	18:14 number(123.456e45)  123456000000000000000000000000000000000000000000
-	19:0 number(0x1234 )  4660
+	19:0 number(0x1234)  4660
 	19:7 +
 	19:10 number(0x543p+3)  10776
 	20:0 mark(")
@@ -513,14 +516,99 @@ Some simple tests... maybe all tests are simple.
 	31:12 ident(divisor)
 	32:0 eof()
 
+###### test list
+	scanner_tests += "testnum"
+
+###### test: testnum
+	12345
+	1234.56
+	1234.56e7
+	1234.56e-7
+	0x1234
+	0x123,456
+	0o777
+	0o111.111p4
+	0b11011110p3
+
+	123 456 789
+	0x1234_5678_9abc
+
+	"Now for some non-number"
+	1234p4
+	12-34
+	01234
+	0c1234
+	123.456e1a
+	123.e4
+	0x123 456
+	0b1234
+	123_345_.34
+	.75
+
+###### output: testnum
+	Tokenizing: 
+	2:0 number(12345)  12345
+	3:0 newline()
+	3:0 number(1234.56)  30864/25
+	4:0 newline()
+	4:0 number(1234.56e7)  12345600000
+	5:0 newline()
+	5:0 number(1234.56e-7)  1929/15625000
+	6:0 newline()
+	6:0 number(0x1234)  4660
+	7:0 newline()
+	7:0 number(0x123,456)  596523/2048
+	8:0 newline()
+	8:0 number(0o777)  511
+	9:0 newline()
+	9:0 number(0o111.111p4)  37449/32
+	10:0 newline()
+	10:0 number(0b11011110p3)  1776
+	12:0 newline()
+	12:0 newline()
+	12:0 number(123 456 789)  123456789
+	13:0 newline()
+	13:0 number(0x1234_5678_9abc)  20015998343868
+	15:0 newline()
+	15:0 newline()
+	15:0 string("Now for some non-..)  Now for some non-n..
+	16:0 newline()
+	16:0 number(1234p4) BAD NUMBER
+	17:0 newline()
+	17:0 number(12)  12
+	17:2 mark(-)
+	17:3 number(34)  34
+	18:0 newline()
+	18:0 number(01234) BAD NUMBER
+	19:0 newline()
+	19:0 number(0c1234) BAD NUMBER
+	20:0 newline()
+	20:0 number(123.456e1a) a 30864/25
+	21:0 newline()
+	21:0 number(123.e4)  1230000
+	22:0 newline()
+	22:0 number(0x123 456) BAD NUMBER
+	23:0 newline()
+	23:0 number(0b1234) BAD NUMBER
+	24:0 newline()
+	24:0 number(123_345)  123345
+	24:7 ident(_)
+	24:8 mark(.)
+	24:9 number(34)  34
+	25:0 newline()
+	25:0 mark(.)
+	25:1 number(75)  75
+	26:0 newline()
+	26:0 eof()
+
 ## Error tests
 
 Now to test for some errors ... though things I thought would be errors
 sometimes aren't.
 
 ###### test list
-	scanner_tests += "errtest,--ignore-ident,--ignore-mark,-W_,-w_,if,then,+,-"
-	scanner_tests += "errtest,--ignore-ident,--ignore-mark,-N,if,then,+,-"
+	scanner_tests += "errtest,-r,--ignore-ident,--ignore-mark,-W_,-w_,if,then,+,-"
+	scanner_tests += "errtest,-r,--ignore-ident,--ignore-mark,-N,if,then,+,-"
 
 ###### test: errtest
 
@@ -538,7 +626,7 @@ sometimes aren't.
 
 	"  \\ \t \n special chars in strings"
 
-###### output: errtest,--ignore-ident,--ignore-mark,-W_,-w_,if,then,+,-
+###### output: errtest,-r,--ignore-ident,--ignore-mark,-W_,-w_,if,then,+,-
 
 	Tokenizing: 
 	2:0 ERROR(multiple)
@@ -576,7 +664,7 @@ sometimes aren't.
 	15:0 newline()
 	15:0 eof()
 
-###### output: errtest,--ignore-ident,--ignore-mark,-N,if,then,+,-
+###### output: errtest,-r,--ignore-ident,--ignore-mark,-N,if,then,+,-
 	Tokenizing: 
 	2:0 ERROR(multiple)
 	2:9 ERROR(decimal)
@@ -628,3 +716,173 @@ sometimes aren't.
 	14:0 string("  \\\\ \\t \\n specia..)    \\ \x09 \x0a special ch..
 	15:0 newline()
 	15:0 eof()
+
+## Nested tests.
+
+We need to test various aspects of tokenizing code that is stored
+in multiple nodes.  For example, comments and multi-line strings mustn't
+cross a node boundary.
+
+For this we tell `scanner` to extract sections directly from this file.
+As the file changes, line numbers might change as well, so we need to factor
+that out when testing.  A simple awk script can normalise the first line number
+to one.
+
+###### other tests
+	@for T in $(scanner_section_tests); do \
+	   echo -n "Test $$T ... "; \
+	   i="$$IFS"; IFS=,; set $$T; IFS="$$i"; section="$$1"; shift; \
+	    ./md2c scanner-tests.mdc "output: $$T" | grep -v '^#' > .tmp.want; \
+	   ./coverage_scanner --file scanner-tests.mdc --section "test: $$section" \
+	     $${1+"$$@"} | awk -F: ' BEGIN {OFS=":"} $$1 ~ /^[0-9]/ {if (!first) first = $$1 - 1; \
+	          $$1 = $$1 - first} { print } '> .tmp.have; \
+	    if ! cmp -s .tmp.want .tmp.have; then \
+	        echo "FAILED"; diff -u .tmp.want .tmp.have; exit 1; fi ; \
+	    echo "PASSED"; \
+	done
+
+###### test list
+	scanner_section_tests += section1 section_string section_comment
+
+###### test: section1
+
+	foreach s in sections:
+		## section2
+		print done
+
+###### section2
+
+		This is another
+	section
+
+###### output: section1
+	Tokenizing: test: section1
+	1:8 ident(foreach)
+	1:16 ident(s)
+	1:18 ident(in)
+	1:21 ident(sections)
+	1:29 mark(:)
+	7:16 in()
+	7:16 ident(This)
+	7:21 ident(is)
+	7:24 ident(another)
+	8:8 newline()
+	8:8 out()
+	8:8 in()
+	8:8 ident(section)
+	3:16 newline()
+	3:16 ident(print)
+	3:22 ident(done)
+	4:0 newline()
+	4:0 out()
+	4:0 newline()
+	4:0 eof()
+
+###### test: section_string
+	a = '''
+	  A sting mustn't cross
+	  ## string B
+	skip
+
+###### string B
+	to a new node
+	'''
+
+###### output: section_string
+	Tokenizing: test: section_string
+	1:8 ident(a)
+	1:10 mark(=)
+	1:12 ERROR('''\x0a\x09  A sting mus..)
+	7:8 in()
+	7:8 ident(to)
+	7:11 ident(a)
+	7:13 ident(new)
+	7:17 ident(node)
+	8:8 newline()
+	8:8 ERROR(''')
+	4:8 newline()
+	4:8 out()
+	4:8 newline()
+	4:8 ident(skip)
+	5:0 newline()
+	5:0 eof()
+
+###### test: section_comment
+	/* Mult-line comment must stay within
+	## comment B
+	*/
+
+###### comment B
+	a single node, they cannot cross nodes.
+
+###### output: section_comment
+	Tokenizing: test: section_comment
+	1:8 ERROR(/* Mult-line comme..)
+	6:8 ident(a)
+	6:10 ident(single)
+	6:17 ident(node)
+	6:21 mark(,)
+	6:23 ident(they)
+	6:28 ident(cannot)
+	6:35 ident(cross)
+	6:41 ident(nodes)
+	6:46 mark(.)
+	3:8 newline()
+	3:8 mark(*/)
+	4:0 newline()
+	4:0 eof()
+
+## Ad-hoc test
+
+These tests test bugs that were found in practice, and so prevent them from recurring.
+
+The "bad_indent" test was written because I was seeing a TK_in before the
+"program" instead of TK_newline.
+
+###### test list
+	scanner_tests += "bad_indent"
+
+###### test: bad_indent
+
+		const:
+			foo : number = 45
+			bar := "string"
+		program:
+			foo := 4
+			print foo, bar
+
+###### output: bad_indent
+	Tokenizing: 
+	2:8 in()
+	2:8 ident(const)
+	2:13 mark(:)
+	3:16 in()
+	3:16 ident(foo)
+	3:20 mark(:)
+	3:22 ident(number)
+	3:29 mark(=)
+	3:31 number(45)  45
+	4:16 newline()
+	4:16 ident(bar)
+	4:20 mark(:=)
+	4:23 string("string")  string
+	5:8 newline()
+	5:8 out()
+	5:8 newline()
+	5:8 ident(program)
+	5:15 mark(:)
+	6:16 in()
+	6:16 ident(foo)
+	6:20 mark(:=)
+	6:23 number(4)  4
+	7:16 newline()
+	7:16 ident(print)
+	7:22 ident(foo)
+	7:25 mark(,)
+	7:27 ident(bar)
+	8:0 newline()
+	8:0 out()
+	8:0 newline()
+	8:0 out()
+	8:0 newline()
+	8:0 eof()