X-Git-Url: https://ocean-lang.org/code/?a=blobdiff_plain;f=csrc%2Fscanner-tests.mdc;h=776b2533f98046109af0e8cc8938a33a67379ca5;hb=cb8d9032314bbb51b505a5100b0bc484056771e0;hp=3496f79eb645e1d2550aa8931fb6bdea927726d5;hpb=5107f51f1ba2b12dbffef31403ebfea30688b96b;p=ocean diff --git a/csrc/scanner-tests.mdc b/csrc/scanner-tests.mdc index 3496f79..776b253 100644 --- a/csrc/scanner-tests.mdc +++ b/csrc/scanner-tests.mdc @@ -32,15 +32,17 @@ about each test. echo "PASSED"; \ done + ## other tests + @for i in coverage/#*.gcda; do mv $$i coverage/$${i##*#}; done @gcov -o coverage scanner.c libscanner.c > /dev/null 2> /dev/null @mv *.gcov coverage; [ -f .gcov ] && mv .gcov coverage || true @awk '/NOTEST/ { next } /^ *[1-9]/ {ran+=1} /^ *###/ {skip+=1} \ END {printf "coverage: %6.2f%%\n", ran * 100 / (ran + skip); \ - if (ran < (ran + skip) *0.90) exit(1) }' \ + if (ran < (ran + skip) *0.94) exit(1) }' \ coverage/scanner.mdc.gcov @rm -f .tmp* - coverage_scanner: scanner.c libscanner.c + coverage_scanner: scanner.c libscanner.c libmdcode.o libnumber.o libstring.o $(CC) $(CFLAGS) --coverage -fprofile-dir=coverage -o coverage_scanner \ scanner.c libscanner.c \ libmdcode.o libnumber.o libstring.o -licuuc -lgmp @@ -48,14 +50,15 @@ about each test. ## Basic tests Some simple tests... maybe all tests are simple. +Include a special test for numbers, as they are interesting. ###### test list - scanner_tests += "test1,if,then,+,-" - scanner_tests += "test1,if,then,+,-,/" - scanner_tests += "test1,--ignore-indent,if,then,+,-,/" + scanner_tests += "test1,-r,if,then,+,-" + scanner_tests += "test1,-r,if,then,+,-,/" + scanner_tests += "test1,-r,--ignore-indent,if,then,+,-,/" + scanner_tests += "test1,-r,--ignore-indent,--ignore-newline,if,then,+,-,/" scanner_tests += "test1,--ignore-indent,--ignore-newline,if,then,+,-,/" - scanner_tests += "test1,-Cc,--ignore-indent,--ignore-newline,if,then,+,-,/" - scanner_tests += "test1,-CcSz,--ignore-indent,--ignore-newline,if,then,+,-,/" + scanner_tests += "test1,-Sz,--ignore-indent,--ignore-newline,if,then,+,-,/" ###### test: test1 @@ -90,7 +93,7 @@ Some simple tests... maybe all tests are simple. lines */ divident /+ divisor -###### output: test1,if,then,+,- +###### output: test1,-r,if,then,+,- Tokenizing: 2:0 ident(A) 2:2 ident(B) @@ -144,11 +147,11 @@ Some simple tests... maybe all tests are simple. 18:0 newline() 18:0 number(1234) 1234 18:4 mark(,) - 18:7 number(1.234 ) 617/500 + 18:7 number(1.234) 617/500 18:13 - 18:14 number(123.456e45) 123456000000000000000000000000000000000000000000 19:0 newline() - 19:0 number(0x1234 ) 4660 + 19:0 number(0x1234) 4660 19:7 + 19:10 number(0x543p+3) 10776 20:0 newline() @@ -177,7 +180,7 @@ Some simple tests... maybe all tests are simple. 32:0 newline() 32:0 eof() -###### output: test1,if,then,+,-,/ +###### output: test1,-r,if,then,+,-,/ Tokenizing: 2:0 ident(A) 2:2 ident(B) @@ -231,11 +234,11 @@ Some simple tests... maybe all tests are simple. 18:0 newline() 18:0 number(1234) 1234 18:4 mark(,) - 18:7 number(1.234 ) 617/500 + 18:7 number(1.234) 617/500 18:13 - 18:14 number(123.456e45) 123456000000000000000000000000000000000000000000 19:0 newline() - 19:0 number(0x1234 ) 4660 + 19:0 number(0x1234) 4660 19:7 + 19:10 number(0x543p+3) 10776 20:0 newline() @@ -265,7 +268,7 @@ Some simple tests... maybe all tests are simple. 32:0 newline() 32:0 eof() -###### output: test1,--ignore-indent,if,then,+,-,/ +###### output: test1,-r,--ignore-indent,if,then,+,-,/ Tokenizing: 2:0 ident(A) 2:2 ident(B) @@ -307,11 +310,11 @@ Some simple tests... maybe all tests are simple. 17:21 newline() 18:0 number(1234) 1234 18:4 mark(,) - 18:7 number(1.234 ) 617/500 + 18:7 number(1.234) 617/500 18:13 - 18:14 number(123.456e45) 123456000000000000000000000000000000000000000000 18:24 newline() - 19:0 number(0x1234 ) 4660 + 19:0 number(0x1234) 4660 19:7 + 19:10 number(0x543p+3) 10776 19:18 newline() @@ -341,7 +344,7 @@ Some simple tests... maybe all tests are simple. 31:19 newline() 32:0 eof() -###### output: test1,--ignore-indent,--ignore-newline,if,then,+,-,/ +###### output: test1,-r,--ignore-indent,--ignore-newline,if,then,+,-,/ Tokenizing: 2:0 ident(A) 2:2 ident(B) @@ -367,10 +370,10 @@ Some simple tests... maybe all tests are simple. 17:16 ident(while) 18:0 number(1234) 1234 18:4 mark(,) - 18:7 number(1.234 ) 617/500 + 18:7 number(1.234) 617/500 18:13 - 18:14 number(123.456e45) 123456000000000000000000000000000000000000000000 - 19:0 number(0x1234 ) 4660 + 19:0 number(0x1234) 4660 19:7 + 19:10 number(0x543p+3) 10776 20:0 string("This is a string") This is a string @@ -393,7 +396,7 @@ Some simple tests... maybe all tests are simple. 31:12 ident(divisor) 32:0 eof() -###### output: test1,-Cc,--ignore-indent,--ignore-newline,if,then,+,-,/ +###### output: test1,--ignore-indent,--ignore-newline,if,then,+,-,/ Tokenizing: 2:0 ident(A) 2:2 ident(B) @@ -419,10 +422,10 @@ Some simple tests... maybe all tests are simple. 17:16 ident(while) 18:0 number(1234) 1234 18:4 mark(,) - 18:7 number(1.234 ) 617/500 + 18:7 number(1.234) 617/500 18:13 - 18:14 number(123.456e45) 123456000000000000000000000000000000000000000000 - 19:0 number(0x1234 ) 4660 + 19:0 number(0x1234) 4660 19:7 + 19:10 number(0x543p+3) 10776 20:0 string("This is a string") This is a string @@ -442,7 +445,7 @@ Some simple tests... maybe all tests are simple. 31:12 ident(divisor) 32:0 eof() -###### output: test1,-CcSz,--ignore-indent,--ignore-newline,if,then,+,-,/ +###### output: test1,-Sz,--ignore-indent,--ignore-newline,if,then,+,-,/ Tokenizing: 2:0 ident(A) 2:2 ident(B) @@ -468,10 +471,10 @@ Some simple tests... maybe all tests are simple. 17:16 ident(while) 18:0 number(1234) 1234 18:4 mark(,) - 18:7 number(1.234 ) 617/500 + 18:7 number(1.234) 617/500 18:13 - 18:14 number(123.456e45) 123456000000000000000000000000000000000000000000 - 19:0 number(0x1234 ) 4660 + 19:0 number(0x1234) 4660 19:7 + 19:10 number(0x543p+3) 10776 20:0 mark(") @@ -513,14 +516,99 @@ Some simple tests... maybe all tests are simple. 31:12 ident(divisor) 32:0 eof() +###### test list + scanner_tests += "testnum" + +###### test: testnum + 12345 + 1234.56 + 1234.56e7 + 1234.56e-7 + 0x1234 + 0x123,456 + 0o777 + 0o111.111p4 + 0b11011110p3 + + 123 456 789 + 0x1234_5678_9abc + + "Now for some non-number" + 1234p4 + 12-34 + 01234 + 0c1234 + 123.456e1a + 123.e4 + 0x123 456 + 0b1234 + 123_345_.34 + .75 + +###### output: testnum + Tokenizing: + 2:0 number(12345) 12345 + 3:0 newline() + 3:0 number(1234.56) 30864/25 + 4:0 newline() + 4:0 number(1234.56e7) 12345600000 + 5:0 newline() + 5:0 number(1234.56e-7) 1929/15625000 + 6:0 newline() + 6:0 number(0x1234) 4660 + 7:0 newline() + 7:0 number(0x123,456) 596523/2048 + 8:0 newline() + 8:0 number(0o777) 511 + 9:0 newline() + 9:0 number(0o111.111p4) 37449/32 + 10:0 newline() + 10:0 number(0b11011110p3) 1776 + 12:0 newline() + 12:0 newline() + 12:0 number(123 456 789) 123456789 + 13:0 newline() + 13:0 number(0x1234_5678_9abc) 20015998343868 + 15:0 newline() + 15:0 newline() + 15:0 string("Now for some non-..) Now for some non-n.. + 16:0 newline() + 16:0 number(1234p4) BAD NUMBER + 17:0 newline() + 17:0 number(12) 12 + 17:2 mark(-) + 17:3 number(34) 34 + 18:0 newline() + 18:0 number(01234) BAD NUMBER + 19:0 newline() + 19:0 number(0c1234) BAD NUMBER + 20:0 newline() + 20:0 number(123.456e1a) a 30864/25 + 21:0 newline() + 21:0 number(123.e4) 1230000 + 22:0 newline() + 22:0 number(0x123 456) BAD NUMBER + 23:0 newline() + 23:0 number(0b1234) BAD NUMBER + 24:0 newline() + 24:0 number(123_345) 123345 + 24:7 ident(_) + 24:8 mark(.) + 24:9 number(34) 34 + 25:0 newline() + 25:0 mark(.) + 25:1 number(75) 75 + 26:0 newline() + 26:0 eof() + ## Error tests Now to test for some errors ... though things I thought would be errors sometimes aren't. ###### test list - scanner_tests += "errtest,--ignore-ident,--ignore-mark,-W_,-w_,if,then,+,-" - scanner_tests += "errtest,--ignore-ident,--ignore-mark,-N,if,then,+,-" + scanner_tests += "errtest,-r,--ignore-ident,--ignore-mark,-W_,-w_,if,then,+,-" + scanner_tests += "errtest,-r,--ignore-ident,--ignore-mark,-N,if,then,+,-" ###### test: errtest @@ -538,7 +626,7 @@ sometimes aren't. " \\ \t \n special chars in strings" -###### output: errtest,--ignore-ident,--ignore-mark,-W_,-w_,if,then,+,- +###### output: errtest,-r,--ignore-ident,--ignore-mark,-W_,-w_,if,then,+,- Tokenizing: 2:0 ERROR(multiple) @@ -576,7 +664,7 @@ sometimes aren't. 15:0 newline() 15:0 eof() -###### output: errtest,--ignore-ident,--ignore-mark,-N,if,then,+,- +###### output: errtest,-r,--ignore-ident,--ignore-mark,-N,if,then,+,- Tokenizing: 2:0 ERROR(multiple) 2:9 ERROR(decimal) @@ -628,3 +716,173 @@ sometimes aren't. 14:0 string(" \\\\ \\t \\n specia..) \\ \x09 \x0a special ch.. 15:0 newline() 15:0 eof() + +## Nested tests. + +We need to test various aspects of tokenizing code that is stored +in multiple nodes. For example, comments and multi-line strings mustn't +cross a node boundary. + +For this we tell `scanner` to extract sections directly from this file. +As the file changes, line numbers might change as well, so we need to factor +that out when testing. A simple awk script can normalise the first line number +to one. + +###### other tests + @for T in $(scanner_section_tests); do \ + echo -n "Test $$T ... "; \ + i="$IFS"; IFS=,; set $$T; IFS="$$i"; section="$$1"; shift; \ + ./md2c scanner-tests.mdc "output: $$T" | grep -v '^#' > .tmp.want; \ + ./coverage_scanner --file scanner-tests.mdc --section "test: $$section" \ + $${1+"$$@"} | awk -F: ' BEGIN {OFS=":"} $$1 ~ /^[0-9]/ {if (!first) first = $$1 - 1; \ + $$1 = $$1 - first} { print } '> .tmp.have; \ + if ! cmp -s .tmp.want .tmp.have; then \ + echo "FAILED"; diff -u .tmp.want .tmp.have; exit 1; fi ; \ + echo "PASSED"; \ + done + +###### test list + scanner_section_tests += section1 section_string section_comment + +###### test: section1 + + foreach s in sections: + ## section2 + print done + +###### section2 + + This is another + section + +###### output: section1 + Tokenizing: test: section1 + 1:8 ident(foreach) + 1:16 ident(s) + 1:18 ident(in) + 1:21 ident(sections) + 1:29 mark(:) + 7:16 in() + 7:16 ident(This) + 7:21 ident(is) + 7:24 ident(another) + 8:8 newline() + 8:8 out() + 8:8 in() + 8:8 ident(section) + 3:16 newline() + 3:16 ident(print) + 3:22 ident(done) + 4:0 newline() + 4:0 out() + 4:0 newline() + 4:0 eof() + +###### test: section_string + a = ''' + A sting mustn't cross + ## string B + skip + +###### string B + to a new node + ''' + +###### output: section_string + Tokenizing: test: section_string + 1:8 ident(a) + 1:10 mark(=) + 1:12 ERROR('''\x0a\x09 A sting mus..) + 7:8 in() + 7:8 ident(to) + 7:11 ident(a) + 7:13 ident(new) + 7:17 ident(node) + 8:8 newline() + 8:8 ERROR(''') + 4:8 newline() + 4:8 out() + 4:8 newline() + 4:8 ident(skip) + 5:0 newline() + 5:0 eof() + +###### test: section_comment + /* Mult-line comment must stay within + ## comment B + */ + +###### comment B + a single node, they cannot cross nodes. + +###### output: section_comment + Tokenizing: test: section_comment + 1:8 ERROR(/* Mult-line comme..) + 6:8 ident(a) + 6:10 ident(single) + 6:17 ident(node) + 6:21 mark(,) + 6:23 ident(they) + 6:28 ident(cannot) + 6:35 ident(cross) + 6:41 ident(nodes) + 6:46 mark(.) + 3:8 newline() + 3:8 mark(*/) + 4:0 newline() + 4:0 eof() + +## Ad-hoc test + +These tests test bugs that were found in practice, and so prevent them recuring. + +The "bad_indent" test was written because I was seeing a TK_in before the +"program" instead of TK_newline + +###### test list + scanner_tests += "bad_indent" + +###### test: bad_indent + + const: + foo : number = 45 + bar := "string" + program: + foo := 4 + print foo, bar + +###### output: bad_indent + Tokenizing: + 2:8 in() + 2:8 ident(const) + 2:13 mark(:) + 3:16 in() + 3:16 ident(foo) + 3:20 mark(:) + 3:22 ident(number) + 3:29 mark(=) + 3:31 number(45) 45 + 4:16 newline() + 4:16 ident(bar) + 4:20 mark(:=) + 4:23 string("string") string + 5:8 newline() + 5:8 out() + 5:8 newline() + 5:8 ident(program) + 5:15 mark(:) + 6:16 in() + 6:16 ident(foo) + 6:20 mark(:=) + 6:23 number(4) 4 + 7:16 newline() + 7:16 ident(print) + 7:22 ident(foo) + 7:25 mark(,) + 7:27 ident(bar) + 8:0 newline() + 8:0 out() + 8:0 newline() + 8:0 out() + 8:0 newline() + 8:0 eof()