X-Git-Url: https://ocean-lang.org/code/?a=blobdiff_plain;f=csrc%2Fscanner-tests.mdc;h=776b2533f98046109af0e8cc8938a33a67379ca5;hb=88d54b5afa776ae4cc7100dc30238538e509a442;hp=36462081c98da9a6dd731686f1535bb7cbdddaf8;hpb=4175113ddadf8ce701903fd784b5b5420d3f93f5;p=ocean diff --git a/csrc/scanner-tests.mdc b/csrc/scanner-tests.mdc index 3646208..776b253 100644 --- a/csrc/scanner-tests.mdc +++ b/csrc/scanner-tests.mdc @@ -32,31 +32,33 @@ about each test. echo "PASSED"; \ done + ## other tests + @for i in coverage/#*.gcda; do mv $$i coverage/$${i##*#}; done @gcov -o coverage scanner.c libscanner.c > /dev/null 2> /dev/null @mv *.gcov coverage; [ -f .gcov ] && mv .gcov coverage || true @awk '/NOTEST/ { next } /^ *[1-9]/ {ran+=1} /^ *###/ {skip+=1} \ END {printf "coverage: %6.2f%%\n", ran * 100 / (ran + skip); \ - if (ran < (ran + skip) *0.85) exit(1) }' \ + if (ran < (ran + skip) *0.94) exit(1) }' \ coverage/scanner.mdc.gcov @rm -f .tmp* - coverage_scanner: scanner.c libscanner.c + coverage_scanner: scanner.c libscanner.c libmdcode.o libnumber.o libstring.o $(CC) $(CFLAGS) --coverage -fprofile-dir=coverage -o coverage_scanner \ scanner.c libscanner.c \ libmdcode.o libnumber.o libstring.o -licuuc -lgmp - ## Basic tests Some simple tests... maybe all tests are simple. +Include a special test for numbers, as they are interesting. ###### test list - scanner_tests += "test1,if,then,+,-" - scanner_tests += "test1,if,then,+,-,/" - scanner_tests += "test1,--ignore-indent,if,then,+,-,/" + scanner_tests += "test1,-r,if,then,+,-" + scanner_tests += "test1,-r,if,then,+,-,/" + scanner_tests += "test1,-r,--ignore-indent,if,then,+,-,/" + scanner_tests += "test1,-r,--ignore-indent,--ignore-newline,if,then,+,-,/" scanner_tests += "test1,--ignore-indent,--ignore-newline,if,then,+,-,/" - scanner_tests += "test1,-Cc,--ignore-indent,--ignore-newline,if,then,+,-,/" - scanner_tests += "test1,-CcSz,--ignore-indent,--ignore-newline,if,then,+,-,/" + scanner_tests += "test1,-Sz,--ignore-indent,--ignore-newline,if,then,+,-,/" ###### test: test1 @@ -77,7 +79,7 @@ Some simple tests... maybe all tests are simple. if else then fi while 1234, 1.234 -123.456e45 - 0x1234 + 0x543p3 + 0x1234 + 0x543p+3 "This is a string" &"so is this"a a = """ This is a multi- @@ -91,7 +93,7 @@ Some simple tests... maybe all tests are simple. lines */ divident /+ divisor -###### output: test1,if,then,+,- +###### output: test1,-r,if,then,+,- Tokenizing: 2:0 ident(A) 2:2 ident(B) @@ -145,13 +147,13 @@ Some simple tests... maybe all tests are simple. 18:0 newline() 18:0 number(1234) 1234 18:4 mark(,) - 18:7 number(1.234 ) 617/500 + 18:7 number(1.234) 617/500 18:13 - 18:14 number(123.456e45) 123456000000000000000000000000000000000000000000 19:0 newline() - 19:0 number(0x1234 ) 4660 + 19:0 number(0x1234) 4660 19:7 + - 19:10 number(0x543p3) 10776 + 19:10 number(0x543p+3) 10776 20:0 newline() 20:0 string("This is a string") This is a string 20:19 mark(&) @@ -178,7 +180,7 @@ Some simple tests... maybe all tests are simple. 32:0 newline() 32:0 eof() -###### output: test1,if,then,+,-,/ +###### output: test1,-r,if,then,+,-,/ Tokenizing: 2:0 ident(A) 2:2 ident(B) @@ -232,13 +234,13 @@ Some simple tests... maybe all tests are simple. 18:0 newline() 18:0 number(1234) 1234 18:4 mark(,) - 18:7 number(1.234 ) 617/500 + 18:7 number(1.234) 617/500 18:13 - 18:14 number(123.456e45) 123456000000000000000000000000000000000000000000 19:0 newline() - 19:0 number(0x1234 ) 4660 + 19:0 number(0x1234) 4660 19:7 + - 19:10 number(0x543p3) 10776 + 19:10 number(0x543p+3) 10776 20:0 newline() 20:0 string("This is a string") This is a string 20:19 mark(&) @@ -266,7 +268,7 @@ Some simple tests... maybe all tests are simple. 32:0 newline() 32:0 eof() -###### output: test1,--ignore-indent,if,then,+,-,/ +###### output: test1,-r,--ignore-indent,if,then,+,-,/ Tokenizing: 2:0 ident(A) 2:2 ident(B) @@ -308,14 +310,14 @@ Some simple tests... maybe all tests are simple. 17:21 newline() 18:0 number(1234) 1234 18:4 mark(,) - 18:7 number(1.234 ) 617/500 + 18:7 number(1.234) 617/500 18:13 - 18:14 number(123.456e45) 123456000000000000000000000000000000000000000000 18:24 newline() - 19:0 number(0x1234 ) 4660 + 19:0 number(0x1234) 4660 19:7 + - 19:10 number(0x543p3) 10776 - 19:17 newline() + 19:10 number(0x543p+3) 10776 + 19:18 newline() 20:0 string("This is a string") This is a string 20:19 mark(&) 20:20 string("so is this"a) a so is this @@ -342,7 +344,7 @@ Some simple tests... maybe all tests are simple. 31:19 newline() 32:0 eof() -###### output: test1,--ignore-indent,--ignore-newline,if,then,+,-,/ +###### output: test1,-r,--ignore-indent,--ignore-newline,if,then,+,-,/ Tokenizing: 2:0 ident(A) 2:2 ident(B) @@ -368,12 +370,12 @@ Some simple tests... maybe all tests are simple. 17:16 ident(while) 18:0 number(1234) 1234 18:4 mark(,) - 18:7 number(1.234 ) 617/500 + 18:7 number(1.234) 617/500 18:13 - 18:14 number(123.456e45) 123456000000000000000000000000000000000000000000 - 19:0 number(0x1234 ) 4660 + 19:0 number(0x1234) 4660 19:7 + - 19:10 number(0x543p3) 10776 + 19:10 number(0x543p+3) 10776 20:0 string("This is a string") This is a string 20:19 mark(&) 20:20 string("so is this"a) a so is this @@ -394,7 +396,7 @@ Some simple tests... maybe all tests are simple. 31:12 ident(divisor) 32:0 eof() -###### output: test1,-Cc,--ignore-indent,--ignore-newline,if,then,+,-,/ +###### output: test1,--ignore-indent,--ignore-newline,if,then,+,-,/ Tokenizing: 2:0 ident(A) 2:2 ident(B) @@ -420,12 +422,12 @@ Some simple tests... maybe all tests are simple. 17:16 ident(while) 18:0 number(1234) 1234 18:4 mark(,) - 18:7 number(1.234 ) 617/500 + 18:7 number(1.234) 617/500 18:13 - 18:14 number(123.456e45) 123456000000000000000000000000000000000000000000 - 19:0 number(0x1234 ) 4660 + 19:0 number(0x1234) 4660 19:7 + - 19:10 number(0x543p3) 10776 + 19:10 number(0x543p+3) 10776 20:0 string("This is a string") This is a string 20:19 mark(&) 20:20 string("so is this"a) a so is this @@ -443,7 +445,7 @@ Some simple tests... maybe all tests are simple. 31:12 ident(divisor) 32:0 eof() -###### output: test1,-CcSz,--ignore-indent,--ignore-newline,if,then,+,-,/ +###### output: test1,-Sz,--ignore-indent,--ignore-newline,if,then,+,-,/ Tokenizing: 2:0 ident(A) 2:2 ident(B) @@ -469,12 +471,12 @@ Some simple tests... maybe all tests are simple. 17:16 ident(while) 18:0 number(1234) 1234 18:4 mark(,) - 18:7 number(1.234 ) 617/500 + 18:7 number(1.234) 617/500 18:13 - 18:14 number(123.456e45) 123456000000000000000000000000000000000000000000 - 19:0 number(0x1234 ) 4660 + 19:0 number(0x1234) 4660 19:7 + - 19:10 number(0x543p3) 10776 + 19:10 number(0x543p+3) 10776 20:0 mark(") 20:1 ident(This) 20:6 ident(is) @@ -513,3 +515,374 @@ Some simple tests... maybe all tests are simple. 31:10 + 31:12 ident(divisor) 32:0 eof() + +###### test list + scanner_tests += "testnum" + +###### test: testnum + 12345 + 1234.56 + 1234.56e7 + 1234.56e-7 + 0x1234 + 0x123,456 + 0o777 + 0o111.111p4 + 0b11011110p3 + + 123 456 789 + 0x1234_5678_9abc + + "Now for some non-number" + 1234p4 + 12-34 + 01234 + 0c1234 + 123.456e1a + 123.e4 + 0x123 456 + 0b1234 + 123_345_.34 + .75 + +###### output: testnum + Tokenizing: + 2:0 number(12345) 12345 + 3:0 newline() + 3:0 number(1234.56) 30864/25 + 4:0 newline() + 4:0 number(1234.56e7) 12345600000 + 5:0 newline() + 5:0 number(1234.56e-7) 1929/15625000 + 6:0 newline() + 6:0 number(0x1234) 4660 + 7:0 newline() + 7:0 number(0x123,456) 596523/2048 + 8:0 newline() + 8:0 number(0o777) 511 + 9:0 newline() + 9:0 number(0o111.111p4) 37449/32 + 10:0 newline() + 10:0 number(0b11011110p3) 1776 + 12:0 newline() + 12:0 newline() + 12:0 number(123 456 789) 123456789 + 13:0 newline() + 13:0 number(0x1234_5678_9abc) 20015998343868 + 15:0 newline() + 15:0 newline() + 15:0 string("Now for some non-..) Now for some non-n.. + 16:0 newline() + 16:0 number(1234p4) BAD NUMBER + 17:0 newline() + 17:0 number(12) 12 + 17:2 mark(-) + 17:3 number(34) 34 + 18:0 newline() + 18:0 number(01234) BAD NUMBER + 19:0 newline() + 19:0 number(0c1234) BAD NUMBER + 20:0 newline() + 20:0 number(123.456e1a) a 30864/25 + 21:0 newline() + 21:0 number(123.e4) 1230000 + 22:0 newline() + 22:0 number(0x123 456) BAD NUMBER + 23:0 newline() + 23:0 number(0b1234) BAD NUMBER + 24:0 newline() + 24:0 number(123_345) 123345 + 24:7 ident(_) + 24:8 mark(.) + 24:9 number(34) 34 + 25:0 newline() + 25:0 mark(.) + 25:1 number(75) 75 + 26:0 newline() + 26:0 eof() + +## Error tests + +Now to test for some errors ... though things I thought would be errors +sometimes aren't. + +###### test list + scanner_tests += "errtest,-r,--ignore-ident,--ignore-mark,-W_,-w_,if,then,+,-" + scanner_tests += "errtest,-r,--ignore-ident,--ignore-mark,-N,if,then,+,-" + +###### test: errtest + + multiple decimal pointer 3.141.59 + "Check for decimal commas" 3,14159 = 3,141,59 + """A multi-string must not have text here + """ + "or after close" + """ + no text .. + """ here + + /* No embedded /* comments */ in comments */ + /* or content after + * a multiline comment */ Error + + " \\ \t \n special chars in strings" + +###### output: errtest,-r,--ignore-ident,--ignore-mark,-W_,-w_,if,then,+,- + + Tokenizing: + 2:0 ERROR(multiple) + 2:9 ERROR(decimal) + 2:17 ERROR(pointer) + 2:25 number(3.141) 3141/1000 + 2:30 ERROR(.) + 2:31 number(59) 59 + 3:0 newline() + 3:0 string("Check for decimal..) Check for decimal .. + 3:27 number(3,14159) 314159/100000 + 3:35 ERROR(=) + 3:37 number(3,141) 3141/1000 + 3:42 ERROR(,) + 3:43 number(59) 59 + 4:0 newline() + 4:0 string("") + 4:2 ERROR("A multi-string mu..) + 5:0 newline() + 5:0 ERROR("""\x0a"or after clos..) + 8:12 ERROR(here) + 10:0 newline() + 10:0 newline() + 10:0 ERROR(/* No embedded /*) + 10:15 bcomment(/* comments */) + 10:30 ERROR(in) + 10:33 ERROR(comments) + 10:42 ERROR(*/) + 11:0 newline() + 11:0 ERROR(/* or content afte..) + 12:26 ERROR(Error) + 14:0 newline() + 14:0 newline() + 14:0 string(" \\\\ \\t \\n specia..) \\ \x09 \x0a special ch.. + 15:0 newline() + 15:0 eof() + +###### output: errtest,-r,--ignore-ident,--ignore-mark,-N,if,then,+,- + Tokenizing: + 2:0 ERROR(multiple) + 2:9 ERROR(decimal) + 2:17 ERROR(pointer) + 2:25 ERROR(3) + 2:26 ERROR(.) + 2:27 ERROR(1) + 2:28 ERROR(4) + 2:29 ERROR(1) + 2:30 ERROR(.) + 2:31 ERROR(5) + 2:32 ERROR(9) + 3:0 newline() + 3:0 string("Check for decimal..) Check for decimal .. + 3:27 ERROR(3) + 3:28 ERROR(,) + 3:29 ERROR(1) + 3:30 ERROR(4) + 3:31 ERROR(1) + 3:32 ERROR(5) + 3:33 ERROR(9) + 3:35 ERROR(=) + 3:37 ERROR(3) + 3:38 ERROR(,) + 3:39 ERROR(1) + 3:40 ERROR(4) + 3:41 ERROR(1) + 3:42 ERROR(,) + 3:43 ERROR(5) + 3:44 ERROR(9) + 4:0 newline() + 4:0 string("") + 4:2 ERROR("A multi-string mu..) + 5:0 newline() + 5:0 ERROR("""\x0a"or after clos..) + 8:12 ERROR(here) + 10:0 newline() + 10:0 newline() + 10:0 ERROR(/* No embedded /*) + 10:15 bcomment(/* comments */) + 10:30 ERROR(in) + 10:33 ERROR(comments) + 10:42 ERROR(*/) + 11:0 newline() + 11:0 ERROR(/* or content afte..) + 12:26 ERROR(Error) + 14:0 newline() + 14:0 newline() + 14:0 string(" \\\\ \\t \\n specia..) \\ \x09 \x0a special ch.. + 15:0 newline() + 15:0 eof() + +## Nested tests. + +We need to test various aspects of tokenizing code that is stored +in multiple nodes. For example, comments and multi-line strings mustn't +cross a node boundary. + +For this we tell `scanner` to extract sections directly from this file. +As the file changes, line numbers might change as well, so we need to factor +that out when testing. A simple awk script can normalise the first line number +to one. + +###### other tests + @for T in $(scanner_section_tests); do \ + echo -n "Test $$T ... "; \ + i="$IFS"; IFS=,; set $$T; IFS="$$i"; section="$$1"; shift; \ + ./md2c scanner-tests.mdc "output: $$T" | grep -v '^#' > .tmp.want; \ + ./coverage_scanner --file scanner-tests.mdc --section "test: $$section" \ + $${1+"$$@"} | awk -F: ' BEGIN {OFS=":"} $$1 ~ /^[0-9]/ {if (!first) first = $$1 - 1; \ + $$1 = $$1 - first} { print } '> .tmp.have; \ + if ! cmp -s .tmp.want .tmp.have; then \ + echo "FAILED"; diff -u .tmp.want .tmp.have; exit 1; fi ; \ + echo "PASSED"; \ + done + +###### test list + scanner_section_tests += section1 section_string section_comment + +###### test: section1 + + foreach s in sections: + ## section2 + print done + +###### section2 + + This is another + section + +###### output: section1 + Tokenizing: test: section1 + 1:8 ident(foreach) + 1:16 ident(s) + 1:18 ident(in) + 1:21 ident(sections) + 1:29 mark(:) + 7:16 in() + 7:16 ident(This) + 7:21 ident(is) + 7:24 ident(another) + 8:8 newline() + 8:8 out() + 8:8 in() + 8:8 ident(section) + 3:16 newline() + 3:16 ident(print) + 3:22 ident(done) + 4:0 newline() + 4:0 out() + 4:0 newline() + 4:0 eof() + +###### test: section_string + a = ''' + A sting mustn't cross + ## string B + skip + +###### string B + to a new node + ''' + +###### output: section_string + Tokenizing: test: section_string + 1:8 ident(a) + 1:10 mark(=) + 1:12 ERROR('''\x0a\x09 A sting mus..) + 7:8 in() + 7:8 ident(to) + 7:11 ident(a) + 7:13 ident(new) + 7:17 ident(node) + 8:8 newline() + 8:8 ERROR(''') + 4:8 newline() + 4:8 out() + 4:8 newline() + 4:8 ident(skip) + 5:0 newline() + 5:0 eof() + +###### test: section_comment + /* Mult-line comment must stay within + ## comment B + */ + +###### comment B + a single node, they cannot cross nodes. + +###### output: section_comment + Tokenizing: test: section_comment + 1:8 ERROR(/* Mult-line comme..) + 6:8 ident(a) + 6:10 ident(single) + 6:17 ident(node) + 6:21 mark(,) + 6:23 ident(they) + 6:28 ident(cannot) + 6:35 ident(cross) + 6:41 ident(nodes) + 6:46 mark(.) + 3:8 newline() + 3:8 mark(*/) + 4:0 newline() + 4:0 eof() + +## Ad-hoc test + +These tests test bugs that were found in practice, and so prevent them recuring. + +The "bad_indent" test was written because I was seeing a TK_in before the +"program" instead of TK_newline + +###### test list + scanner_tests += "bad_indent" + +###### test: bad_indent + + const: + foo : number = 45 + bar := "string" + program: + foo := 4 + print foo, bar + +###### output: bad_indent + Tokenizing: + 2:8 in() + 2:8 ident(const) + 2:13 mark(:) + 3:16 in() + 3:16 ident(foo) + 3:20 mark(:) + 3:22 ident(number) + 3:29 mark(=) + 3:31 number(45) 45 + 4:16 newline() + 4:16 ident(bar) + 4:20 mark(:=) + 4:23 string("string") string + 5:8 newline() + 5:8 out() + 5:8 newline() + 5:8 ident(program) + 5:15 mark(:) + 6:16 in() + 6:16 ident(foo) + 6:20 mark(:=) + 6:23 number(4) 4 + 7:16 newline() + 7:16 ident(print) + 7:22 ident(foo) + 7:25 mark(,) + 7:27 ident(bar) + 8:0 newline() + 8:0 out() + 8:0 newline() + 8:0 out() + 8:0 newline() + 8:0 eof()