parsergen: add more power to symbol references in generated code

[ocean] / csrc / scanner-tests.mdc
diff --git a/csrc/scanner-tests.mdc b/csrc/scanner-tests.mdc

index 36462081c98da9a6dd731686f1535bb7cbdddaf8..d527ed0442fc9c68bbbe2f0e586bfa65409ce2c5 100644 (file)
--- a/csrc/scanner-tests.mdc
+++ b/csrc/scanner-tests.mdc
@@ -32,31 +32,33 @@ about each test.
                     echo "PASSED"; \
                 done
  
+               ## other tests
+
                 @gcov -o coverage scanner.c libscanner.c > /dev/null 2> /dev/null
                 @mv *.gcov coverage; [ -f .gcov ] && mv .gcov coverage || true
                 @awk '/NOTEST/ { next } /^ *[1-9]/ {ran+=1} /^ *###/ {skip+=1} \
                     END {printf "coverage: %6.2f%%\n", ran * 100 / (ran + skip); \
-                        if (ran < (ran + skip) *0.85) exit(1) }' \
+                        if (ran < (ran + skip) *0.95) exit(1) }' \
                         coverage/scanner.mdc.gcov
                 @rm -f .tmp*
  
-       coverage_scanner: scanner.c libscanner.c
+       coverage_scanner: scanner.c libscanner.c libmdcode.o libnumber.o libstring.o
                 $(CC) $(CFLAGS) --coverage -fprofile-dir=coverage -o coverage_scanner \
                         scanner.c libscanner.c \
                         libmdcode.o libnumber.o libstring.o -licuuc -lgmp
  
-
  ## Basic tests
  
  Some simple tests... maybe all tests are simple.
+Include a special test for numbers, as they are interesting.
  
  ###### test list
-       scanner_tests += "test1,if,then,+,-"
-       scanner_tests += "test1,if,then,+,-,/"
-       scanner_tests += "test1,--ignore-indent,if,then,+,-,/"
+       scanner_tests += "test1,-r,if,then,+,-"
+       scanner_tests += "test1,-r,if,then,+,-,/"
+       scanner_tests += "test1,-r,--ignore-indent,if,then,+,-,/"
+       scanner_tests += "test1,-r,--ignore-indent,--ignore-newline,if,then,+,-,/"
         scanner_tests += "test1,--ignore-indent,--ignore-newline,if,then,+,-,/"
-       scanner_tests += "test1,-Cc,--ignore-indent,--ignore-newline,if,then,+,-,/"
-       scanner_tests += "test1,-CcSz,--ignore-indent,--ignore-newline,if,then,+,-,/"
+       scanner_tests += "test1,-Sz,--ignore-indent,--ignore-newline,if,then,+,-,/"
  
  ###### test: test1
  
@@ -77,7 +79,7 @@ Some simple tests... maybe all tests are simple.
  
         if else then fi while
         1234,  1.234 -123.456e45
-       0x1234 +  0x543p3
+       0x1234 +  0x543p+3
         "This is a string" &"so is this"a
         a = """
            This is a multi-
@@ -91,7 +93,7 @@ Some simple tests... maybe all tests are simple.
         lines */
         divident /+ divisor
  
-###### output: test1,if,then,+,-
+###### output: test1,-r,if,then,+,-
         Tokenizing: 
         2:0 ident(A)
         2:2 ident(B)
@@ -145,13 +147,13 @@ Some simple tests... maybe all tests are simple.
         18:0 newline()
         18:0 number(1234)  1234
         18:4 mark(,)
-       18:7 number(1.234 )  617/500
+       18:7 number(1.234)  617/500
         18:13 -
         18:14 number(123.456e45)  123456000000000000000000000000000000000000000000
         19:0 newline()
-       19:0 number(0x1234 )  4660
+       19:0 number(0x1234)  4660
         19:7 +
-       19:10 number(0x543p3)  10776
+       19:10 number(0x543p+3)  10776
         20:0 newline()
         20:0 string("This is a string")  This is a string
         20:19 mark(&)
@@ -178,7 +180,7 @@ Some simple tests... maybe all tests are simple.
         32:0 newline()
         32:0 eof()
  
-###### output: test1,if,then,+,-,/
+###### output: test1,-r,if,then,+,-,/
         Tokenizing: 
         2:0 ident(A)
         2:2 ident(B)
@@ -232,13 +234,13 @@ Some simple tests... maybe all tests are simple.
         18:0 newline()
         18:0 number(1234)  1234
         18:4 mark(,)
-       18:7 number(1.234 )  617/500
+       18:7 number(1.234)  617/500
         18:13 -
         18:14 number(123.456e45)  123456000000000000000000000000000000000000000000
         19:0 newline()
-       19:0 number(0x1234 )  4660
+       19:0 number(0x1234)  4660
         19:7 +
-       19:10 number(0x543p3)  10776
+       19:10 number(0x543p+3)  10776
         20:0 newline()
         20:0 string("This is a string")  This is a string
         20:19 mark(&)
@@ -266,7 +268,7 @@ Some simple tests... maybe all tests are simple.
         32:0 newline()
         32:0 eof()
  
-###### output: test1,--ignore-indent,if,then,+,-,/
+###### output: test1,-r,--ignore-indent,if,then,+,-,/
         Tokenizing: 
         2:0 ident(A)
         2:2 ident(B)
@@ -308,14 +310,14 @@ Some simple tests... maybe all tests are simple.
         17:21 newline()
         18:0 number(1234)  1234
         18:4 mark(,)
-       18:7 number(1.234 )  617/500
+       18:7 number(1.234)  617/500
         18:13 -
         18:14 number(123.456e45)  123456000000000000000000000000000000000000000000
         18:24 newline()
-       19:0 number(0x1234 )  4660
+       19:0 number(0x1234)  4660
         19:7 +
-       19:10 number(0x543p3)  10776
-       19:17 newline()
+       19:10 number(0x543p+3)  10776
+       19:18 newline()
         20:0 string("This is a string")  This is a string
         20:19 mark(&)
         20:20 string("so is this"a) a so is this
@@ -342,7 +344,7 @@ Some simple tests... maybe all tests are simple.
         31:19 newline()
         32:0 eof()
  
-###### output: test1,--ignore-indent,--ignore-newline,if,then,+,-,/
+###### output: test1,-r,--ignore-indent,--ignore-newline,if,then,+,-,/
         Tokenizing: 
         2:0 ident(A)
         2:2 ident(B)
@@ -368,12 +370,12 @@ Some simple tests... maybe all tests are simple.
         17:16 ident(while)
         18:0 number(1234)  1234
         18:4 mark(,)
-       18:7 number(1.234 )  617/500
+       18:7 number(1.234)  617/500
         18:13 -
         18:14 number(123.456e45)  123456000000000000000000000000000000000000000000
-       19:0 number(0x1234 )  4660
+       19:0 number(0x1234)  4660
         19:7 +
-       19:10 number(0x543p3)  10776
+       19:10 number(0x543p+3)  10776
         20:0 string("This is a string")  This is a string
         20:19 mark(&)
         20:20 string("so is this"a) a so is this
@@ -394,7 +396,7 @@ Some simple tests... maybe all tests are simple.
         31:12 ident(divisor)
         32:0 eof()
  
-###### output: test1,-Cc,--ignore-indent,--ignore-newline,if,then,+,-,/
+###### output: test1,--ignore-indent,--ignore-newline,if,then,+,-,/
         Tokenizing: 
         2:0 ident(A)
         2:2 ident(B)
@@ -420,12 +422,12 @@ Some simple tests... maybe all tests are simple.
         17:16 ident(while)
         18:0 number(1234)  1234
         18:4 mark(,)
-       18:7 number(1.234 )  617/500
+       18:7 number(1.234)  617/500
         18:13 -
         18:14 number(123.456e45)  123456000000000000000000000000000000000000000000
-       19:0 number(0x1234 )  4660
+       19:0 number(0x1234)  4660
         19:7 +
-       19:10 number(0x543p3)  10776
+       19:10 number(0x543p+3)  10776
         20:0 string("This is a string")  This is a string
         20:19 mark(&)
         20:20 string("so is this"a) a so is this
@@ -443,7 +445,7 @@ Some simple tests... maybe all tests are simple.
         31:12 ident(divisor)
         32:0 eof()
  
-###### output: test1,-CcSz,--ignore-indent,--ignore-newline,if,then,+,-,/
+###### output: test1,-Sz,--ignore-indent,--ignore-newline,if,then,+,-,/
         Tokenizing: 
         2:0 ident(A)
         2:2 ident(B)
@@ -469,12 +471,12 @@ Some simple tests... maybe all tests are simple.
         17:16 ident(while)
         18:0 number(1234)  1234
         18:4 mark(,)
-       18:7 number(1.234 )  617/500
+       18:7 number(1.234)  617/500
         18:13 -
         18:14 number(123.456e45)  123456000000000000000000000000000000000000000000
-       19:0 number(0x1234 )  4660
+       19:0 number(0x1234)  4660
         19:7 +
-       19:10 number(0x543p3)  10776
+       19:10 number(0x543p+3)  10776
         20:0 mark(")
         20:1 ident(This)
         20:6 ident(is)
@@ -513,3 +515,374 @@ Some simple tests... maybe all tests are simple.
         31:10 +
         31:12 ident(divisor)
         32:0 eof()
+
+###### test list
+       scanner_tests += "testnum"
+
+###### test: testnum
+       12345
+       1234.56
+       1234.56e7
+       1234.56e-7
+       0x1234
+       0x123,456
+       0o777
+       0o111.111p4
+       0b11011110p3
+
+       123 456 789
+       0x1234_5678_9abc
+
+       "Now for some non-number"
+       1234p4
+       12-34
+       01234
+       0c1234
+       123.456e1a
+       123.e4
+       0x123 456
+       0b1234
+       123_345_.34
+       .75
+
+###### output: testnum
+       Tokenizing: 
+       2:0 number(12345)  12345
+       3:0 newline()
+       3:0 number(1234.56)  30864/25
+       4:0 newline()
+       4:0 number(1234.56e7)  12345600000
+       5:0 newline()
+       5:0 number(1234.56e-7)  1929/15625000
+       6:0 newline()
+       6:0 number(0x1234)  4660
+       7:0 newline()
+       7:0 number(0x123,456)  596523/2048
+       8:0 newline()
+       8:0 number(0o777)  511
+       9:0 newline()
+       9:0 number(0o111.111p4)  37449/32
+       10:0 newline()
+       10:0 number(0b11011110p3)  1776
+       12:0 newline()
+       12:0 newline()
+       12:0 number(123 456 789)  123456789
+       13:0 newline()
+       13:0 number(0x1234_5678_9abc)  20015998343868
+       15:0 newline()
+       15:0 newline()
+       15:0 string("Now for some non-..)  Now for some non-n..
+       16:0 newline()
+       16:0 number(1234p4) BAD NUMBER
+       17:0 newline()
+       17:0 number(12)  12
+       17:2 mark(-)
+       17:3 number(34)  34
+       18:0 newline()
+       18:0 number(01234) BAD NUMBER
+       19:0 newline()
+       19:0 number(0c1234) BAD NUMBER
+       20:0 newline()
+       20:0 number(123.456e1a) a 30864/25
+       21:0 newline()
+       21:0 number(123.e4)  1230000
+       22:0 newline()
+       22:0 number(0x123 456) BAD NUMBER
+       23:0 newline()
+       23:0 number(0b1234) BAD NUMBER
+       24:0 newline()
+       24:0 number(123_345)  123345
+       24:7 ident(_)
+       24:8 mark(.)
+       24:9 number(34)  34
+       25:0 newline()
+       25:0 mark(.)
+       25:1 number(75)  75
+       26:0 newline()
+       26:0 eof()
+
+## Error tests
+
+Now to test for some errors ... though things I thought would be errors
+sometimes aren't.
+
+###### test list
+       scanner_tests += "errtest,-r,--ignore-ident,--ignore-mark,-W_,-w_,if,then,+,-"
+       scanner_tests += "errtest,-r,--ignore-ident,--ignore-mark,-N,if,then,+,-"
+
+###### test: errtest
+
+       multiple decimal pointer 3.141.59
+       "Check for decimal commas" 3,14159 = 3,141,59
+       """A multi-string must not have text here
+       """
+       "or after close" + """
+               no text ..
+               """ here
+
+       /* No embedded /* comments */ in comments */
+       /* or content after
+        * a multiline comment */ Error
+
+       "  \\ \t \n special chars in strings"
+
+###### output: errtest,-r,--ignore-ident,--ignore-mark,-W_,-w_,if,then,+,-
+
+       Tokenizing: 
+       2:0 ERROR(multiple)
+       2:9 ERROR(decimal)
+       2:17 ERROR(pointer)
+       2:25 number(3.141)  3141/1000
+       2:30 ERROR(.)
+       2:31 number(59)  59
+       3:0 newline()
+       3:0 string("Check for decimal..)  Check for decimal ..
+       3:27 number(3,14159)  314159/100000
+       3:35 ERROR(=)
+       3:37 number(3,141)  3141/1000
+       3:42 ERROR(,)
+       3:43 number(59)  59
+       4:0 newline()
+       4:0 string("")  
+       4:2 ERROR("A multi-string mu..)
+       5:0 newline()
+       5:0 ERROR("""\x0a"or after clos..)
+       8:12 ERROR(here)
+       10:0 newline()
+       10:0 newline()
+       10:0 ERROR(/* No embedded /*)
+       10:15 bcomment(/* comments */)
+       10:30 ERROR(in)
+       10:33 ERROR(comments)
+       10:42 ERROR(*/)
+       11:0 newline()
+       11:0 ERROR(/* or content afte..)
+       12:26 ERROR(Error)
+       14:0 newline()
+       14:0 newline()
+       14:0 string("  \\\\ \\t \\n specia..)    \\ \x09 \x0a special ch..
+       15:0 newline()
+       15:0 eof()
+
+###### output: errtest,-r,--ignore-ident,--ignore-mark,-N,if,then,+,-
+       Tokenizing: 
+       2:0 ERROR(multiple)
+       2:9 ERROR(decimal)
+       2:17 ERROR(pointer)
+       2:25 ERROR(3)
+       2:26 ERROR(.)
+       2:27 ERROR(1)
+       2:28 ERROR(4)
+       2:29 ERROR(1)
+       2:30 ERROR(.)
+       2:31 ERROR(5)
+       2:32 ERROR(9)
+       3:0 newline()
+       3:0 string("Check for decimal..)  Check for decimal ..
+       3:27 ERROR(3)
+       3:28 ERROR(,)
+       3:29 ERROR(1)
+       3:30 ERROR(4)
+       3:31 ERROR(1)
+       3:32 ERROR(5)
+       3:33 ERROR(9)
+       3:35 ERROR(=)
+       3:37 ERROR(3)
+       3:38 ERROR(,)
+       3:39 ERROR(1)
+       3:40 ERROR(4)
+       3:41 ERROR(1)
+       3:42 ERROR(,)
+       3:43 ERROR(5)
+       3:44 ERROR(9)
+       4:0 newline()
+       4:0 string("")  
+       4:2 ERROR("A multi-string mu..)
+       5:0 newline()
+       5:0 ERROR("""\x0a"or after clos..)
+       8:12 ERROR(here)
+       10:0 newline()
+       10:0 newline()
+       10:0 ERROR(/* No embedded /*)
+       10:15 bcomment(/* comments */)
+       10:30 ERROR(in)
+       10:33 ERROR(comments)
+       10:42 ERROR(*/)
+       11:0 newline()
+       11:0 ERROR(/* or content afte..)
+       12:26 ERROR(Error)
+       14:0 newline()
+       14:0 newline()
+       14:0 string("  \\\\ \\t \\n specia..)    \\ \x09 \x0a special ch..
+       15:0 newline()
+       15:0 eof()
+
+## Nested tests.
+
+We need to test various aspects of tokenizing code that is stored
+in multiple nodes.  For example, comments and multi-line strings mustn't
+cross a node boundary.
+
+For this we tell `scanner` to extract sections directly from this file.
+As the file changes, line numbers might change as well, so we need to factor
+that out when testing.  A simple awk script can normalise the first line number
+to one.
+
+###### other tests
+       @for T in $(scanner_section_tests); do \
+          echo -n "Test $$T ... "; \
+          i="$$IFS"; IFS=,; set $$T; IFS="$$i"; section="$$1"; shift; \
+           ./md2c scanner-tests.mdc "output: $$T" | grep -v '^#' > .tmp.want; \
+          ./coverage_scanner --file scanner-tests.mdc --section "test: $$section" \
+            $${1+"$$@"} | awk -F: ' BEGIN {OFS=":"} $$1 ~ /^[0-9]/ {if (!first) first = $$1 - 1; \
+                 $$1 = $$1 - first} { print } '> .tmp.have; \
+           if ! cmp -s .tmp.want .tmp.have; then \
+               echo "FAILED"; diff -u .tmp.want .tmp.have; exit 1; fi ; \
+           echo "PASSED"; \
+       done
+
+###### test list
+       scanner_section_tests += section1 section_string section_comment
+
+###### test: section1
+
+       foreach s in sections:
+               ## section2
+               print done
+
+###### section2
+
+               This is another
+       section
+
+###### output: section1
+       Tokenizing: test: section1
+       1:8 ident(foreach)
+       1:16 ident(s)
+       1:18 ident(in)
+       1:21 ident(sections)
+       1:29 mark(:)
+       7:16 in()
+       7:16 ident(This)
+       7:21 ident(is)
+       7:24 ident(another)
+       8:8 newline()
+       8:8 out()
+       8:8 in()
+       8:8 ident(section)
+       3:16 newline()
+       3:16 ident(print)
+       3:22 ident(done)
+       4:0 newline()
+       4:0 out()
+       4:0 newline()
+       4:0 eof()
+
+###### test: section_string
+       a = '''
+         A sting mustn't cross
+         ## string B
+       skip
+
+###### string B
+       to a new node
+       '''
+
+###### output: section_string
+       Tokenizing: test: section_string
+       1:8 ident(a)
+       1:10 mark(=)
+       1:12 ERROR('''\x0a\x09  A sting mus..)
+       7:8 in()
+       7:8 ident(to)
+       7:11 ident(a)
+       7:13 ident(new)
+       7:17 ident(node)
+       8:8 newline()
+       8:8 ERROR(''')
+       4:8 newline()
+       4:8 out()
+       4:8 newline()
+       4:8 ident(skip)
+       5:0 newline()
+       5:0 eof()
+
+###### test: section_comment
+       /* Mult-line comment must stay within
+       ## comment B
+       */
+
+###### comment B
+       a single node, they cannot cross nodes.
+
+###### output: section_comment
+       Tokenizing: test: section_comment
+       1:8 ERROR(/* Mult-line comme..)
+       6:8 ident(a)
+       6:10 ident(single)
+       6:17 ident(node)
+       6:21 mark(,)
+       6:23 ident(they)
+       6:28 ident(cannot)
+       6:35 ident(cross)
+       6:41 ident(nodes)
+       6:46 mark(.)
+       3:8 newline()
+       3:8 mark(*/)
+       4:0 newline()
+       4:0 eof()
+
+## Ad-hoc test
+
+These tests cover bugs that were found in practice, and so prevent them from recurring.
+
+The "bad_indent" test was written because I was seeing a TK_in before the
+"program" line instead of a TK_newline.
+
+###### test list
+       scanner_tests += "bad_indent"
+
+###### test: bad_indent
+
+               const:
+                       foo : number = 45
+                       bar := "string"
+               program:
+                       foo := 4
+                       print foo, bar
+
+###### output: bad_indent
+       Tokenizing: 
+       2:8 in()
+       2:8 ident(const)
+       2:13 mark(:)
+       3:16 in()
+       3:16 ident(foo)
+       3:20 mark(:)
+       3:22 ident(number)
+       3:29 mark(=)
+       3:31 number(45)  45
+       4:16 newline()
+       4:16 ident(bar)
+       4:20 mark(:=)
+       4:23 string("string")  string
+       5:8 newline()
+       5:8 out()
+       5:8 newline()
+       5:8 ident(program)
+       5:15 mark(:)
+       6:16 in()
+       6:16 ident(foo)
+       6:20 mark(:=)
+       6:23 number(4)  4
+       7:16 newline()
+       7:16 ident(print)
+       7:22 ident(foo)
+       7:25 mark(,)
+       7:27 ident(bar)
+       8:0 newline()
+       8:0 out()
+       8:0 newline()
+       8:0 out()
+       8:0 newline()
+       8:0 eof()