ocean-lang.org Git - ocean/commitdiff
scanner: add options to allow more complete testing.
author NeilBrown <neil@brown.name>
Sun, 26 May 2019 22:48:35 +0000 (08:48 +1000)
committer NeilBrown <neil@brown.name>
Sun, 26 May 2019 22:48:35 +0000 (08:48 +1000)
Allow all facets of the scanner to be controlled by command-line
options.  This will make it easier to do exhaustive testing.

Also make sure to free things that are allocated.

Signed-off-by: NeilBrown <neil@brown.name>
csrc/scanner.mdc

index 15306dd17494245986dbd014ebd2a2c8693c977d..0abc08c6f8dd09a548b239b40cf253ab676bfc67 100644 (file)
@@ -436,7 +436,7 @@ token types.
 ###### internal functions
        static int is_quote(wchar_t ch)
        {
-               return ch == '\'' || ch == '"' || ch == '`';
+               return ch == '\'' || ch == '"' || ch == '`'; // "
        }
 
 #### Multi-line strings
@@ -1933,6 +1933,7 @@ the tokens one per line.
        #include <stdio.h>
        #include <gmp.h>
        #include <locale.h>
+       #include <getopt.h>
        #include "mdcode.h"
        #include "scanner.h"
        #include "number.h"
@@ -1945,11 +1946,19 @@ the tokens one per line.
                fprintf(stderr, "%s\n", msg);
        }
 
+       static int kcmp(const void *ap, const void *bp) // qsort() comparator for an array of char* strings
+       {
+               char * const *a = ap; // each argument points at an array element, i.e. at a char*
+               char * const *b = bp;
+               return strcmp(*a, *b); // order elements by strcmp() of the pointed-to strings
+       }
+
        int main(int argc, char *argv[])
        {
                int fd;
                int len;
                char *file;
+               char *filename = NULL;
                struct token_state *state;
                const char *known[] = {
                        "==",
@@ -1966,22 +1975,77 @@ the tokens one per line.
                        .words_marks = known,
                        .number_chars = "., _+-",
                        .known_count = sizeof(known)/sizeof(known[0]),
-                       .ignored = (0 << TK_line_comment)
-                                 |(0 << TK_block_comment),
+                       .ignored = 0,
+               };
+               static const struct option long_options[] = {
+                       { "word-start",         1, NULL, 'W'},
+                       { "word-cont",          1, NULL, 'w'},
+                       { "number-chars",       1, NULL, 'n'},
+                       { "ignore-numbers",     0, NULL, 'N'},
+                       { "ignore-ident",       0, NULL, 'I'},
+                       { "ignore-marks",       0, NULL, 'M'},
+                       { "ignore-strings",     0, NULL, 'S'},
+                       { "ignore-multi-strings",0, NULL, 'z'},
+                       { "ignore-line-comment",0, NULL, 'c'},
+                       { "ignore-newline",     0, NULL, 'l'},
+                       { "ignore-block-comment", 0, NULL, 'C'},
+                       { "ignore-indent",      0, NULL, 'i'},
+                       { "file",               1, NULL, 'f'},
+                       { NULL,                 0, NULL, 0},
                };
+               static const char options[] = "W:w:n:NIMSzclCif:";
+
                struct section *table, *s, *prev;
+               int opt;
+
                setlocale(LC_ALL,"");
-               if (argc != 2) {
-                       fprintf(stderr, "Usage: scanner file\n");
-                       exit(2);
+               while ((opt = getopt_long(argc, argv, options, long_options, NULL))
+                      != -1) {
+                       switch(opt) {
+                       case 'W': conf.word_start = optarg; break;
+                       case 'w': conf.word_cont = optarg; break;
+                       case 'n': conf.number_chars = optarg; break;
+                       case 'N': conf.ignored |= 1 << TK_number; break;
+                       case 'I': conf.ignored |= 1 << TK_ident; break;
+                       case 'M': conf.ignored |= 1 << TK_mark; break;
+                       case 'S': conf.ignored |= 1 << TK_string; break;
+                       case 'z': conf.ignored |= 1 << TK_multi_string; break;
+                       case 'c': conf.ignored |= 1 << TK_line_comment; break;
+                       case 'C': conf.ignored |= 1 << TK_block_comment; break;
+                       case 'l': conf.ignored |= 1 << TK_newline; break;
+                       case 'i': conf.ignored |= 1 << TK_in; break;
+                       case 'f': filename = optarg; break;
+                       default: fprintf(stderr, "scanner: unknown option '%c'.\n",
+                                        opt);
+                               exit(1);
+                       }
                }
-               fd = open(argv[1], O_RDONLY);
+
+               if (optind < argc) {
+                       const char **wm = calloc(argc - optind, sizeof(char*));
+                       int i;
+                       for (i = optind; i < argc; i++)
+                               wm[i - optind] = argv[i];
+                       qsort(wm, argc-optind, sizeof(char*), kcmp);
+                       conf.words_marks = wm;
+                       conf.known_count = argc - optind;
+               }
+
+               if (filename)
+                       fd = open(filename, O_RDONLY);
+               else
+                       fd = 0;
                if (fd < 0) {
                        fprintf(stderr, "scanner: cannot open %s: %s\n",
-                               argv[1], strerror(errno));
+                               filename, strerror(errno));
                        exit(1);
                }
                len = lseek(fd, 0, 2);
+               if (len <= 0) {
+                       fprintf(stderr,"scanner: %s is empty or not seekable\n",
+                               filename ?: "stdin");
+                       exit(1);
+               }
                file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
                table = code_extract(file, file+len, pr_err);
 
@@ -2025,7 +2089,10 @@ the tokens one per line.
                                if (tk.num == TK_eof)
                                        break;
                        }
+                       token_close(state);
                }
+               if (conf.words_marks != known)
+                       free(conf.words_marks);
                exit(!!errs);
        }
 ###### File: scanner.mk