From: NeilBrown Date: Sun, 26 May 2019 22:48:35 +0000 (+1000) Subject: scanner: add options to allow more complete testing. X-Git-Url: https://ocean-lang.org/code/?a=commitdiff_plain;h=30d54e829e0fa98589a25ef481bbec07e20e538d;p=ocean scanner: add options to allow more complete testing. Allow all facets of scanner to be controlled by command line options. This will make it easier to do exhaustive testing. Also make sure to free things that are allocated. Signed-off-by: NeilBrown --- diff --git a/csrc/scanner.mdc b/csrc/scanner.mdc index 15306dd..0abc08c 100644 --- a/csrc/scanner.mdc +++ b/csrc/scanner.mdc @@ -436,7 +436,7 @@ token types. ###### internal functions static int is_quote(wchar_t ch) { - return ch == '\'' || ch == '"' || ch == '`'; + return ch == '\'' || ch == '"' || ch == '`'; // " } #### Multi-line strings @@ -1933,6 +1933,7 @@ the tokens one per line. #include #include #include + #include #include "mdcode.h" #include "scanner.h" #include "number.h" @@ -1945,11 +1946,19 @@ the tokens one per line. fprintf(stderr, "%s\n", msg); } + static int kcmp(const void *ap, const void *bp) + { + char * const *a = ap; + char * const *b = bp; + return strcmp(*a, *b); + } + int main(int argc, char *argv[]) { int fd; int len; char *file; + char *filename = NULL; struct token_state *state; const char *known[] = { "==", @@ -1966,22 +1975,77 @@ the tokens one per line. .words_marks = known, .number_chars = "., _+-", .known_count = sizeof(known)/sizeof(known[0]), - .ignored = (0 << TK_line_comment) - |(0 << TK_block_comment), + .ignored = 0, + }; + static const struct option long_options[] = { + { "word-start", 1, NULL, 'W'}, + { "word-cont", 1, NULL, 'w'}, + { "number-chars", 1, NULL, 'n'}, + { "ignore-numbers", 0, NULL, 'N'}, + { "ignore-ident", 0, NULL, 'I'}, + { "ignore-marks", 0, NULL, 'M'}, + { "ignore-strings", 0, NULL, 'S'}, + { "ignore-multi-strings",0, NULL, 'z'}, + { "ignore-line-comment",0, NULL, 'c'}, + { "ignore-newline", 0, NULL, 'l'}, + { "ignore-block-comment", 0, NULL, 'C'}, + { "ignore-indent", 0, NULL, 'i'}, + { "file", 1, NULL, 'f'}, + { NULL, 0, NULL, 0}, }; + static const char options[] = "W:w:n:NIMSzclCif:"; + struct section *table, *s, *prev; + int opt; + setlocale(LC_ALL,""); - if (argc != 2) { - fprintf(stderr, "Usage: scanner file\n"); - exit(2); + while ((opt = getopt_long(argc, argv, options, long_options, NULL)) + != -1) { + switch(opt) { + case 'W': conf.word_start = optarg; break; + case 'w': conf.word_cont = optarg; break; + case 'n': conf.number_chars = optarg; break; + case 'N': conf.ignored |= 1 << TK_number; break; + case 'I': conf.ignored |= 1 << TK_ident; break; + case 'M': conf.ignored |= 1 << TK_mark; break; + case 'S': conf.ignored |= 1 << TK_string; break; + case 'z': conf.ignored |= 1 << TK_multi_string; break; + case 'c': conf.ignored |= 1 << TK_line_comment; break; + case 'C': conf.ignored |= 1 << TK_block_comment; break; + case 'l': conf.ignored |= 1 << TK_newline; break; + case 'i': conf.ignored |= 1 << TK_in; break; + case 'f': filename = optarg; break; + default: fprintf(stderr, "scanner: unknown option '%c'.\n", + opt); + exit(1); + } } - fd = open(argv[1], O_RDONLY); + + if (optind < argc) { + const char **wm = calloc(argc - optind, sizeof(char*)); + int i; + for (i = optind; i < argc; i++) + wm[i - optind] = argv[i]; + qsort(wm, argc-optind, sizeof(char*), kcmp); + conf.words_marks = wm; + conf.known_count = argc - optind; + } + + if (filename) + fd = open(filename, O_RDONLY); + else + fd = 0; if (fd < 0) { fprintf(stderr, "scanner: cannot open %s: %s\n", - argv[1], strerror(errno)); + filename, strerror(errno)); exit(1); } len = lseek(fd, 0, 2); + if (len <= 0) { + fprintf(stderr,"scanner: %s is empty or not seekable\n", + filename ?: "stdin"); + exit(1); + } file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0); table = code_extract(file, file+len, pr_err); @@ -2025,7 +2089,10 @@ the tokens one per line. if (tk.num == TK_eof) break; } + token_close(state); } + if (conf.words_marks != known) + free(conf.words_marks); exit(!!errs); } ###### File: scanner.mk