From 30d54e829e0fa98589a25ef481bbec07e20e538d Mon Sep 17 00:00:00 2001
From: NeilBrown
Date: Mon, 27 May 2019 08:48:35 +1000
Subject: [PATCH] scanner: add options to allow more complete testing.

Allow all facets of scanner to be controlled by command line options.
This will make it easier to do exhaustive testing.

Also make sure to free things that are allocated.

Signed-off-by: NeilBrown
---
 csrc/scanner.mdc | 90 +++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 82 insertions(+), 8 deletions(-)

diff --git a/csrc/scanner.mdc b/csrc/scanner.mdc
index 15306dd..0abc08c 100644
--- a/csrc/scanner.mdc
+++ b/csrc/scanner.mdc
@@ -436,7 +436,7 @@ token types.
 ###### internal functions
 	static int is_quote(wchar_t ch)
 	{
-		return ch == '\'' || ch == '"' || ch == '`';
+		return ch == '\'' || ch == '"' || ch == '`'; // "
 	}
 
 #### Multi-line strings
@@ -1933,6 +1933,7 @@ the tokens one per line.
 	#include
 	#include
 	#include
+	#include <getopt.h>
 	#include "mdcode.h"
 	#include "scanner.h"
 	#include "number.h"
@@ -1945,11 +1946,20 @@ the tokens one per line.
 		fprintf(stderr, "%s\n", msg);
 	}
 
+	/* qsort() comparator: order an array of C strings with strcmp(). */
+	static int kcmp(const void *ap, const void *bp)
+	{
+		char * const *a = ap;
+		char * const *b = bp;
+		return strcmp(*a, *b);
+	}
+
 	int main(int argc, char *argv[])
 	{
 		int fd;
 		int len;
 		char *file;
+		char *filename = NULL;
 		struct token_state *state;
 		const char *known[] = {
 			"==",
@@ -1966,22 +1976,83 @@ the tokens one per line.
 			.words_marks = known,
 			.number_chars = "., _+-",
 			.known_count = sizeof(known)/sizeof(known[0]),
-			.ignored = (0 << TK_line_comment)
-				  |(0 << TK_block_comment),
+			.ignored = 0,
+		};
+		static const struct option long_options[] = {
+			{ "word-start",		1, NULL, 'W'},
+			{ "word-cont",		1, NULL, 'w'},
+			{ "number-chars",	1, NULL, 'n'},
+			{ "ignore-numbers",	0, NULL, 'N'},
+			{ "ignore-ident",	0, NULL, 'I'},
+			{ "ignore-marks",	0, NULL, 'M'},
+			{ "ignore-strings",	0, NULL, 'S'},
+			{ "ignore-multi-strings",0, NULL, 'z'},
+			{ "ignore-line-comment",0, NULL, 'c'},
+			{ "ignore-newline",	0, NULL, 'l'},
+			{ "ignore-block-comment", 0, NULL, 'C'},
+			{ "ignore-indent",	0, NULL, 'i'},
+			{ "file",		1, NULL, 'f'},
+			{ NULL,			0, NULL, 0},
 		};
+		static const char options[] = "W:w:n:NIMSzclCif:";
+
 		struct section *table, *s, *prev;
+		int opt;
+
 		setlocale(LC_ALL,"");
-		if (argc != 2) {
-			fprintf(stderr, "Usage: scanner file\n");
-			exit(2);
+		while ((opt = getopt_long(argc, argv, options, long_options, NULL))
+		       != -1) {
+			switch(opt) {
+			case 'W': conf.word_start = optarg; break;
+			case 'w': conf.word_cont = optarg; break;
+			case 'n': conf.number_chars = optarg; break;
+			case 'N': conf.ignored |= 1 << TK_number; break;
+			case 'I': conf.ignored |= 1 << TK_ident; break;
+			case 'M': conf.ignored |= 1 << TK_mark; break;
+			case 'S': conf.ignored |= 1 << TK_string; break;
+			case 'z': conf.ignored |= 1 << TK_multi_string; break;
+			case 'c': conf.ignored |= 1 << TK_line_comment; break;
+			case 'C': conf.ignored |= 1 << TK_block_comment; break;
+			case 'l': conf.ignored |= 1 << TK_newline; break;
+			case 'i': conf.ignored |= 1 << TK_in; break;
+			case 'f': filename = optarg; break;
+			default:
+				/* getopt_long() returns '?' for a bad option, so 'opt' is not worth printing. */
+				fprintf(stderr, "Usage: scanner [options] [--file FILE] [known-word-or-mark ...]\n");
+				exit(1);
+			}
 		}
-		fd = open(argv[1], O_RDONLY);
+
+		if (optind < argc) {
+			/* Remaining arguments replace the built-in 'known' list; they are sorted with kcmp. */
+			const char **wm = calloc(argc - optind, sizeof(char*));
+			int i;
+			if (!wm) {
+				fprintf(stderr, "scanner: out of memory\n");
+				exit(1);
+			}
+			for (i = optind; i < argc; i++)
+				wm[i - optind] = argv[i];
+			qsort(wm, argc-optind, sizeof(char*), kcmp);
+			conf.words_marks = wm;
+			conf.known_count = argc - optind;
+		}
+
+		if (filename)
+			fd = open(filename, O_RDONLY);
+		else
+			fd = 0;
 		if (fd < 0) {
 			fprintf(stderr, "scanner: cannot open %s: %s\n",
-				argv[1], strerror(errno));
+				filename, strerror(errno));
 			exit(1);
 		}
 		len = lseek(fd, 0, 2);
+		if (len <= 0) {
+			fprintf(stderr,"scanner: %s is empty or not seekable\n",
+				filename ?: "stdin");
+			exit(1);
+		}
 		file = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
 		table = code_extract(file, file+len, pr_err);
 
@@ -2025,7 +2096,10 @@ the tokens one per line.
 				if (tk.num == TK_eof)
 					break;
 			}
+			token_close(state);
 		}
+		if (conf.words_marks != known)
+			free(conf.words_marks);
 		exit(!!errs);
 	}
 ###### File: scanner.mk
-- 
2.43.0