]> ocean-lang.org Git - ocean/blobdiff - csrc/scanner.mdc
scanner: improve transition from node to node.
[ocean] / csrc / scanner.mdc
index 80adb2a3ad1f7b1684af3ee7fb1f77006492c7a0..777688482566796691d97f6df6cd3e8e5cf9e863 100644 (file)
@@ -807,9 +807,11 @@ information and return one token.
        }
 
 ###### white space
+       if (is_newline(ch))
+               state_check_node(state);
        if (is_newline(ch) || (at_son(state) && ch <= ' ')) {
                int newlines = 0;
-               int was_son = at_son(state);
+               int was_nl = is_newline(ch);
                if (ignored & (1<<TK_in)) {
                        if (!is_newline(ch))
                                continue;
@@ -820,17 +822,17 @@ information and return one token.
                        return tk;
                }
                // Indents are needed, so check all white space.
-               while (ch <= ' ' && !at_eon(state)) {
+               while (ch <= ' ' && ch != WEOF) {
                        if (is_newline(ch))
                                newlines += 1;
                        ch = get_char(state);
+                       if (is_newline(ch))
+                               state_check_node(state);
                }
-               if (at_eon(state)) {
-                       newlines += 1;
-               } else
+               if (ch != WEOF)
                        unget_char(state);
                state->delayed_lines = newlines;
-               state->out_next = was_son;
+               state->out_next = !was_nl;
                state->check_indent = 1;
                continue;
        }
@@ -962,26 +964,37 @@ a flag that tells us whether or not we need to strip.
                }
        }
 
+       static void state_check_node(struct token_state *state)
+       {
+               if (!state->node)
+                       return;
+               if (state->node->code.len > state->offset)
+                       return;
+
+               do
+                       state->node = state->node->next;
+               while (state->node && state->node->code.txt == NULL);
+               state->offset = 0;
+               state->prev_offset = 0;
+               state->strip_offset = 0;
+               state->col = 0;
+               if (state->node == NULL)
+                       return;
+               state->line = state->node->line_no;
+               do_strip(state);
+               state->col = state->node->needs_strip;
+               state->strip_offset = state->offset;
+       }
+
        static wint_t get_char(struct token_state *state)
        {
                wchar_t next;
                size_t n;
                mbstate_t mbstate;
 
+               state_check_node(state);
                if (state->node == NULL)
                        return WEOF;
-               if (state->node->code.len <= state->offset) {
-                       do
-                               state->node = state->node->next;
-                       while (state->node && state->node->code.txt == NULL);
-                       state->offset = 0;
-                       if (state->node == NULL)
-                               return WEOF;
-                       state->line = state->node->line_no;
-                       do_strip(state);
-                       state->col = state->node->needs_strip;
-                       state->strip_offset = state->offset;
-               }
 
                ## before get_char
 
@@ -2030,12 +2043,15 @@ the tokens one per line.
                        { "ignore-block-comment", 0, NULL, 'C'},
                        { "ignore-indent",      0, NULL, 'i'},
                        { "file",               1, NULL, 'f'},
+                       { "section",            1, NULL, 's'},
                        { NULL,                 0, NULL, 0},
                };
-               static const char options[] = "W:w:n:NIMSzclCif:";
+               static const char options[] = "W:w:n:NIMSzclCif:s:";
 
                struct section *table, *s, *prev;
                int opt;
+               char *section_name = NULL;
+               int section_found = 0;
 
                setlocale(LC_ALL,"");
                while ((opt = getopt_long(argc, argv, options, long_options, NULL))
@@ -2054,6 +2070,7 @@ the tokens one per line.
                        case 'l': conf.ignored |= 1 << TK_newline; break;
                        case 'i': conf.ignored |= 1 << TK_in; break;
                        case 'f': filename = optarg; break;
+                       case 's': section_name = optarg; break;
                        default: fprintf(stderr, "scanner: unknown option '%c'.\n",
                                         opt);
                                exit(1);
@@ -2090,6 +2107,12 @@ the tokens one per line.
 
                for (s = table; s;
                        (code_free(s->code), prev = s, s = s->next, free(prev))) {
+                       if (section_name &&
+                           (s->section.len != strlen(section_name) ||
+                            strncmp(s->section.txt, section_name, s->section.len) != 0))
+                               continue;
+                       if (section_name)
+                               section_found = 1;
                        printf("Tokenizing: %.*s\n", s->section.len,
                                s->section.txt);
                        state = token_open(s->code, &conf);
@@ -2132,6 +2155,10 @@ the tokens one per line.
                }
                if (conf.words_marks != known)
                        free(conf.words_marks);
+               if (section_name && !section_found) {
+                       fprintf(stderr, "scanner: section %s not found\n", section_name);
+                       errs = 1;
+               }
                exit(!!errs);
        }
 ###### File: scanner.mk