parsergen: make it easier to test the simple 'calc' code.

[ocean] / csrc / scanner.mdc
diff --git a/csrc/scanner.mdc b/csrc/scanner.mdc
index 4d922415018e67a2f9aa87dba9a0f28b60fb6485..15306dd17494245986dbd014ebd2a2c8693c977d 100644
--- a/csrc/scanner.mdc
+++ b/csrc/scanner.mdc
@@ -520,6 +520,10 @@ If `TK_string` is ignored, then quote characters will appear as `TK_mark`s.
                                 break;
                         }
                 }
+               while (!at_eon(state) && (ch = get_char(state)) &&
+                                         iswalpha(ch))
+                       ;
+               unget_char(state);
                 close_token(state, &tk);
                 return tk;
         }
@@ -901,19 +905,23 @@ a flag that tells us whether or not we need to strip.
  
  ###### internal functions
  
-       static void do_strip(struct token_state *state)
+       static int do_strip(struct token_state *state)
         {
+               int indent = 0;
                 if (state->node->needs_strip) {
                         int n = 4;
                         while (n && state->node->code.txt[state->offset] == ' ') {
+                               indent += 1;
                                 state->offset += 1;
                                 n -= 1;
                         }
                         while (n == 4 && state->node->code.txt[state->offset] == '\t') {
+                               indent = indent_tab(indent);
                                 state->offset += 1;
                                 n -= 4;
                         }
                 }
+               return indent;
         }
  
         static wint_t get_char(struct token_state *state)
@@ -931,9 +939,8 @@ a flag that tells us whether or not we need to strip.
                         state->offset = 0;
                         if (state->node == NULL)
                                 return WEOF;
-                       do_strip(state);
                         state->line = state->node->line_no;
-                       state->col = state->node->indent;
+                       state->col = do_strip(state);
                 }
  
                 ## before get_char
@@ -958,8 +965,7 @@ a flag that tells us whether or not we need to strip.
                         state->col += 1;
                 } else if (is_newline(next)) {
                         state->line += 1;
-                       state->col = state->node->indent;
-                       do_strip(state);
+                       state->col = do_strip(state);
                 } else if (next == '\t') {
                         state->col = indent_tab(state->col);
                 }
@@ -1062,8 +1068,11 @@ parsed too much already.  For that there is `reset_token`.
         static void close_token(struct token_state *state,
                                 struct token *tk)
         {
-               tk->txt.len = (state->node->code.txt + state->offset)
-                             - tk->txt.txt;
+               if (state->node != tk->node)
+                       tk->txt.len = tk->node->code.len - (tk->txt.txt - tk->node->code.txt);
+               else
+                       tk->txt.len = (state->node->code.txt + state->offset)
+                                     - tk->txt.txt;
         }
  
         static void reset_token(struct token_state *state, struct token *tok)
@@ -1169,9 +1178,8 @@ As well as getting tokens, we need to be able to create the
                 memset(state, 0, sizeof(*state));
                 state->node = code;
                 state->line = code->line_no;
-               state->col = code->indent;
+               state->col = do_strip(state);
                 state->conf = conf;
-               do_strip(state);
                 return state;
         }
         void token_close(struct token_state *state)