scanner: fix silly error calling indent_tab()

[ocean] / csrc / scanner.mdc
diff --git a/csrc/scanner.mdc b/csrc/scanner.mdc

index 37b336f2233c152b3cf398f385748cf647849c70..14cec4b89ef7b885a48422fa0a8cb2c7af59843d 100644 (file)
--- a/csrc/scanner.mdc
+++ b/csrc/scanner.mdc
@@ -90,7 +90,7 @@ The different tokens are numbers, words, marks, strings, comments,
  newlines, EOF, and indents, each of which is examined in detail below.
  
  There are various cases where no token can be found in part of the
-input.  All of these will be reported as an `TK_error` token.
+input.  All of these will be reported as a `TK_error` token.
  
  It is possible to declare a number of strings which form distinct
  tokens (rather than being grouped as e.g. 'word').  These are given
@@ -260,6 +260,9 @@ and the length of the list must be given (`known_count`).
  Tokens matching these known words are reported as the index of the
  list added to `TK_reserved`.
  
+If identifiers are ignored, then any word which is not listed as a
+known word results in an error.
+
  ###### token config parameters
         const char **words_marks;
         int known_count;
@@ -341,7 +344,16 @@ Known marks are included in the same list as the list of known words.
                 if (n >= 0)
                         tk.num = TK_reserved + n;
                 else if (tk.num != TK_error) {
-                       /* found a longest-known-mark */
+                       /* found a longest-known-mark, still need to
+                        * check for comments
+                        */
+                       if (tk.txt.len == 2 && tk.txt.txt[0] == '/' &&
+                           (ch == '/' || ch == '*')) {
+                               /* Yes, this is a comment, not a '/' */
+                               restore_unget_state(state);
+                               tk.num = TK_error;
+                               break;
+                       }
                         unget_char(state);
                         close_token(state, &tk);
                         return tk;
@@ -351,13 +363,16 @@ Known marks are included in the same list as the list of known words.
                 ch = get_char(state);
                 if (!(ignored && (1<<TK_string)) && is_quote(ch))
                         break;
-               if (prev == '#')
+               if (prev == '#' && n < 0)
+                       /* '#' is not a known mark, so assume it is a comment */
                         break;
-               if (prev == '/' && ch == '/' && tk.txt.len > 1) {
+               if (prev == '/' && ch == '/' && tk.txt.len == 1 && n < 0) {
+                       close_token(state, &tk);
                         restore_unget_state(state);
                         break;
                 }
-               if (prev == '/' && ch == '*' && tk.txt.len > 1) {
+               if (prev == '/' && ch == '*' && tk.txt.len == 1 && n < 0) {
+                       close_token(state, &tk);
                         restore_unget_state(state);
                         break;
                 }
@@ -830,6 +845,11 @@ tokens will continue to return the same end-of-file token.
  
  ###### white space
         if (ch == WEOF) {
+               if (state->col) {
+                       state->col = 0;
+                       state->check_indent = 1;
+                       continue;
+               }
                 tk.num = TK_eof;
                 return tk;
         }
@@ -1754,7 +1774,7 @@ required indent is found.
                 if (c == ' ')
                         skipped += 1;
                 else if (c == '\t')
-                       skipped = indent_tab(c);
+                       skipped = indent_tab(skipped);
                 else
                         break;
                 i+= 1;