ocean-lang.org Git - ocean/blobdiff - csrc/scanner.mdc
Scanner: parsing of comments and strings must recognise end-of-node
[ocean] / csrc / scanner.mdc
index 113c9d2ea199480e4f9d057a76f9c3895c5b6deb..37b336f2233c152b3cf398f385748cf647849c70 100644 (file)
@@ -347,25 +347,34 @@ Known marks are included in the same list as the list of known words.
                        return tk;
                }
                prev = ch;
-               if (prev == '/')
-                       save_unget_state(state);
+               save_unget_state(state);
                ch = get_char(state);
                if (!(ignored && (1<<TK_string)) && is_quote(ch))
                        break;
-               if (!(ignored && (1<<TK_line_comment)) &&
-                   prev == '/' && ch == '/') {
+               if (prev == '#')
+                       break;
+               if (prev == '/' && ch == '/' && tk.txt.len > 1) {
                        restore_unget_state(state);
                        break;
                }
-               if (!(ignored && (1<<TK_block_comment)) &&
-                   prev == '/' && ch == '*') {
+               if (prev == '/' && ch == '*' && tk.txt.len > 1) {
                        restore_unget_state(state);
                        break;
                }
        }
        unget_char(state);
-       if (tk.num != TK_error)
+       if (tk.num != TK_error) {
+               close_token(state, &tk);
                return tk;
+       }
+
+If we don't find a known mark, we will check for strings and comments
+before assuming that we have an unknown mark.
+
+###### parse mark
+       ## parse string
+       ## parse comment
+       ## unknown mark
 
 ###### unknown mark
        if (tk.txt.len) {
@@ -441,7 +450,8 @@ followed by the start of a new string.
                         * unget so the newline is seen,
                         * but return rest of string as an error.
                         */
-                       unget_char(state);
+                       if (is_newline(ch))
+                               unget_char(state);
                        close_token(state, &tk);
                        tk.num = TK_error;
                        return tk;
@@ -475,14 +485,18 @@ If `TK_string` is ignored, then quote characters will appear as `TK_mark`s.
            !(ignored & (1<<TK_string))) {
                wchar_t first = tk.txt.txt[0];
                reset_token(state, &tk);
-               get_char(state);
-               do
+               ch = get_char(state);
+               tk.num = TK_error;
+               while (!at_eon(state) && !is_newline(ch)) {
                        ch = get_char(state);
-               while (ch != first && !is_newline(ch));
-               tk.num = TK_string;
-               if (is_newline(ch)) {
-                       unget_char(state);
-                       tk.num = TK_error;
+                       if (ch == first) {
+                               tk.num = TK_string;
+                               break;
+                       }
+                       if (is_newline(ch)) {
+                               unget_char(state);
+                               break;
+                       }
                }
                close_token(state, &tk);
                return tk;
@@ -506,7 +520,7 @@ it would not suffer from this rule.
 
 These two comment types are reported as two separate token types, and
 consequently can be ignored separately.  When ignored a comment is
-parsed and discarded.
+still parsed, but is discarded.
 
 ###### token types
        TK_line_comment,
@@ -528,14 +542,16 @@ parsed and discarded.
 
 #### Single line comments
 
-A single-line comment continues up to, but not including the newline.
+A single-line comment continues up to, but not including, the newline
+or end of node.
 
 ###### parse comment
 
        if (is_line_comment(tk.txt)) {
-               while (!is_newline(ch))
+               while (!is_newline(ch) && !at_eon(state))
                        ch = get_char(state);
-               unget_char(state);
+               if (is_newline(ch))
+                       unget_char(state);
                close_token(state, &tk);
                tk.num = TK_line_comment;
                if (ignored & (1 << TK_line_comment))
@@ -1109,9 +1125,6 @@ loop.
        ## parse number
        ## parse word
        ## parse mark
-       ## parse string
-       ## parse comment
-       ## unknown mark
 
 ### Start and stop