oceani: simplify test in var_block_close.

[ocean] / csrc / mdcode.mdc
diff --git a/csrc/mdcode.mdc b/csrc/mdcode.mdc

index f49ee83e6775d5441ee05385385bae67ba198914..cf6887dea25c316bab6bc6679ce2f8e2d622a046 100644 (file)
--- a/csrc/mdcode.mdc
+++ b/csrc/mdcode.mdc
@@ -55,20 +55,25 @@ times.  Allowing this might make some sense, but it is probably a
  mistake, and prohibiting it make some of the code a bit cleaner.
  
  Equally, every section of code should be interpolated at least once -
-with two exceptions.  These exceptions are imposed by the tool, not
-the library.  A different client could impose different rules on the
-names of top-level code sections.
-
-The first exception we have already seen.  A section name starting
-__Example:__ indicates code that is not to be included in the final product.
-
-The second exception is for the top level code sections which will be
-written to files.  Again these are identified by their section name.
-This must start with __File:__  the following text (after optional
-spaces) will be used as a file name.
-
-Any section containing code that does not start __Example:__ or
-__File:__ must be included in some other section exactly once.
+with one exception.  This exception is imposed by the
+tool, not the library.  A different client could impose different
+rules on the names of top-level code sections.
+
+One example of the exception we have already seen.  A section name
+starting __Example:__ indicates code that is not to be included in the
+final product.  Any leading word will do: provided the name contains a
+space and the first space is preceded by a colon, that section name
+will be ignored.
+
+A special case of this exception exists for the leading word
+__File__.  These sections are the top level code sections and they
+will be written to the named file.  Thus a section named
+__File: foo__ should not be referenced by another section, and its
+contents after all references are expanded will be written to the file
+__foo__.
+
+Any section containing code whose name does not start with __Word:__
+must be included in some other section exactly once.
  
  ### Multiple files
  
@@ -93,6 +98,7 @@ will "do the right thing".
  
  ### File: mdcode.h
  
+       #include <stdio.h>
         ## exported types
         ## exported functions
  
@@ -113,11 +119,11 @@ will "do the right thing".
         libmdcode.o : libmdcode.c mdcode.h
                 $(CC) $(CFLAGS) -c libmdcode.c
  
-
  ### File: md2c.c
  
         #include <unistd.h>
         #include <stdlib.h>
+       #include <stdio.h>
  
         #include "mdcode.h"
  
@@ -155,8 +161,11 @@ the root.
  
  Finally we need to know if the `code_node` was recognised by being
  indented or not.  If it was, the client of this data will want to
-strip of the leading tab or 4 spaces.  Hence a `needs_strip` flag is
-needed.
+strip off the leading tab or 4 spaces.  Hence a `needs_strip` flag is
+needed.  This will be set to 8 if a tab is found and 4 if four spaces are found.
+This means the relative indent of text in the node
+is `node->indent - node->needs_strip`.
+The relative indent is needed for detecting indents in the overall file.
  
  ##### exported types
  
@@ -281,7 +290,13 @@ in a new node.
                 n->code = txt;
                 n->indent = 0;
                 n->line_no = line_no;
-               n->needs_strip = needs_strip;
+               if (needs_strip) {
+                       if (txt.txt[0] == '\t')
+                               n->needs_strip = 8;
+                       else
+                               n->needs_strip = 4;
+               } else
+                       n->needs_strip = 0;
                 n->next = NULL;
                 n->child = NULL;
                 if (where->last)
@@ -323,21 +338,31 @@ However when adding a link, we might be able to include it in the last
  Now we need a lookup table to be able to find sections by name.
  Something that provides an `n*log(N)` search time is probably
  justified, but for now I want a minimal stand-alone program so a
-linked list managed by insertion-sort will do.  As a comparison
-function it is easiest to sort based on length before content.  So
-sections won't be in standard lexical order, but that isn't important.
+linked list managed by insertion-sort will do.
+
+The text compare function will likely be useful for any clients of our
+library, so we may as well export it.
  
  If we cannot find a section, we simply want to create it.  This allows
  sections and references to be created in any order.  Sections with
  no references or no content will cause a warning eventually.
  
+#### exported functions
+
+       int text_cmp(struct text a, struct text b);
+
  #### internal functions
  
-       static int text_cmp(struct text a, struct text b)
+       int text_cmp(struct text a, struct text b)
         {
-               if (a.len != b.len)
+               int len = a.len;
+               if (len > b.len)
+                       len = b.len;
+               int cmp = strncmp(a.txt, b.txt, len);
+               if (cmp)
+                       return cmp;
+               else
                         return a.len - b.len;
-               return strncmp(a.txt, b.txt, a.len);
         }
  
         static struct psection *section_find(struct psection **list, struct text name)
@@ -524,7 +549,7 @@ There are two sorts of end markers: the presence of a particular
  string, or the absence of an indent.  We will use a string to
  represent a presence, and a `NULL` to represent the absence.
  
-While looking at code we don't think about paragraphs are all - just
+While looking at code we don't think about paragraphs at all - just
  look for a line that starts with the right thing.
  Every line that is still code then needs to be examined to see if it
  is a section reference.
@@ -540,6 +565,11 @@ number of spaces (counting 8 for tabs) after the natural indent of the
  code (which is a tab or 4 spaces).  We use a separate function `count_spaces`
  for that.
  
+If there are completely blank lines (no indent) at the end of the found code,
+these should be considered to be spacing between the code and the next section,
+and so not included in the code.  When a marker is used to explicitly mark the
+end of the code, we don't need to check for these blank lines.
+
  #### internal functions
  
         static int count_space(char *sol, char *p)
@@ -555,7 +585,6 @@ for that.
                 return c;
         }
  
-
         static char *take_code(char *pos, char *end, char *marker,
                                struct psection **table, struct text section,
                                int *line_nop)
@@ -616,6 +645,12 @@ for that.
                         struct text txt;
                         txt.txt = start;
                         txt.len = pos - start;
+                       /* strip trailing blank lines */
+                       while (!marker && txt.len > 2 &&
+                              start[txt.len-1] == '\n' &&
+                              start[txt.len-2] == '\n')
+                               txt.len -= 1;
+
                         code_add_text(sect, txt, start_line,
                                       marker == NULL);
                 }
@@ -746,7 +781,6 @@ error message - a `code_err_fn`.
  
         struct section *code_extract(char *pos, char *end, code_err_fn error);
  
-
  ## Using the library
  
  Now that we can extract code from a document and link it all together
@@ -772,13 +806,13 @@ This could go wrong if the first line of a code block marked by
  _`` ``` ``_ is indented.  To overcome this we would need to
  record some extra state in each `code_node`.  For now we won't bother.
  
-The indents we insert will all be spaces.  This might not work well
-for `Makefiles`.
+The indents we insert will mostly be spaces.  All-spaces doesn't work
+for `Makefiles`, so if the indent is 8 or more, we use a TAB first.
  
-##### client functions
+##### internal functions
  
-       static void code_print(FILE *out, struct code_node *node,
-                              char *fname)
+       void code_node_print(FILE *out, struct code_node *node,
+                            char *fname)
         {
                 for (; node; node = node->next) {
                         char *c = node->code.txt;
@@ -790,7 +824,10 @@ for `Makefiles`.
                         fprintf(out, "#line %d \"%s\"\n",
                                 node->line_no, fname);
                         while (len && *c) {
-                               fprintf(out, "%*s", node->indent, "");
+                               if (node->indent >= 8)
+                                       fprintf(out, "\t%*s", node->indent - 8, "");
+                               else
+                                       fprintf(out, "%*s", node->indent, "");
                                 if (node->needs_strip) {
                                         if (*c == '\t' && len > 1) {
                                                 c++;
@@ -809,6 +846,9 @@ for `Makefiles`.
                 }
         }
  
+###### exported functions
+       void code_node_print(FILE *out, struct code_node *node, char *fname);
+
  ### Bringing it all together
  
  We are just about ready for the `main` function of the tool which will
@@ -849,14 +889,12 @@ And now we take a single file name, extract the code, and if there are
  no error we write out a file for each appropriate code section.  And
  we are done.
  
-
  ##### client includes
  
         #include <fcntl.h>
         #include <errno.h>
         #include <sys/mman.h>
         #include <string.h>
-       #include <stdio.h>
  
  ##### client functions
  
@@ -867,18 +905,33 @@ we are done.
                 fprintf(stderr, "%s\n", msg);
         }
  
+       static char *strnchr(char *haystack, int len, char needle) /* bounded strchr: look for needle in at most len bytes of haystack */
+       {
+               while (len > 0 && *haystack && *haystack != needle) { /* stop at the length limit, a NUL byte, or a match */
+                       haystack++;
+                       len--;
+               }
+               return len > 0 && *haystack == needle ? haystack : NULL; /* pointer to the match, or NULL if none was found within len bytes */
+       }
+
         int main(int argc, char *argv[])
         {
                 int fd;
                 size_t len;
                 char *file;
+               struct text section = {NULL, 0};
                 struct section *table, *s, *prev;
  
                 errs = 0;
-               if (argc != 2) {
-                       fprintf(stderr, "Usage: mdcode file.mdc\n");
+               if (argc != 2 && argc != 3) {
+                       fprintf(stderr, "Usage: mdcode file.mdc [section]\n");
                         exit(2);
                 }
+               if (argc == 3) {
+                       section.txt = argv[2];
+                       section.len = strlen(argv[2]);
+               }
+
                 fd = open(argv[1], O_RDONLY);
                 if (fd < 0) {
                         fprintf(stderr, "mdcode: cannot open %s: %s\n",
@@ -893,14 +946,25 @@ we are done.
                         (code_free(s->code), prev = s, s = s->next, free(prev))) {
                         FILE *fl;
                         char fname[1024];
-                       if (strncmp(s->section.txt, "Example:", 8) == 0)
-                               continue;
-                       if (strncmp(s->section.txt, "File:", 5) != 0) {
-                               fprintf(stderr, "Unreferenced section is not a file name: %.*s\n",
+                       char *spc = strnchr(s->section.txt, s->section.len, ' ');
+
+                       if (spc > s->section.txt && spc[-1] == ':') {
+                               if (strncmp(s->section.txt, "File: ", 6) != 0 &&
+                                   (section.txt == NULL ||
+                                    text_cmp(s->section, section) != 0))
+                                       /* Ignore this section */
+                                       continue;
+                       } else {
+                               fprintf(stderr, "Code in unreferenced section that is not ignored or a file name: %.*s\n",
                                         s->section.len, s->section.txt);
                                 errs++;
                                 continue;
                         }
+                       if (section.txt) {
+                               if (text_cmp(s->section, section) == 0)
+                                       code_node_print(stdout, s->code, argv[1]);
+                               break;
+                       }
                         copy_fname(fname, sizeof(fname), s->section);
                         if (fname[0] == 0) {
                                 fprintf(stderr, "Missing file name at:%.*s\n",
@@ -915,9 +979,8 @@ we are done.
                                 errs++;
                                 continue;
                         }
-                       code_print(fl, s->code, argv[1]);
+                       code_node_print(fl, s->code, argv[1]);
                         fclose(fl);
                 }
                 exit(!!errs);
         }
-