X-Git-Url: https://ocean-lang.org/code/?a=blobdiff_plain;f=csrc%2Fmdcode.mdc;h=cf6887dea25c316bab6bc6679ce2f8e2d622a046;hb=85e747e065a6f4b2236acc8b3a2471de4d831c95;hp=dc78a12c1d3ec62fc1d96da351e97cbec2f25cbd;hpb=bf502feaba385e5e8ebd5f80c005522c59660496;p=ocean diff --git a/csrc/mdcode.mdc b/csrc/mdcode.mdc index dc78a12..cf6887d 100644 --- a/csrc/mdcode.mdc +++ b/csrc/mdcode.mdc @@ -55,20 +55,25 @@ times. Allowing this might make some sense, but it is probably a mistake, and prohibiting it make some of the code a bit cleaner. Equally, every section of code should be interpolated at least once - -with two exceptions. These exceptions are imposed by the tool, not -the library. A different client could impose different rules on the -names of top-level code sections. - -The first exception we have already seen. A section name starting -__Example:__ indicates code that is not to be included in the final product. - -The second exception is for the top level code sections which will be -written to files. Again these are identified by their section name. -This must start with __File:__ the following text (after optional -spaces) will be used as a file name. - -Any section containing code that does not start __Example:__ or -__File:__ must be included in some other section exactly once. +with one exception. This exception is imposed by the +tool, not the library. A different client could impose different +rules on the names of top-level code sections. + +One example of the exception we have already seen. A section name +starting __Example:__ indicates code that is not to be included in the +final product. Any leading word will do, providing there is a space, +and the first space is preceded by a colon, that section name will be +ignored. + +A special case of this exception exists for the leading word +__File__. These sections are the top level code sections and they +will be written to the named file. 
Thus a section named +__File: foo__ should not be referenced by another section, and its +contents after all references are expanded will be written to the file +__foo__. + +Any section containing code that does not start __Word:__ +must be included in some other section exactly once. ### Multiple files @@ -93,6 +98,7 @@ will "do the right thing". ### File: mdcode.h + #include ## exported types ## exported functions @@ -113,11 +119,11 @@ will "do the right thing". libmdcode.o : libmdcode.c mdcode.h $(CC) $(CFLAGS) -c libmdcode.c - ### File: md2c.c #include #include + #include #include "mdcode.h" @@ -153,6 +159,14 @@ The `code_node` will also have an `indent` depth which eventually gets set to the sum for the indents from all references on the path from the root. +Finally we need to know if the `code_node` was recognised by being +indented or not. If it was, the client of this data will want to +strip off the leading tab or 4 spaces. Hence a `needs_strip` flag is +needed. This will be set to 8 if a tab is found and 4 if four spaces are found. +This means the relative indent of text in the node +is `node->indent - node->needs_strip`. +The relative indent is needed for detecting indents in the overall file. + ##### exported types struct text { @@ -170,6 +184,7 @@ the root. struct text code; int indent; int line_no; + int needs_strip; struct code_node *next; struct section *child; }; @@ -266,7 +281,7 @@ in a new node. ##### internal functions static void code_add_text(struct psection *where, struct text txt, - int line_no) + int line_no, int needs_strip) { struct code_node *n; if (txt.len == 0) @@ -275,6 +290,13 @@ in a new node. 
n->code = txt; n->indent = 0; n->line_no = line_no; + if (needs_strip) { + if (txt.txt[0] == '\t') + n->needs_strip = 8; + else + n->needs_strip = 4; + } else + n->needs_strip = 0; n->next = NULL; n->child = NULL; if (where->last) @@ -316,21 +338,31 @@ However when adding a link, we might be able to include it in the last Now we need a lookup table to be able to find sections by name. Something that provides an `n*log(N)` search time is probably justified, but for now I want a minimal stand-alone program so a -linked list managed by insertion-sort will do. As a comparison -function it is easiest to sort based on length before content. So -sections won't be in standard lexical order, but that isn't important. +linked list managed by insertion-sort will do. + +The text compare function will likely be useful for any clients of our +library, so we may as well export it. If we cannot find a section, we simply want to create it. This allows sections and references to be created in any order. Sections with no references or no content will cause a warning eventually. +#### exported functions + + int text_cmp(struct text a, struct text b); + #### internal functions - static int text_cmp(struct text a, struct text b) + int text_cmp(struct text a, struct text b) { - if (a.len != b.len) + int len = a.len; + if (len > b.len) + len = b.len; + int cmp = strncmp(a.txt, b.txt, len); + if (cmp) + return cmp; + else return a.len - b.len; - return strncmp(a.txt, b.txt, a.len); } static struct psection *section_find(struct psection **list, struct text name) @@ -517,7 +549,7 @@ There are two sorts of end markers: the presence of a particular string, or the absence of an indent. We will use a string to represent a presence, and a `NULL` to represent the absence. -While looking at code we don't think about paragraphs are all - just +While looking at code we don't think about paragraphs at all - just look for a line that starts with the right thing. 
Every line that is still code then needs to be examined to see if it is a section reference. @@ -533,6 +565,11 @@ number of spaces (counting 8 for tabs) after the natural indent of the code (which is a tab or 4 spaces). We use a separate function `count_spaces` for that. +If there are completely blank lines (no indent) at the end of the found code, +these should be considered to be spacing between the code and the next section, +and so not included in the code. When a marker is used to explicitly mark the +end of the code, we don't need to check for these blank lines. + #### internal functions static int count_space(char *sol, char *p) @@ -548,7 +585,6 @@ for that. return c; } - static char *take_code(char *pos, char *end, char *marker, struct psection **table, struct text section, int *line_nop) @@ -592,7 +628,8 @@ for that. struct text txt; txt.txt = start; txt.len = pos - start; - code_add_text(sect, txt, start_line); + code_add_text(sect, txt, start_line, + marker == NULL); } ref = take_header(t, end); if (ref.len) { @@ -608,7 +645,14 @@ for that. struct text txt; txt.txt = start; txt.len = pos - start; - code_add_text(sect, txt, start_line); + /* strip trailing blank lines */ + while (!marker && txt.len > 2 && + start[txt.len-1] == '\n' && + start[txt.len-2] == '\n') + txt.len -= 1; + + code_add_text(sect, txt, start_line, + marker == NULL); } if (marker) { pos = skip_line(pos, end); @@ -737,7 +781,6 @@ error message - a `code_err_fn`. struct section *code_extract(char *pos, char *end, code_err_fn error); - ## Using the library Now that we can extract code from a document and link it all together @@ -761,31 +804,31 @@ If the first line of a code block is indented, then either one tab or This could go wrong if the first line of a code block marked by _`` ``` ``_ is indented. To overcome this we would need to -record someextra state in each `code_node`. For now we won't bother. +record some extra state in each `code_node`. For now we won't bother.
-The indents we insert will all be spaces. This might not work well -for `Makefiles`. +The indents we insert will mostly be spaces. All-spaces doesn't work +for `Makefiles`, so if the indent is 8 or more, we use a TAB first. -##### client functions +##### internal functions - static void code_print(FILE *out, struct code_node *node, - char *fname) + void code_node_print(FILE *out, struct code_node *node, + char *fname) { for (; node; node = node->next) { char *c = node->code.txt; int len = node->code.len; - int undent = 0; if (!len) continue; fprintf(out, "#line %d \"%s\"\n", node->line_no, fname); - if (*c == ' ' || *c == '\t') - undent = 1; while (len && *c) { - fprintf(out, "%*s", node->indent, ""); - if (undent) { + if (node->indent >= 8) + fprintf(out, "\t%*s", node->indent - 8, ""); + else + fprintf(out, "%*s", node->indent, ""); + if (node->needs_strip) { if (*c == '\t' && len > 1) { c++; len--; @@ -803,6 +846,9 @@ for `Makefiles`. } } +###### exported functions + void code_node_print(FILE *out, struct code_node *node, char *fname); + ### Bringing it all together We are just about ready for the `main` function of the tool which will @@ -843,14 +889,12 @@ And now we take a single file name, extract the code, and if there are no error we write out a file for each appropriate code section. And we are done. - ##### client includes #include #include #include #include - #include ##### client functions @@ -861,18 +905,33 @@ we are done. fprintf(stderr, "%s\n", msg); } + static char *strnchr(char *haystack, int len, char needle) + { + while (len > 0 && *haystack && *haystack != needle) { + haystack++; + len--; + } + return len > 0 && *haystack == needle ? 
haystack : NULL; + } + int main(int argc, char *argv[]) { int fd; size_t len; char *file; + struct text section = {NULL, 0}; struct section *table, *s, *prev; errs = 0; - if (argc != 2) { - fprintf(stderr, "Usage: mdcode file.mdc\n"); + if (argc != 2 && argc != 3) { + fprintf(stderr, "Usage: mdcode file.mdc [section]\n"); exit(2); } + if (argc == 3) { + section.txt = argv[2]; + section.len = strlen(argv[2]); + } + fd = open(argv[1], O_RDONLY); if (fd < 0) { fprintf(stderr, "mdcode: cannot open %s: %s\n", @@ -887,14 +946,25 @@ we are done. (code_free(s->code), prev = s, s = s->next, free(prev))) { FILE *fl; char fname[1024]; - if (strncmp(s->section.txt, "Example:", 8) == 0) - continue; - if (strncmp(s->section.txt, "File:", 5) != 0) { - fprintf(stderr, "Unreferenced section is not a file name: %.*s\n", + char *spc = strnchr(s->section.txt, s->section.len, ' '); + + if (spc > s->section.txt && spc[-1] == ':') { + if (strncmp(s->section.txt, "File: ", 6) != 0 && + (section.txt == NULL || + text_cmp(s->section, section) != 0)) + /* Ignore this section */ + continue; + } else { + fprintf(stderr, "Code in unreferenced section that is not ignored or a file name: %.*s\n", s->section.len, s->section.txt); errs++; continue; } + if (section.txt) { + if (text_cmp(s->section, section) == 0) + code_node_print(stdout, s->code, argv[1]); + break; + } copy_fname(fname, sizeof(fname), s->section); if (fname[0] == 0) { fprintf(stderr, "Missing file name at:%.*s\n", @@ -909,9 +979,8 @@ we are done. errs++; continue; } - code_print(fl, s->code, argv[1]); + code_node_print(fl, s->code, argv[1]); fclose(fl); } exit(!!errs); } -