From fde929c68cfd3a5169c48c0042a4005143f12399 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Sun, 11 May 2014 14:21:26 +1000
Subject: [PATCH] parsergen: add --tag option.

Normally parsergen extracts three sections: header, code, and grammar.
With "--tag foo", it will ignore anything that doesn't start with "foo:",
will extract "foo: header", "foo: code", and "foo: grammar", and only
complain if there are other "foo:" headers.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 csrc/parsergen.mdc | 38 ++++++++++++++++++++++++++++++++++----
 1 file changed, 34 insertions(+), 4 deletions(-)

diff --git a/csrc/parsergen.mdc b/csrc/parsergen.mdc
index 2cfcb45..a7d107c 100644
--- a/csrc/parsergen.mdc
+++ b/csrc/parsergen.mdc
@@ -58,6 +58,12 @@ sections: `header`, `code`, and `grammar`.  The first two will be
 literally copied into the generated `.c` and `.h`. files.  The last
 contains the grammar.  This is tokenised with "[scanner][]".
 
+If the `--tag` option is given, then any top level header that doesn't
+start with the tag is ignored, and the tag is stripped from the rest.  So
+`--tag Foo`
+means that the three needed sections must be `Foo: header`, `Foo: code`,
+and `Foo: grammar`.
+
 [mdcode]: mdcode.html
 [scanner]: scanner.html
 
@@ -103,6 +109,9 @@ comparing we define `text_is` and `prtxt`, which should possibly go in
 `mdcode`.  `scanner` does provide `text_dump` which is useful for strings
 which might contain control characters.
 
+`strip_tag` is a bit like `strncmp`, but adds a test for a colon,
+because that is what we need to detect tags.
+
 ###### functions
 	static int text_is(struct text t, char *s)
 	{
@@ -114,6 +123,20 @@ which might contain control characters.
 		printf("%.*s", t.len, t.txt);
 	}
 
+	static int strip_tag(struct text *t, char *tag)
+	{
+		int taglen = strlen(tag);
+		int skip = taglen + 1;	/* tag plus its trailing ':' */
+		if (t->len <= skip || t->txt[taglen] != ':' ||
+		    strncmp(t->txt, tag, taglen) != 0)
+			return 0;	/* no "tag:" prefix, or nothing after it */
+		for (; skip < t->len && t->txt[skip] == ' '; skip++)
+			;	/* also drop spaces following the colon */
+		t->txt += skip;
+		t->len -= skip;
+		return 1;
+	}
+
 ### Symbols
 
 Productions are comprised primarily of symbols - terminal and
@@ -2069,17 +2092,19 @@ grammar file).
 		{ "SLR",	0, NULL, 'S' },
 		{ "LALR",	0, NULL, 'L' },
 		{ "LR1",	0, NULL, '1' },
+		{ "tag",	1, NULL, 't' },
 		{ "report",	0, NULL, 'R' },
 		{ "output",	1, NULL, 'o' },
 		{ NULL,		0, NULL, 0   }
 	};
-	const char *options = "05SL1Ro:";
+	const char *options = "05SL1t:Ro:";
 
 ###### process arguments
 	int opt;
 	char *outfile = NULL;
 	char *infile;
 	char *name;
+	char *tag = NULL;
 	int report = 1;
 	enum grammar_type type = LR05;
 	while ((opt = getopt_long(argc, argv, options,
@@ -2099,6 +2124,8 @@ grammar file).
 			report = 2; break;
 		case 'o':
 			outfile = optarg; break;
+		case 't':
+			tag = optarg; break;
 		default:
 			fprintf(stderr, "Usage: parsergen ...\n");
 			exit(1);
@@ -2164,11 +2191,14 @@ parser with neither. "grammar" must be provided.
 	struct code_node *code = NULL;
 	struct code_node *gram = NULL;
 	for (s = table; s; s = s->next) {
-		if (text_is(s->section, "header"))
+		struct text sec = s->section;
+		if (tag && !strip_tag(&sec, tag))
+			continue;
+		if (text_is(sec, "header"))
 			hdr = s->code;
-		else if (text_is(s->section, "code"))
+		else if (text_is(sec, "code"))
 			code = s->code;
-		else if (text_is(s->section, "grammar"))
+		else if (text_is(sec, "grammar"))
 			gram = s->code;
 		else {
 			fprintf(stderr, "Unknown content section: %.*s\n",
-- 
2.43.0