commit 133667f

shrub  ·  2026-04-07 16:18:28 +0000 UTC
parent 133667f
initial
7 files changed,  +1139, -0
A main.c
A readme
+8, -0
1@@ -0,0 +1,8 @@
2+AlignEscapedNewlines: DontAlign
3+AlwaysBreakAfterDefinitionReturnType: All
4+BreakBeforeBraces: Linux
5+ColumnLimit: 0
6+IndentWidth: 8
7+SortIncludes: false
8+TabWidth: 8
9+UseTab: ForIndentation
+4, -0
1@@ -0,0 +1,4 @@
2+config.mak
3+test-musl.mk
4+*.o
5+shin
+20, -0
 1@@ -0,0 +1,20 @@
 2+BIN = shin
 3+SRCS = main.c shinobi.c
 4+FMTSRCS = $(SRCS) shinobi.h
 5+OBJS = $(SRCS:.c=.o)
 6+
 7+CC = clang
 8+CFLAGS = -O2 -Wall -Wextra -pedantic
 9+CPPFLAGS =
10+LDFLAGS = -static
11+
12+all: $(BIN)
13+
14+$(BIN): $(OBJS)
15+	$(CC) $(LDFLAGS) -o $(BIN) $(OBJS) $(LDLIBS)
16+
17+fmt:
18+	clang-format -i $(FMTSRCS)
19+
20+clean:
21+	rm -f $(BIN) $(OBJS)
A main.c
+168, -0
  1@@ -0,0 +1,168 @@
  2+#include "shinobi.h"
  3+
  4+#include <stdio.h>
  5+#include <stdlib.h>
  6+#include <string.h>
  7+
  8+static char *
  9+readfile(const char *path)
 10+{
 11+	FILE *fp;
 12+	long n;
 13+	char *buf;
 14+
 15+	fp = fopen(path, "rb");
 16+	if (!fp)
 17+		return 0;
 18+	if (fseek(fp, 0, SEEK_END) < 0) {
 19+		fclose(fp);
 20+		return 0;
 21+	}
 22+	n = ftell(fp);
 23+	if (n < 0) {
 24+		fclose(fp);
 25+		return 0;
 26+	}
 27+	if (fseek(fp, 0, SEEK_SET) < 0) {
 28+		fclose(fp);
 29+		return 0;
 30+	}
 31+	buf = malloc((size_t)n + 1);
 32+	if (!buf) {
 33+		fclose(fp);
 34+		return 0;
 35+	}
 36+	if (fread(buf, 1, (size_t)n, fp) != (size_t)n) {
 37+		fclose(fp);
 38+		free(buf);
 39+		return 0;
 40+	}
 41+	buf[n] = 0;
 42+	fclose(fp);
 43+	return buf;
 44+}
 45+
 46+static const char *
 47+assignname(enum AssignOp op)
 48+{
 49+	switch (op) {
 50+	case ASSIGN_EQ:
 51+		return "=";
 52+	case ASSIGN_PLUS_EQ:
 53+		return "+=";
 54+	case ASSIGN_COLON_EQ:
 55+		return ":=";
 56+	case ASSIGN_QMARK_EQ:
 57+		return "?=";
 58+	case ASSIGN_BANG_EQ:
 59+		return "!=";
 60+	}
 61+	return "?";
 62+}
 63+
 64+static const char *
 65+condname(enum CondKind kind)
 66+{
 67+	switch (kind) {
 68+	case COND_IFEQ:
 69+		return "ifeq";
 70+	case COND_IFNEQ:
 71+		return "ifneq";
 72+	case COND_ELSE:
 73+		return "else";
 74+	case COND_ENDIF:
 75+		return "endif";
 76+	}
 77+	return "?";
 78+}
 79+
 80+static void
 81+printwords(const char *name, const struct StrList *list)
 82+{
 83+	size_t i;
 84+
 85+	printf("  %s:", name);
 86+	for (i = 0; i < list->n; i++)
 87+		printf(" [%s]", list->v[i]);
 88+	printf("\n");
 89+}
 90+
 91+/*dump ast, TODO make it actually do something useful*/
 92+/*also TODO
 93+ * variable eval
 94+ * build a proper graph
 95+ * a lot more
 96+ */
 97+static void
 98+dump(const struct Ast *ast)
 99+{
100+	size_t i, j;
101+
102+	for (i = 0; i < ast->n; i++) {
103+		const struct Node *s = &ast->v[i];
104+		printf("%d-%d ", s->loc.line0, s->loc.line1);
105+		switch (s->kind) {
106+		case NODE_BLANK:
107+			puts("blank");
108+			break;
109+		case NODE_COMMENT:
110+			printf("comment %s\n", s->data.raw.text);
111+			break;
112+		case NODE_INCLUDE:
113+			printf("include optional=%d path=%s\n", s->data.include.optional, s->data.include.path);
114+			break;
115+		case NODE_COND:
116+			printf("cond %s", condname(s->data.cond.kind));
117+			if (s->data.cond.arg1)
118+				printf(" arg1=%s", s->data.cond.arg1);
119+			if (s->data.cond.arg2)
120+				printf(" arg2=%s", s->data.cond.arg2);
121+			printf("\n");
122+			break;
123+		case NODE_ASSIGN:
124+			printf("assign%s lhs=%s op=%s rhs=%s\n",
125+			       s->data.assign.tspec ? " target" : "",
126+			       s->data.assign.lhs,
127+			       assignname(s->data.assign.op),
128+			       s->data.assign.rhs);
129+			if (s->data.assign.tspec)
130+				printwords("targets", &s->data.assign.targets);
131+			break;
132+		case NODE_RULE:
133+			puts("rule");
134+			printwords("targets", &s->data.rule.targets);
135+			printwords("prereqs", &s->data.rule.prereqs);
136+			printwords("order_only", &s->data.rule.order_only);
137+			for (j = 0; j < s->data.rule.recipes.n; j++)
138+				printf("  recipe: %s\n", s->data.rule.recipes.v[j]);
139+			break;
140+		case NODE_RAW:
141+			printf("raw %s\n", s->data.raw.text);
142+			break;
143+		}
144+	}
145+}
146+
147+int
148+main(int argc, char **argv)
149+{
150+	const char *path;
151+	char *src;
152+	struct Ast ast;
153+
154+	path = argc > 1 ? argv[1] : "Makefile";
155+	src = readfile(path);
156+	if (!src) {
157+		fprintf(stderr, "could not read %s\n", path);
158+		return 1;
159+	}
160+	if (parse(path, src, &ast) < 0) {
161+		fprintf(stderr, "parse error in %s\n", path);
162+		free(src);
163+		return 1;
164+	}
165+	dump(&ast);
166+	freeast(&ast);
167+	free(src);
168+	return 0;
169+}
A readme
+6, -0
1@@ -0,0 +1,6 @@
2+shinobi
3+-------
4+
5+a small parser for a small subset of gnu make that dumps a flat ast.
6+
7+the end goal here is to convert gnu makefiles to ninja.
+812, -0
  1@@ -0,0 +1,812 @@
  2+#include "shinobi.h"
  3+
  4+#include <ctype.h>
  5+#include <stddef.h>
  6+#include <stdio.h>
  7+#include <stdlib.h>
  8+#include <string.h>
  9+
 10+struct AssignScan {
 11+	size_t pos;
 12+	size_t len;
 13+	enum AssignOp op;
 14+	int ok;
 15+};
 16+
 17+struct Inc {
 18+	char **v;
 19+	size_t n;
 20+};
 21+
 22+static void *
 23+xmalloc(size_t n)
 24+{
 25+	void *p;
 26+
 27+	p = malloc(n ? n : 1);
 28+	if (!p) {
 29+		fprintf(stderr, "out of memory\n");
 30+		exit(1);
 31+	}
 32+	return p;
 33+}
 34+
 35+static void *
 36+xrealloc(void *p, size_t n)
 37+{
 38+	void *q;
 39+
 40+	q = realloc(p, n ? n : 1);
 41+	if (!q) {
 42+		fprintf(stderr, "out of memory\n");
 43+		exit(1);
 44+	}
 45+	return q;
 46+}
 47+
 48+static char *
 49+xstrndup(const char *s, size_t n)
 50+{
 51+	char *p;
 52+
 53+	p = xmalloc(n + 1);
 54+	memcpy(p, s, n);
 55+	p[n] = 0;
 56+	return p;
 57+}
 58+
 59+static char *
 60+xstrdup(const char *s)
 61+{
 62+	return xstrndup(s, strlen(s));
 63+}
 64+
 65+static char *
 66+trimdup(const char *s, size_t n)
 67+{
 68+	size_t i, j;
 69+
 70+	for (i = 0; i < n && isspace((unsigned char)s[i]); i++)
 71+		;
 72+	for (j = n; j > i && isspace((unsigned char)s[j - 1]); j--)
 73+		;
 74+	return xstrndup(s + i, j - i);
 75+}
 76+
 77+static int
 78+haskw(const char *s, const char *kw)
 79+{
 80+	size_t n;
 81+
 82+	n = strlen(kw);
 83+	if (strncmp(s, kw, n) != 0)
 84+		return 0;
 85+	return s[n] == 0 || isspace((unsigned char)s[n]);
 86+}
 87+
 88+static int
 89+hascont(const char *s, size_t n)
 90+{
 91+	size_t i, count;
 92+
 93+	if (!n)
 94+		return 0;
 95+	for (i = n; i > 0 && (s[i - 1] == '\n' || s[i - 1] == '\r'); i--)
 96+		;
 97+	if (!i)
 98+		return 0;
 99+	count = 0;
100+	while (i > 0 && s[i - 1] == '\\') {
101+		count++;
102+		i--;
103+	}
104+	return (count & 1) != 0;
105+}
106+
/*
 * Skip a `$` reference starting at s[*i] within the n-byte text s.
 *
 * Returns 0 (leaving *i alone) when s[*i] is not `$` or nothing follows
 * it. Otherwise returns 1 and advances *i:
 *  - for `$(` or `${`, to just past the matching closer, or to n if the
 *    reference is unterminated;
 *  - for any other `$x`, to the character right after the `$`.
 *
 * Nested references are skipped recursively, each matched against its
 * own closer. A single shared depth counter is not enough here: with
 * mixed brackets such as `$(a ${b})` the inner `}` would never lower it
 * and the rest of the line would be swallowed.
 */
static int
skipref(const char *s, size_t n, size_t *i)
{
	size_t j;
	char open, close;

	j = *i;
	if (j + 1 >= n || s[j] != '$')
		return 0;
	open = s[j + 1];
	if (open != '(' && open != '{') {
		*i = j + 1;
		return 1;
	}
	close = open == '(' ? ')' : '}';
	j += 2;
	while (j < n) {
		if (s[j] == '$' && j + 1 < n && (s[j + 1] == '(' || s[j + 1] == '{')) {
			/* always advances j by at least 2, so the loop terminates */
			skipref(s, n, &j);
			continue;
		}
		if (s[j++] == close)
			break;
	}
	*i = j;
	return 1;
}
137+
/*
 * Cut a trailing `#` comment off s in place, along with the whitespace
 * before it. A `#` inside a `$` reference does not count. A line whose
 * first non-blank character is `#` is left untouched.
 */
static void
stripcomment(char *s)
{
	size_t len = strlen(s);
	size_t pos;

	for (pos = 0; pos < len && isspace((unsigned char)s[pos]); pos++)
		;
	if (pos < len && s[pos] == '#')
		return;

	pos = 0;
	while (pos < len) {
		/* on success pos already sits just past the reference */
		if (s[pos] == '$' && skipref(s, len, &pos))
			continue;
		if (s[pos] == '#')
			break;
		pos++;
	}
	if (pos >= len)
		return;
	while (pos > 0 && isspace((unsigned char)s[pos - 1]))
		pos--;
	s[pos] = 0;
}
165+
166+static struct AssignScan
167+findassign(const char *s, size_t n, size_t start)
168+{
169+	size_t i;
170+	struct AssignScan out;
171+
172+	memset(&out, 0, sizeof(out));
173+	for (i = start; i < n; i++) {
174+		if (s[i] == '$' && skipref(s, n, &i)) {
175+			i--;
176+			continue;
177+		}
178+		if (i + 1 < n && s[i] == '+' && s[i + 1] == '=') {
179+			out.pos = i;
180+			out.len = 2;
181+			out.op = ASSIGN_PLUS_EQ;
182+			out.ok = 1;
183+			return out;
184+		}
185+		if (i + 1 < n && s[i] == ':' && s[i + 1] == '=') {
186+			out.pos = i;
187+			out.len = 2;
188+			out.op = ASSIGN_COLON_EQ;
189+			out.ok = 1;
190+			return out;
191+		}
192+		if (i + 1 < n && s[i] == '?' && s[i + 1] == '=') {
193+			out.pos = i;
194+			out.len = 2;
195+			out.op = ASSIGN_QMARK_EQ;
196+			out.ok = 1;
197+			return out;
198+		}
199+		if (i + 1 < n && s[i] == '!' && s[i + 1] == '=') {
200+			out.pos = i;
201+			out.len = 2;
202+			out.op = ASSIGN_BANG_EQ;
203+			out.ok = 1;
204+			return out;
205+		}
206+		if (s[i] == '=') {
207+			out.pos = i;
208+			out.len = 1;
209+			out.op = ASSIGN_EQ;
210+			out.ok = 1;
211+			return out;
212+		}
213+	}
214+	return out;
215+}
216+
/*
 * Offset of the first `want` character in the n-byte text s that is not
 * inside a `$` reference, or -1 if there is none.
 */
static ptrdiff_t
findtop(const char *s, size_t n, char want)
{
	size_t k = 0;

	while (k < n) {
		/* on success k already sits just past the reference */
		if (s[k] == '$' && skipref(s, n, &k))
			continue;
		if (s[k] == want)
			return (ptrdiff_t)k;
		k++;
	}
	return -1;
}
232+
233+static void
234+splitwords(struct StrList *out, const char *s, size_t n)
235+{
236+	size_t i, j, start;
237+
238+	for (i = 0; i < n;) {
239+		while (i < n && isspace((unsigned char)s[i]))
240+			i++;
241+		if (i >= n)
242+			break;
243+		start = i;
244+		j = i;
245+		while (j < n) {
246+			if (s[j] == '$' && skipref(s, n, &j))
247+				continue;
248+			if (isspace((unsigned char)s[j]))
249+				break;
250+			j++;
251+		}
252+		out->v = xrealloc(out->v, (out->n + 1) * sizeof(out->v[0]));
253+		out->v[out->n++] = xstrndup(s + start, j - start);
254+		i = j;
255+	}
256+}
257+
258+static int
259+readline(const char **src, int *lineno, struct PreLine *line)
260+{
261+	const char *p, *start;
262+	size_t cap, len, chunk;
263+	char *buf;
264+	int line0, line1;
265+	int first;
266+
267+	p = *src;
268+	if (!*p)
269+		return 0;
270+
271+	buf = 0;
272+	cap = 0;
273+	len = 0;
274+	line0 = *lineno;
275+	line1 = *lineno;
276+	first = 1;
277+	line->isrecipe = 0;
278+
279+	for (;;) {
280+		start = p;
281+		while (*p && *p != '\n')
282+			p++;
283+		chunk = (size_t)(p - start);
284+		if (*p == '\n')
285+			p++;
286+
287+		if (first && chunk > 0 && start[0] == '\t')
288+			line->isrecipe = 1;
289+
290+		if (len + chunk + 1 > cap) {
291+			cap = (len + chunk + 1) * 2;
292+			buf = xrealloc(buf, cap);
293+		}
294+		memcpy(buf + len, start, chunk);
295+		len += chunk;
296+		buf[len] = 0;
297+
298+		if (line->isrecipe || !hascont(buf, len))
299+			break;
300+		len--;
301+		buf[len++] = ' ';
302+		line1++;
303+		first = 0;
304+		if (!*p)
305+			break;
306+	}
307+
308+	*lineno = line1 + 1;
309+	*src = p;
310+	line->text = buf ? buf : xstrndup("", 0);
311+	line->line0 = line0;
312+	line->line1 = line1;
313+	return 1;
314+}
315+
/*
 * Read the whole file at path into a fresh NUL-terminated heap buffer.
 * Returns a null pointer if the file cannot be opened, measured, or
 * read in full. The caller frees the result.
 */
static char *
readfile(const char *path)
{
	FILE *in;
	long size;
	char *data = 0;

	in = fopen(path, "rb");
	if (!in)
		return 0;
	/* measure the file by seeking to its end, then rewind */
	if (fseek(in, 0, SEEK_END) < 0)
		goto done;
	size = ftell(in);
	if (size < 0)
		goto done;
	if (fseek(in, 0, SEEK_SET) < 0)
		goto done;
	data = xmalloc((size_t)size + 1);
	if (fread(data, 1, (size_t)size, in) != (size_t)size) {
		/* short read: hand nothing back */
		free(data);
		data = 0;
		goto done;
	}
	data[size] = 0;
done:
	fclose(in);
	return data;
}
349+
350+static void
351+popinc(struct Inc *inc)
352+{
353+	if (!inc->n)
354+		return;
355+	free(inc->v[inc->n - 1]);
356+	inc->n--;
357+}
358+
359+static int
360+hasinc(struct Inc *inc, const char *path)
361+{
362+	size_t i;
363+
364+	for (i = 0; i < inc->n; i++) {
365+		if (strcmp(inc->v[i], path) == 0)
366+			return 1;
367+	}
368+	return 0;
369+}
370+
/*
 * Return 1 if s is a non-empty string containing neither whitespace
 * nor `$`, i.e. a single literal path with nothing to expand.
 */
static int
isplainpath(const char *s)
{
	const char *c;

	if (*s == 0)
		return 0;
	for (c = s; *c; c++) {
		if (*c == '$' || isspace((unsigned char)*c))
			return 0;
	}
	return 1;
}
384+
/*
 * Heap copy of everything in path before its last `/`, or "." when
 * path contains no slash.
 */
static char *
dirpart(const char *path)
{
	const char *cut = strrchr(path, '/');

	return cut ? xstrndup(path, (size_t)(cut - path)) : xstrdup(".");
}
395+
/* Heap string "dir/name"; a slash is always inserted between the two. */
static char *
joinpath(const char *dir, const char *name)
{
	size_t dlen = strlen(dir);
	size_t nlen = strlen(name);
	char *res = xmalloc(dlen + 1 + nlen + 1);
	char *w = res;

	memcpy(w, dir, dlen);
	w += dlen;
	*w++ = '/';
	/* nlen + 1 also copies name's terminating NUL */
	memcpy(w, name, nlen + 1);
	return res;
}
411+
412+static int
413+preprocfile(const char *path, struct Pre *pre, struct Inc *inc)
414+{
415+	char *src, *dir;
416+	const char *p;
417+	int lineno;
418+	struct PreLine line;
419+
420+	if (hasinc(inc, path)) {
421+		fprintf(stderr, "include cycle: %s\n", path);
422+		return -1;
423+	}
424+	src = readfile(path);
425+	if (!src)
426+		return -1;
427+	inc->v = xrealloc(inc->v, (inc->n + 1) * sizeof(inc->v[0]));
428+	inc->v[inc->n++] = xstrdup(path);
429+	dir = dirpart(path);
430+	p = src;
431+	lineno = 1;
432+	while (readline(&p, &lineno, &line)) {
433+		line.path = xstrdup(path);
434+		if (!line.isrecipe) {
435+			char *trim, *incarg;
436+			int opt;
437+
438+			stripcomment(line.text);
439+			trim = trimdup(line.text, strlen(line.text));
440+			if (haskw(trim, "include") || haskw(trim, "-include")) {
441+				char *full;
442+				int rc;
443+
444+				opt = haskw(trim, "-include");
445+				incarg = trimdup(trim + (opt ? 8 : 7), strlen(trim + (opt ? 8 : 7)));
446+				if (isplainpath(incarg)) {
447+					full = incarg[0] == '/' ? xstrdup(incarg) : joinpath(dir, incarg);
448+					free(line.path);
449+					free(line.text);
450+					free(trim);
451+					free(incarg);
452+					rc = preprocfile(full, pre, inc);
453+					free(full);
454+					if (rc < 0 && !opt) {
455+						free(dir);
456+						free(src);
457+						popinc(inc);
458+						return -1;
459+					}
460+					continue;
461+				}
462+				free(incarg);
463+			}
464+			free(trim);
465+		}
466+		pre->v = xrealloc(pre->v, (pre->n + 1) * sizeof(pre->v[0]));
467+		pre->v[pre->n++] = line;
468+	}
469+	free(dir);
470+	free(src);
471+	popinc(inc);
472+	return 0;
473+}
474+
475+int
476+preproc(const char *path, struct Pre *pre)
477+{
478+	struct Inc inc;
479+
480+	memset(pre, 0, sizeof(*pre));
481+	memset(&inc, 0, sizeof(inc));
482+	if (preprocfile(path, pre, &inc) < 0) {
483+		free(inc.v);
484+		freepre(pre);
485+		return -1;
486+	}
487+	free(inc.v);
488+	return 0;
489+}
490+
491+void
492+freepre(struct Pre *pre)
493+{
494+	size_t i;
495+
496+	for (i = 0; i < pre->n; i++)
497+		free(pre->v[i].path);
498+	for (i = 0; i < pre->n; i++)
499+		free(pre->v[i].text);
500+	free(pre->v);
501+	pre->v = 0;
502+	pre->n = 0;
503+}
504+
505+static struct Node
506+parseinclude(const struct PreLine *line, const char *s)
507+{
508+	struct Node state;
509+	size_t off;
510+
511+	memset(&state, 0, sizeof(state));
512+	state.kind = NODE_INCLUDE;
513+	state.loc.line0 = line->line0;
514+	state.loc.line1 = line->line1;
515+
516+	if (haskw(s, "-include")) {
517+		state.data.include.optional = 1;
518+		off = strlen("-include");
519+	} else {
520+		off = strlen("include");
521+	}
522+	state.data.include.path = trimdup(s + off, strlen(s + off));
523+	return state;
524+}
525+
526+static struct Node
527+parsecond(const struct PreLine *line, const char *s)
528+{
529+	struct Node state;
530+	const char *p;
531+	size_t n, mid;
532+
533+	memset(&state, 0, sizeof(state));
534+	state.kind = NODE_COND;
535+	state.loc.line0 = line->line0;
536+	state.loc.line1 = line->line1;
537+	state.data.cond.raw = xstrndup(s, strlen(s));
538+
539+	if (haskw(s, "ifeq")) {
540+		state.data.cond.kind = COND_IFEQ;
541+		p = s + 4;
542+	} else if (haskw(s, "ifneq")) {
543+		state.data.cond.kind = COND_IFNEQ;
544+		p = s + 5;
545+	} else if (haskw(s, "else")) {
546+		state.data.cond.kind = COND_ELSE;
547+		p = s + 4;
548+		state.data.cond.arg1 = trimdup(p, strlen(p));
549+		return state;
550+	} else {
551+		state.data.cond.kind = COND_ENDIF;
552+		return state;
553+	}
554+
555+	while (*p && isspace((unsigned char)*p))
556+		p++;
557+	n = strlen(p);
558+	if (!n) {
559+		state.data.cond.arg1 = xstrndup("", 0);
560+		state.data.cond.arg2 = xstrndup("", 0);
561+		return state;
562+	}
563+	if ((p[0] == '(' && p[n - 1] == ')') || (p[0] == '"' && p[n - 1] == '"')) {
564+		mid = n / 2;
565+		state.data.cond.arg1 = xstrndup(p, n);
566+		state.data.cond.arg2 = xstrndup("", 0);
567+		if (p[0] == '(') {
568+			mid = (size_t)(findtop(p + 1, n - 2, ','));
569+			if (mid != (size_t)-1) {
570+				free(state.data.cond.arg1);
571+				free(state.data.cond.arg2);
572+				state.data.cond.arg1 = trimdup(p + 1, mid);
573+				state.data.cond.arg2 = trimdup(p + 2 + mid, n - 3 - mid);
574+			}
575+		}
576+	}
577+	return state;
578+}
579+
580+static struct Node
581+parseassign(const struct PreLine *line, const char *s, size_t n, size_t base, struct AssignScan as, int tspec, size_t tend)
582+{
583+	struct Node state;
584+
585+	memset(&state, 0, sizeof(state));
586+	state.kind = NODE_ASSIGN;
587+	state.loc.line0 = line->line0;
588+	state.loc.line1 = line->line1;
589+	state.data.assign.op = as.op;
590+	state.data.assign.tspec = tspec;
591+	state.data.assign.lhs = trimdup(s + base, as.pos - base);
592+	state.data.assign.rhs = trimdup(s + as.pos + as.len, n - as.pos - as.len);
593+	if (tspec)
594+		splitwords(&state.data.assign.targets, s, tend);
595+	return state;
596+}
597+
598+static struct Node
599+parserule(const struct PreLine *line, const char *s, size_t n, size_t colon)
600+{
601+	struct Node state;
602+	const char *rhs;
603+	size_t rhsn, split;
604+
605+	memset(&state, 0, sizeof(state));
606+	state.kind = NODE_RULE;
607+	state.loc.line0 = line->line0;
608+	state.loc.line1 = line->line1;
609+
610+	splitwords(&state.data.rule.targets, s, colon);
611+
612+	rhs = s + colon + 1;
613+	rhsn = n - colon - 1;
614+	split = (size_t)findtop(rhs, rhsn, '|');
615+	if (split != (size_t)-1) {
616+		splitwords(&state.data.rule.prereqs, rhs, split);
617+		splitwords(&state.data.rule.order_only, rhs + split + 1, rhsn - split - 1);
618+	} else {
619+		splitwords(&state.data.rule.prereqs, rhs, rhsn);
620+	}
621+	return state;
622+}
623+
624+/*unclassified line*/
625+static struct Node
626+parseraw(const struct PreLine *line, const char *s)
627+{
628+	struct Node state;
629+
630+	memset(&state, 0, sizeof(state));
631+	state.kind = NODE_RAW;
632+	state.loc.line0 = line->line0;
633+	state.loc.line1 = line->line1;
634+	state.data.raw.text = xstrndup(s, strlen(s));
635+	return state;
636+}
637+
638+static struct Node
639+parseline(const struct PreLine *line)
640+{
641+	struct Node state;
642+	struct AssignScan as;
643+	char *trim;
644+	size_t n;
645+	ptrdiff_t colon;
646+
647+	trim = trimdup(line->text, strlen(line->text));
648+	n = strlen(trim);
649+
650+	if (!n) {
651+		memset(&state, 0, sizeof(state));
652+		state.kind = NODE_BLANK;
653+		state.loc.line0 = line->line0;
654+		state.loc.line1 = line->line1;
655+		free(trim);
656+		return state;
657+	}
658+	if (trim[0] == '#') {
659+		memset(&state, 0, sizeof(state));
660+		state.kind = NODE_COMMENT;
661+		state.loc.line0 = line->line0;
662+		state.loc.line1 = line->line1;
663+		state.data.raw.text = trim;
664+		return state;
665+	}
666+	if (haskw(trim, "ifeq") || haskw(trim, "ifneq") ||
667+	    haskw(trim, "else") || haskw(trim, "endif")) {
668+		state = parsecond(line, trim);
669+		free(trim);
670+		return state;
671+	}
672+	if (haskw(trim, "include") || haskw(trim, "-include")) {
673+		state = parseinclude(line, trim);
674+		free(trim);
675+		return state;
676+	}
677+
678+	colon = findtop(trim, n, ':');
679+	as = findassign(trim, n, 0);
680+	if (colon >= 0 && as.ok && (size_t)colon < as.pos) {
681+		state = parseassign(line, trim, n, (size_t)colon + 1, as, 1, (size_t)colon);
682+		free(trim);
683+		return state;
684+	}
685+	if (colon >= 0) {
686+		state = parserule(line, trim, n, (size_t)colon);
687+		free(trim);
688+		return state;
689+	}
690+	if (as.ok) {
691+		state = parseassign(line, trim, n, 0, as, 0, 0);
692+		free(trim);
693+		return state;
694+	}
695+	state = parseraw(line, trim);
696+	free(trim);
697+	return state;
698+}
699+
700+/*TODO the ast we build is very flat. we probably want actual nested blocks for conditionals*/
701+int
702+buildast(const char *path, const struct Pre *pre, struct Ast *ast)
703+{
704+	struct Node state;
705+	struct Node *last_rule;
706+	size_t i;
707+
708+	(void)path;
709+	memset(ast, 0, sizeof(*ast));
710+	last_rule = 0;
711+
712+	for (i = 0; i < pre->n; i++) {
713+		struct PreLine *line;
714+
715+		line = &pre->v[i];
716+		if (line->isrecipe) {
717+			if (last_rule) {
718+				last_rule->data.rule.recipes.v = xrealloc(last_rule->data.rule.recipes.v,
719+				                                          (last_rule->data.rule.recipes.n + 1) * sizeof(last_rule->data.rule.recipes.v[0]));
720+				last_rule->data.rule.recipes.v[last_rule->data.rule.recipes.n++] =
721+				    xstrndup(line->text + 1, strlen(line->text + 1));
722+				last_rule->loc.line1 = line->line1;
723+				continue;
724+			}
725+			memmove(line->text, line->text + 1, strlen(line->text + 1) + 1);
726+		}
727+
728+		state = parseline(line);
729+		ast->v = xrealloc(ast->v, (ast->n + 1) * sizeof(ast->v[0]));
730+		ast->v[ast->n++] = state;
731+		if (state.kind == NODE_RULE)
732+			last_rule = &ast->v[ast->n - 1];
733+		else if (state.kind != NODE_BLANK && state.kind != NODE_COMMENT)
734+			last_rule = 0;
735+	}
736+
737+	return 0;
738+}
739+
740+/* pre procces build ast, */
741+int
742+parse(const char *path, const char *src, struct Ast *ast)
743+{
744+	struct Pre pre;
745+	int rc;
746+
747+	(void)src;
748+	rc = preproc(path, &pre);
749+	if (rc < 0)
750+		return rc;
751+	rc = buildast(path, &pre, ast);
752+	freepre(&pre);
753+	return rc;
754+}
755+
756+static void
757+freestrs(struct StrList *list)
758+{
759+	size_t i;
760+
761+	for (i = 0; i < list->n; i++)
762+		free(list->v[i]);
763+	free(list->v);
764+}
765+
766+static void
767+freerec(struct RecipeList *list)
768+{
769+	size_t i;
770+
771+	for (i = 0; i < list->n; i++)
772+		free(list->v[i]);
773+	free(list->v);
774+}
775+
776+void
777+freeast(struct Ast *ast)
778+{
779+	size_t i;
780+
781+	for (i = 0; i < ast->n; i++) {
782+		switch (ast->v[i].kind) {
783+		case NODE_COMMENT:
784+		case NODE_RAW:
785+			free(ast->v[i].data.raw.text);
786+			break;
787+		case NODE_ASSIGN:
788+			free(ast->v[i].data.assign.lhs);
789+			free(ast->v[i].data.assign.rhs);
790+			freestrs(&ast->v[i].data.assign.targets);
791+			break;
792+		case NODE_RULE:
793+			freestrs(&ast->v[i].data.rule.targets);
794+			freestrs(&ast->v[i].data.rule.prereqs);
795+			freestrs(&ast->v[i].data.rule.order_only);
796+			freerec(&ast->v[i].data.rule.recipes);
797+			break;
798+		case NODE_INCLUDE:
799+			free(ast->v[i].data.include.path);
800+			break;
801+		case NODE_COND:
802+			free(ast->v[i].data.cond.arg1);
803+			free(ast->v[i].data.cond.arg2);
804+			free(ast->v[i].data.cond.raw);
805+			break;
806+		case NODE_BLANK:
807+			break;
808+		}
809+	}
810+	free(ast->v);
811+	ast->v = 0;
812+	ast->n = 0;
813+}
+121, -0
  1@@ -0,0 +1,121 @@
  2+#ifndef SHINOBI_H
  3+#define SHINOBI_H
  4+
  5+#include <stddef.h>
  6+
  7+/*
  8+ * types
  9+ * PreLine: one preprocessed line with source location
 10+ * Pre: flattened preprocessed makefile input
 11+ * Node: one parsed syntax node
 12+ * Ast: flat list of parsed nodes in source order
 13+ */
 14+
 15+enum NodeKind {
 16+	NODE_BLANK,
 17+	NODE_COMMENT,
 18+	NODE_ASSIGN,
 19+	NODE_RULE,
 20+	NODE_INCLUDE,
 21+	NODE_COND,
 22+	NODE_RAW,
 23+};
 24+
 25+enum AssignOp {
 26+	ASSIGN_EQ,
 27+	ASSIGN_PLUS_EQ,
 28+	ASSIGN_COLON_EQ,
 29+	ASSIGN_QMARK_EQ,
 30+	ASSIGN_BANG_EQ,
 31+};
 32+
 33+enum CondKind {
 34+	COND_IFEQ,
 35+	COND_IFNEQ,
 36+	COND_ELSE,
 37+	COND_ENDIF,
 38+};
 39+
 40+struct SrcLoc {
 41+	int line0;
 42+	int line1;
 43+};
 44+
 45+struct PreLine {
 46+	char *path;
 47+	char *text;
 48+	int line0;
 49+	int line1;
 50+	int isrecipe;
 51+};
 52+
 53+struct Pre {
 54+	struct PreLine *v;
 55+	size_t n;
 56+};
 57+
 58+struct StrList {
 59+	char **v;
 60+	size_t n;
 61+};
 62+
 63+struct RecipeList {
 64+	char **v;
 65+	size_t n;
 66+};
 67+
 68+struct AssignNode {
 69+	char *lhs;
 70+	char *rhs;
 71+	enum AssignOp op;
 72+	int tspec;
 73+	struct StrList targets;
 74+};
 75+
 76+struct RuleNode {
 77+	struct StrList targets;
 78+	struct StrList prereqs;
 79+	struct StrList order_only;
 80+	struct RecipeList recipes;
 81+};
 82+
 83+struct IncludeNode {
 84+	int optional;
 85+	char *path;
 86+};
 87+
 88+struct CondNode {
 89+	enum CondKind kind;
 90+	char *arg1;
 91+	char *arg2;
 92+	char *raw;
 93+};
 94+
 95+struct RawNode {
 96+	char *text;
 97+};
 98+
 99+struct Node {
100+	enum NodeKind kind;
101+	struct SrcLoc loc;
102+	union StmtData {
103+		struct AssignNode assign;
104+		struct RuleNode rule;
105+		struct IncludeNode include;
106+		struct CondNode cond;
107+		struct RawNode raw;
108+	} data;
109+};
110+
/* Flat list of parsed nodes in source order. */
struct Ast {
	struct Node *v; /* heap array of nodes */
	size_t n;       /* number of nodes in v */
};
115+
/* Read path and the plain includes it names into pre; 0 on success, -1 on failure. */
int preproc(const char *path, struct Pre *pre);
/* Release everything held by pre and reset it to empty. */
void freepre(struct Pre *pre);
/* Turn the preprocessed lines in pre into a flat ast; release it with freeast. */
int buildast(const char *path, const struct Pre *pre, struct Ast *ast);
/* Preprocess path and build its ast; negative on failure. */
int parse(const char *path, const char *src, struct Ast *ast);
/* Release everything held by ast and reset it to empty. */
void freeast(struct Ast *ast);
121+
122+#endif