1#include "shinobi.h"
2#include "internal.h"
3
4#include <ctype.h>
5#include <stddef.h>
6#include <stdio.h>
7#include <stdlib.h>
8#include <string.h>
9
/*
 * parse owns the first stage of the pipeline:
 * preproc reads the file, joins backslash-newline continuations,
 * strips comments, and resolves includes, then passes the result
 * to buildast. buildast parses that GNU make syntax into nodes.
 *
 * this is only about syntax. we don't evaluate variables or
 * execute conditional branches.
 */
19
/* Result of scanning a line for an assignment operator (filled in by findassign). */
struct AssignScan {
	size_t pos;		/* offset of the operator's first character */
	size_t len;		/* length of the operator in bytes (1 to 4) */
	enum AssignOp op;	/* which operator was matched */
	int ok;			/* nonzero when an operator was found; other fields are zero otherwise */
};
26
/*
 * Stack of the makefile paths currently being preprocessed.
 * preprocfile0 pushes a copy of each path on entry and pops it on exit;
 * hasinc consults it to detect include cycles.
 */
struct Inc {
	char **v;	/* heap-allocated path strings, most recently entered file last */
	size_t n;	/* number of entries in v */
};
31
static int parseerrs;			/* number of errors reported through parseerr() */
static int deadlikemake;		/* set to 1 once parseerr() has reported an error */
static struct Arena *g_ast_arena;	/* arena that astdup() allocates AST strings from */

/*
 * Forward declarations: include handling makes these mutually recursive
 * (preprocfile0 -> preprocinclude -> preprocfile0).
 */
static int preprocfile(const char *path, const char *src_override, struct Pre *pre, struct Inc *inc,
                       enum ShinMode mode);
static int preprocfile0(const char *path, const char *src_override, struct Pre *pre, struct Inc *inc,
                        struct SpecialTargets *targets, enum ShinMode mode);
static int preprocinclude(const char *dir, const char *incarg, struct Pre *pre, struct Inc *inc,
                          struct SpecialTargets *targets, enum ShinMode mode);
42
43static char *
44astdup(const char *s, size_t n)
45{
46 return arena_strndup(g_ast_arena, s, n);
47}
48
/*
 * Arena-backed variant of trimdup: strip leading and trailing whitespace
 * from the n bytes at s and duplicate what remains with astdup().
 */
static char *
atrimdup(const char *s, size_t n)
{
	const char *head, *tail;

	head = s;
	tail = s + n;
	while (head < tail && isspace((unsigned char)*head))
		head++;
	while (tail > head && isspace((unsigned char)tail[-1]))
		tail--;
	return astdup(head, (size_t)(tail - head));
}
60
/* Forward declaration: split s[0..n) into words whose copies come from astdup(). */
static void
astsplitwords(struct StrList *out, const char *s, size_t n);

/* Forward declaration: append one recipe line, parsed from raw, to dest. */
static void
addrecipe_ast(struct RecipeList *dest, const char *raw);
66
67static void
68addwords_ast(struct StrList *dest, const struct StrList *src)
69{
70 size_t i;
71
72 if (dest->n + src->n > dest->cap) {
73 dest->cap = dest->n + src->n;
74 dest->v = xrealloc(dest->v, dest->cap * sizeof(dest->v[0]));
75 }
76 for (i = 0; i < src->n; i++)
77 dest->v[dest->n++] = src->v[i];
78}
79
80static void
81parseerr(const struct PreLine *line, const char *msg, const char *detail)
82{
83 dielikemake(line->path, line->line0, msg, detail);
84 deadlikemake = 1;
85 parseerrs++;
86}
87
88
/* Directive keywords that parseline rejects with an error; NULL-terminated. */
static const char *const unsupported_kws[] = {
	"undefine",
	"vpath",
	"private",
	"load",
	NULL
};
91
/*
 * Strip leading and trailing whitespace from the n bytes at s and return
 * what remains as a copy made by xstrndup(). Callers in this file release
 * the result with free().
 */
static char *
trimdup(const char *s, size_t n)
{
	const char *head, *tail;

	head = s;
	tail = s + n;
	while (head < tail && isspace((unsigned char)*head))
		head++;
	while (tail > head && isspace((unsigned char)tail[-1]))
		tail--;
	return xstrndup(head, (size_t)(tail - head));
}
103
/*
 * Return 1 when s begins with the keyword kw as a whole word, i.e. kw is
 * followed by whitespace or by the end of the string; 0 otherwise.
 */
static int
haskw(const char *s, const char *kw)
{
	size_t kwlen = strlen(kw);
	unsigned char next;

	if (strncmp(s, kw, kwlen) != 0)
		return 0;
	next = (unsigned char)s[kwlen];
	return next == '\0' || isspace(next);
}
114
/* Return 1 when s starts with one of the conditional directive keywords. */
static int
iscondkw(const char *s)
{
	static const char *const kws[] = {
		"ifeq", "ifneq", "ifdef", "ifndef", "else", "endif"
	};
	size_t k;

	for (k = 0; k < sizeof(kws) / sizeof(kws[0]); k++) {
		if (haskw(s, kws[k]))
			return 1;
	}
	return 0;
}
122
/* Return 1 when the string s contains at least one whitespace character. */
static int
hasws(const char *s)
{
	const unsigned char *p;

	for (p = (const unsigned char *)s; *p != '\0'; p++) {
		if (isspace(*p))
			return 1;
	}
	return 0;
}
134
/*
 * Return 1 when the n bytes at s end in a line continuation: after ignoring
 * trailing CR/LF characters, the text ends with an odd number of
 * backslashes (an even run is a sequence of escaped backslashes).
 */
static int
hascont(const char *s, size_t n)
{
	size_t end, slashes;

	end = n;
	while (end > 0 && (s[end - 1] == '\n' || s[end - 1] == '\r'))
		end--;
	slashes = 0;
	while (end > slashes && s[end - 1 - slashes] == '\\')
		slashes++;
	return slashes % 2 == 1;
}
153
/*
 * Return 1 when the first non-whitespace character within the n bytes
 * at s is '#', i.e. the text is a whole-line comment.
 */
static int
iscommentline(const char *s, size_t n)
{
	const char *p, *end;

	p = s;
	end = s + n;
	while (p < end && isspace((unsigned char)*p))
		p++;
	return p < end && *p == '#';
}
163
/*
 * Skip over one variable/function reference starting at s[*i].
 *
 * s, n: text and its length.
 * i:    in: offset of a '$'; out: offset just past the reference.
 *
 * Returns 0 (leaving *i untouched) when s[*i] is not '$' or when the '$'
 * is the last character. Otherwise returns 1:
 *  - for "$(...)" or "${...}", *i is moved past the matching closer, or to
 *    n when the reference is unterminated;
 *  - for any other "$x", *i is moved past the '$' only.
 *
 * Nesting is tracked the way GNU make does it: only the delimiter kind that
 * opened the reference is counted. The previous version bumped the depth for
 * both "$(" and "${" but only dropped it on the outer closer, so a mixed
 * reference such as "$(a ${b})" swallowed the rest of the line, and a plain
 * "(...)" pair inside "$(...)" ended the reference too early.
 */
static int
skipref(const char *s, size_t n, size_t *i)
{
	size_t j, depth;
	char open, close;

	j = *i;
	if (j + 1 >= n || s[j] != '$')
		return 0;
	open = s[j + 1];
	if (open != '(' && open != '{') {
		*i = j + 1;
		return 1;
	}
	close = open == '(' ? ')' : '}';
	depth = 1;
	for (j += 2; j < n && depth; j++) {
		if (s[j] == open)
			depth++;
		else if (s[j] == close)
			depth--;
	}
	*i = j;
	return 1;
}
194
/*
 * Cut a trailing "# ..." comment off s in place, together with the
 * whitespace in front of it. A '#' inside a $(...) / ${...} reference is
 * not a comment start. Lines whose first non-blank character is '#' are
 * left untouched.
 */
static void
stripcomment(char *s)
{
	size_t len, pos, end;

	len = strlen(s);
	pos = 0;
	while (pos < len && isspace((unsigned char)s[pos]))
		pos++;
	if (pos < len && s[pos] == '#')
		return;

	/* look for the first '#' outside of any reference */
	pos = 0;
	while (pos < len) {
		if (s[pos] == '$' && skipref(s, len, &pos))
			continue;
		if (s[pos] == '#')
			break;
		pos++;
	}
	if (pos >= len)
		return;

	s[pos] = 0;
	end = pos;
	while (end > 0 && isspace((unsigned char)s[end - 1]))
		end--;
	s[end] = 0;
}
222
223static struct AssignScan
224findassign(const char *s, size_t n, size_t start)
225{
226 size_t i;
227 struct AssignScan out;
228
229 memset(&out, 0, sizeof(out));
230 for (i = start; i < n; i++) {
231 if (s[i] == '$' && skipref(s, n, &i)) {
232 i--;
233 continue;
234 }
235 if (i + 3 < n && s[i] == ':' && s[i + 1] == ':' && s[i + 2] == ':' && s[i + 3] == '=') {
236 out.pos = i;
237 out.len = 4;
238 out.op = ASSIGN_COLON3_EQ;
239 out.ok = 1;
240 return out;
241 }
242 if (i + 2 < n && s[i] == ':' && s[i + 1] == ':' && s[i + 2] == '=') {
243 out.pos = i;
244 out.len = 3;
245 out.op = ASSIGN_DCOLON_EQ;
246 out.ok = 1;
247 return out;
248 }
249 if (i + 1 < n && s[i] == '+' && s[i + 1] == '=') {
250 out.pos = i;
251 out.len = 2;
252 out.op = ASSIGN_PLUS_EQ;
253 out.ok = 1;
254 return out;
255 }
256 if (i + 1 < n && s[i] == ':' && s[i + 1] == '=') {
257 out.pos = i;
258 out.len = 2;
259 out.op = ASSIGN_COLON_EQ;
260 out.ok = 1;
261 return out;
262 }
263 if (i + 1 < n && s[i] == '?' && s[i + 1] == '=') {
264 out.pos = i;
265 out.len = 2;
266 out.op = ASSIGN_QMARK_EQ;
267 out.ok = 1;
268 return out;
269 }
270 if (i + 1 < n && s[i] == '!' && s[i + 1] == '=') {
271 out.pos = i;
272 out.len = 2;
273 out.op = ASSIGN_BANG_EQ;
274 out.ok = 1;
275 return out;
276 }
277 if (s[i] == '=') {
278 out.pos = i;
279 out.len = 1;
280 out.op = ASSIGN_EQ;
281 out.ok = 1;
282 return out;
283 }
284 }
285 return out;
286}
287
/*
 * Return the offset of the first occurrence of `want` in s[0..n) that is
 * not inside a $(...) / ${...} reference, or -1 when there is none.
 */
static ptrdiff_t
findtop(const char *s, size_t n, char want)
{
	size_t pos;

	pos = 0;
	while (pos < n) {
		if (s[pos] == '$' && skipref(s, n, &pos))
			continue;
		if (s[pos] == want)
			return (ptrdiff_t)pos;
		pos++;
	}
	return -1;
}
303
304void
305splitwords(struct StrList *out, const char *s, size_t n)
306{
307 size_t i, j, start;
308
309 for (i = 0; i < n;) {
310 while (i < n && isspace((unsigned char)s[i]))
311 i++;
312 if (i >= n)
313 break;
314 start = i;
315 j = i;
316 while (j < n) {
317 if (s[j] == '$' && skipref(s, n, &j))
318 continue;
319 if (isspace((unsigned char)s[j]))
320 break;
321 j++;
322 }
323 if (out->n >= out->cap) {
324 out->cap = out->cap ? out->cap * 2 : 4;
325 out->v = xrealloc(out->v, out->cap * sizeof(out->v[0]));
326 }
327 out->v[out->n++] = xstrndup(s + start, j - start);
328 i = j;
329 }
330}
331
332static void
333astsplitwords(struct StrList *out, const char *s, size_t n)
334{
335 size_t i, j, start;
336
337 for (i = 0; i < n;) {
338 while (i < n && isspace((unsigned char)s[i]))
339 i++;
340 if (i >= n)
341 break;
342 start = i;
343 j = i;
344 while (j < n) {
345 if (s[j] == '$' && skipref(s, n, &j))
346 continue;
347 if (isspace((unsigned char)s[j]))
348 break;
349 j++;
350 }
351 if (out->n >= out->cap) {
352 out->cap = out->cap ? out->cap * 2 : 4;
353 out->v = xrealloc(out->v, out->cap * sizeof(out->v[0]));
354 }
355 out->v[out->n++] = astdup(s + start, j - start);
356 i = j;
357 }
358}
359
360static void
361addrecipe_ast(struct RecipeList *dest, const char *raw)
362{
363 struct Recipe *r;
364 const char *s;
365 size_t n;
366
367 s = raw;
368 while (*s == ' ' || *s == '\t')
369 s++;
370 if (dest->n >= dest->cap) {
371 dest->cap = dest->cap ? dest->cap * 2 : 4;
372 dest->v = xrealloc(dest->v, dest->cap * sizeof(dest->v[0]));
373 }
374 r = &dest->v[dest->n++];
375 memset(r, 0, sizeof(*r));
376 while (*s == '@' || *s == '+' || *s == '-') {
377 if (*s == '@')
378 r->silent = 1;
379 else if (*s == '+')
380 r->recursive = 1;
381 else if (*s == '-')
382 r->ignore = 1;
383 s++;
384 while (*s == ' ' || *s == '\t')
385 s++;
386 }
387 n = strlen(s);
388 while (n > 0 && isspace((unsigned char)s[n - 1]))
389 n--;
390 r->body = astdup(s, n);
391 r->submake = parsesubmake(&r->sm, r->body);
392}
393
394static int
395readline(const char **src, int *lineno, struct PreLine *line, char recipeprefix)
396{
397 const char *p, *start;
398 size_t cap, len, chunk;
399 char *buf;
400 int line0, line1;
401 int comment_cont;
402 int first;
403
404 p = *src;
405 if (!*p)
406 return 0;
407
408 buf = 0;
409 cap = 0;
410 len = 0;
411 line0 = *lineno;
412 line1 = *lineno;
413 comment_cont = 0;
414 first = 1;
415 line->isrecipe = 0;
416 line->recipeprefix = 0;
417
418 for (;;) {
419 start = p;
420 while (*p && *p != '\n')
421 p++;
422 chunk = (size_t)(p - start);
423 if (*p == '\n')
424 p++;
425
426 if (first && chunk > 0 && start[0] == recipeprefix) {
427 line->isrecipe = 1;
428 line->recipeprefix = recipeprefix;
429 }
430 /* a line that starts as a comment stays a comment across
431 backslash line joins. this is a hack that bearssl uses
432 to hide microsoft nmake directives, so nmake sees them
433 and unix make doesn't, this is used to get nmake and make
434 to include different files depending on what is used. */
435 if (first && !line->isrecipe && iscommentline(start, chunk))
436 comment_cont = 1;
437
438 if (!(comment_cont && !first)) {
439 if (len + chunk + 1 > cap) {
440 cap = (len + chunk + 1) * 2;
441 buf = xrealloc(buf, cap);
442 }
443 memcpy(buf + len, start, chunk);
444 len += chunk;
445 buf[len] = 0;
446 }
447
448 if (!(comment_cont ? hascont(start, chunk) : hascont(buf, len)))
449 break;
450 if (!comment_cont) {
451 len--;
452 buf[len++] = ' ';
453 }
454 while (*p == ' ' || *p == '\t')
455 p++;
456 line1++;
457 first = 0;
458 if (!*p)
459 break;
460 }
461
462 *lineno = line1 + 1;
463 *src = p;
464 line->text = buf ? buf : xstrndup("", 0);
465 line->line0 = line0;
466 line->line1 = line1;
467 return 1;
468}
469
470static void
471popinc(struct Inc *inc)
472{
473 if (!inc->n)
474 return;
475 free(inc->v[inc->n - 1]);
476 inc->n--;
477}
478
479static int
480hasinc(struct Inc *inc, const char *path)
481{
482 size_t i;
483
484 for (i = 0; i < inc->n; i++) {
485 if (strcmp(inc->v[i], path) == 0)
486 return 1;
487 }
488 return 0;
489}
490
/*
 * Return 1 when s is a non-empty string containing neither whitespace nor
 * '$', i.e. a single include argument that needs no expansion.
 */
static int
isplainpath(const char *s)
{
	const char *p;

	if (*s == '\0')
		return 0;
	for (p = s; *p != '\0'; p++) {
		if (*p == '$' || isspace((unsigned char)*p))
			return 0;
	}
	return 1;
}
504
/*
 * Return the directory part of path as a newly allocated string (the
 * caller frees it):
 *   no slash at all        -> "."
 *   only a leading slash   -> "/"  (previously "", which is not a directory)
 *   otherwise              -> everything before the last slash
 */
static char *
dirpart(const char *path)
{
	const char *slash;

	slash = strrchr(path, '/');
	if (!slash)
		return xstrdup(".");
	if (slash == path)
		return xstrdup("/");
	return xstrndup(path, (size_t)(slash - path));
}
515
516static int
517preprocinclude(const char *dir, const char *incarg, struct Pre *pre, struct Inc *inc,
518 struct SpecialTargets *targets, enum ShinMode mode)
519{
520 int rc;
521
522 rc = preprocfile0(incarg, 0, pre, inc, targets, mode);
523 if (rc == 0)
524 return 0;
525 if (strcmp(dir, ".") == 0)
526 return -1;
527 {
528 char *full;
529
530 full = joinpath(dir, incarg);
531 rc = preprocfile0(full, 0, pre, inc, targets, mode);
532 free(full);
533 return rc;
534 }
535}
536
537static int
538preprocfile0(const char *path, const char *src_override, struct Pre *pre, struct Inc *inc,
539 struct SpecialTargets *targets, enum ShinMode mode)
540{
541 char *src, *dir;
542 const char *p;
543 int lineno;
544 struct PreLine line;
545
546 if (hasinc(inc, path)) {
547 dielikemake(path, 0, "include cycle", path);
548 return -1;
549 }
550 if (src_override) {
551 src = xstrdup(src_override);
552 } else {
553 src = readfile(path);
554 if (!src)
555 return -1;
556 }
557 inc->v = xrealloc(inc->v, (inc->n + 1) * sizeof(inc->v[0]));
558 inc->v[inc->n++] = xstrdup(path);
559 dir = dirpart(path);
560 p = src;
561 lineno = 1;
562 while (readline(&p, &lineno, &line, targets->recipeprefix)) {
563 line.path = xstrdup(path);
564 if (!line.isrecipe) {
565 char *trim, *incarg;
566 int opt;
567
568 stripcomment(line.text);
569 trim = trimdup(line.text, strlen(line.text));
570 if (haskw(trim, "include") || haskw(trim, "-include") || haskw(trim, "sinclude")) {
571 int rc;
572 size_t kwlen;
573
574 opt = haskw(trim, "-include") || haskw(trim, "sinclude");
575 if (mode == MODE_POSIX_2008 && opt) {
576 dielikemake(path, line.line0,
577 "optional includes are not valid in POSIX 2008", 0);
578 free(trim);
579 free(line.path);
580 free(line.text);
581 free(dir);
582 free(src);
583 popinc(inc);
584 return -2;
585 }
586 if (mode == MODE_POSIX_2024 && haskw(trim, "sinclude")) {
587 dielikemake(path, line.line0,
588 "'sinclude' is not valid in POSIX 2024; use '-include'", 0);
589 free(trim);
590 free(line.path);
591 free(line.text);
592 free(dir);
593 free(src);
594 popinc(inc);
595 return -2;
596 }
597 kwlen = (haskw(trim, "-include") || haskw(trim, "sinclude")) ? 8 : 7;
598 incarg = trimdup(trim + kwlen, strlen(trim + kwlen));
599 if (isplainpath(incarg)) {
600 rc = preprocinclude(dir, incarg, pre, inc, targets, mode);
601 if (rc == 0) {
602 free(incarg);
603 free(line.path);
604 free(line.text);
605 continue;
606 }
607 /* keep unresolved includes in the stream so that eval time
608 * include handling can attempt to remake them from rules */
609 if (opt) {
610 free(incarg);
611 free(line.path);
612 free(line.text);
613 continue;
614 }
615 }
616 free(incarg);
617 }
618 {
619 size_t n;
620 ptrdiff_t colon;
621 struct AssignScan as;
622
623 n = strlen(trim);
624 colon = findtop(trim, n, ':');
625 as = findassign(trim, n, 0);
626 if (as.ok && (colon < 0 || as.pos <= (size_t)colon)) {
627 char *lhs, *rhs;
628
629 lhs = trimdup(trim, as.pos);
630 rhs = trimdup(trim + as.pos + as.len, n - as.pos - as.len);
631 updatespecialassign(targets, lhs, rhs);
632 free(lhs);
633 free(rhs);
634 }
635 }
636 free(trim);
637 }
638 pre->v = xrealloc(pre->v, (pre->n + 1) * sizeof(pre->v[0]));
639 pre->v[pre->n++] = line;
640 }
641 free(dir);
642 free(src);
643 popinc(inc);
644 return 0;
645}
646
647static int
648preprocfile(const char *path, const char *src_override, struct Pre *pre, struct Inc *inc,
649 enum ShinMode mode)
650{
651 struct SpecialTargets targets;
652
653 initspecialtargets(&targets);
654 return preprocfile0(path, src_override, pre, inc, &targets, mode);
655}
656
657int
658preproc(const char *path, struct Pre *pre, enum ShinMode mode)
659{
660 struct Inc inc;
661
662 memset(pre, 0, sizeof(*pre));
663 memset(&inc, 0, sizeof(inc));
664 if (preprocfile(path, 0, pre, &inc, mode) < 0) {
665 free(inc.v);
666 freepre(pre);
667 return -1;
668 }
669 free(inc.v);
670 return 0;
671}
672
673void
674freepre(struct Pre *pre)
675{
676 size_t i;
677
678 for (i = 0; i < pre->n; i++)
679 free(pre->v[i].path);
680 for (i = 0; i < pre->n; i++)
681 free(pre->v[i].text);
682 free(pre->v);
683 pre->v = 0;
684 pre->n = 0;
685}
686
687static struct Node
688parseinclude(const struct PreLine *line, const char *s, enum ShinMode mode)
689{
690 struct Node state;
691 size_t off;
692 size_t i, nwords;
693 const char *p;
694
695 memset(&state, 0, sizeof(state));
696 state.kind = NODE_INCLUDE;
697 state.loc.line0 = line->line0;
698 state.loc.line1 = line->line1;
699
700 if (haskw(s, "-include")) {
701 state.data.include.optional = 1;
702 off = strlen("-include");
703 } else if (haskw(s, "sinclude")) {
704 state.data.include.optional = 1;
705 state.data.include.sinclude = 1;
706 off = strlen("sinclude");
707 } else {
708 off = strlen("include");
709 }
710 state.data.include.path = atrimdup(s + off, strlen(s + off));
711 if (mode == MODE_POSIX_2008) {
712 p = state.data.include.path;
713 nwords = 0;
714 for (i = 0; p[i];) {
715 while (p[i] && isspace((unsigned char)p[i]))
716 i++;
717 if (!p[i])
718 break;
719 nwords++;
720 while (p[i] && !isspace((unsigned char)p[i]))
721 i++;
722 }
723 if (nwords != 1) {
724 memset(&state, 0, sizeof(state));
725 state.kind = NODE_BLANK;
726 state.loc.line0 = line->line0;
727 state.loc.line1 = line->line1;
728 parseerr(line, "include in POSIX 2008 must specify exactly one file", 0);
729 return state;
730 }
731 }
732 return state;
733}
734
735static struct Node
736parsecond(const struct PreLine *line, const char *s)
737{
738 struct Node state;
739 const char *p;
740 size_t n, mid;
741
742 memset(&state, 0, sizeof(state));
743 state.kind = NODE_COND;
744 state.loc.line0 = line->line0;
745 state.loc.line1 = line->line1;
746 state.data.cond.raw = astdup(s, strlen(s));
747
748 if (haskw(s, "ifeq")) {
749 state.data.cond.kind = COND_IFEQ;
750 p = s + 4;
751 } else if (haskw(s, "ifneq")) {
752 state.data.cond.kind = COND_IFNEQ;
753 p = s + 5;
754 } else if (haskw(s, "ifdef")) {
755 state.data.cond.kind = COND_IFDEF;
756 p = s + 5;
757 } else if (haskw(s, "ifndef")) {
758 state.data.cond.kind = COND_IFNDEF;
759 p = s + 6;
760 } else {
761 state.data.cond.kind = COND_ENDIF;
762 return state;
763 }
764
765 while (*p && isspace((unsigned char)*p))
766 p++;
767 n = strlen(p);
768 if (!n) {
769 state.data.cond.arg1 = astdup("", 0);
770 state.data.cond.arg2 = astdup("", 0);
771 return state;
772 }
773 if (state.data.cond.kind == COND_IFDEF || state.data.cond.kind == COND_IFNDEF) {
774 state.data.cond.arg1 = astdup(p, n);
775 state.data.cond.arg2 = astdup("", 0);
776 return state;
777 }
778 if (p[0] == '(' && p[n - 1] == ')') {
779 mid = (size_t)(findtop(p + 1, n - 2, ','));
780 if (mid != (size_t)-1) {
781 state.data.cond.arg1 = atrimdup(p + 1, mid);
782 state.data.cond.arg2 = atrimdup(p + 2 + mid, n - 3 - mid);
783 } else {
784 parseerr(line, "malformed ifeq/ifneq arguments", p);
785 state.data.cond.arg1 = astdup("", 0);
786 state.data.cond.arg2 = astdup("", 0);
787 }
788 } else if (p[0] == '"' || p[0] == '\'') {
789 size_t e1, s2, e2;
790 char q1 = p[0], q2;
791 e1 = 1;
792 while (e1 < n && p[e1] != q1)
793 e1++;
794 if (e1 >= n) {
795 parseerr(line, "malformed ifeq/ifneq arguments", p);
796 state.data.cond.arg1 = astdup("", 0);
797 state.data.cond.arg2 = astdup("", 0);
798 return state;
799 }
800 s2 = e1 + 1;
801 while (s2 < n && isspace((unsigned char)p[s2]))
802 s2++;
803 if (s2 >= n || (p[s2] != '"' && p[s2] != '\'')) {
804 parseerr(line, "malformed ifeq/ifneq arguments", p);
805 state.data.cond.arg1 = astdup("", 0);
806 state.data.cond.arg2 = astdup("", 0);
807 return state;
808 }
809 q2 = p[s2];
810 e2 = s2 + 1;
811 while (e2 < n && p[e2] != q2)
812 e2++;
813 if (e2 >= n || e2 != n - 1) {
814 parseerr(line, "malformed ifeq/ifneq arguments", p);
815 state.data.cond.arg1 = astdup("", 0);
816 state.data.cond.arg2 = astdup("", 0);
817 return state;
818 }
819 state.data.cond.arg1 = astdup(p + 1, e1 - 1);
820 state.data.cond.arg2 = astdup(p + s2 + 1, e2 - s2 - 1);
821 } else {
822 parseerr(line, "malformed ifeq/ifneq arguments", p);
823 state.data.cond.arg1 = astdup("", 0);
824 state.data.cond.arg2 = astdup("", 0);
825 }
826 return state;
827}
828
829static struct Node
830parseassign(const struct PreLine *line, const char *s, size_t n, size_t base, struct AssignScan as, int tspec, size_t tend)
831{
832 struct Node state;
833
834 memset(&state, 0, sizeof(state));
835 state.kind = NODE_ASSIGN;
836 state.loc.line0 = line->line0;
837 state.loc.line1 = line->line1;
838 state.data.assign.op = as.op;
839 state.data.assign.exported = 0;
840 state.data.assign.tspec = tspec;
841 state.data.assign.lhs = atrimdup(s + base, as.pos - base);
842 state.data.assign.rhs = atrimdup(s + as.pos + as.len, n - as.pos - as.len);
843 if (tspec)
844 astsplitwords(&state.data.assign.targets, s, tend);
845 return state;
846}
847
848static struct Node
849parserule(const struct PreLine *line, const char *s, size_t n, size_t colon, int dcolon, enum ShinMode mode)
850{
851 struct Node state;
852 const char *rhs;
853 size_t off;
854 size_t rhsn, split, semi;
855 ptrdiff_t patcolon;
856 char *recipe;
857
858 memset(&state, 0, sizeof(state));
859 state.kind = NODE_RULE;
860 state.loc.line0 = line->line0;
861 state.loc.line1 = line->line1;
862 state.data.rule.dcolon = dcolon;
863
864 off = dcolon ? 2 : 1;
865 rhs = s + colon + off;
866 rhsn = n - colon - off;
867 semi = (size_t)findtop(rhs, rhsn, ';');
868 if (semi != (size_t)-1) {
869 recipe = trimdup(rhs + semi + 1, rhsn - semi - 1);
870 if (recipe[0]) {
871 addrecipe_ast(&state.data.rule.recipes, recipe);
872 free(recipe);
873 } else {
874 free(recipe);
875 }
876 rhsn = semi;
877 }
878 astsplitwords(&state.data.rule.targets, s, colon);
879 patcolon = -1;
880 if (mode == MODE_GNU && !dcolon)
881 patcolon = findtop(rhs, rhsn, ':');
882 if (patcolon >= 0) {
883 state.data.rule.target_pattern = atrimdup(rhs, (size_t)patcolon);
884 rhs += (size_t)patcolon + 1;
885 rhsn -= (size_t)patcolon + 1;
886 }
887 split = (size_t)findtop(rhs, rhsn, '|');
888 if (split != (size_t)-1) {
889 astsplitwords(&state.data.rule.prereqs, rhs, split);
890 astsplitwords(&state.data.rule.order_only, rhs + split + 1, rhsn - split - 1);
891 } else {
892 astsplitwords(&state.data.rule.prereqs, rhs, rhsn);
893 }
894 return state;
895}
896
897static struct Node
898parseexpr(const struct PreLine *line, const char *s)
899{
900 struct Node state;
901
902 memset(&state, 0, sizeof(state));
903 state.kind = NODE_RAW;
904 state.loc.line0 = line->line0;
905 state.loc.line1 = line->line1;
906 state.data.raw.text = astdup(s, strlen(s));
907 return state;
908}
909
910static struct Node
911parseexport(const struct PreLine *line, const char *s, int exported)
912{
913 struct Node state;
914
915 memset(&state, 0, sizeof(state));
916 state.kind = NODE_EXPORT;
917 state.loc.line0 = line->line0;
918 state.loc.line1 = line->line1;
919 state.data.export.exported = exported;
920 state.data.export.all = *s == 0;
921 if (!state.data.export.all)
922 astsplitwords(&state.data.export.names, s, strlen(s));
923 return state;
924}
925
926/* unclassified line, treat as unsupported syntax */
927static struct Node
928parseraw(const struct PreLine *line, const char *s, char recipeprefix)
929{
930 struct Node state;
931
932 (void)s;
933 if (recipeprefix == '\t' && strncmp(line->text, " ", 8) == 0)
934 parseerr(line, "missing separator (did you mean TAB instead of 8 spaces?)", 0);
935 else
936 parseerr(line, "missing separator", 0);
937 memset(&state, 0, sizeof(state));
938 state.kind = NODE_BLANK;
939 state.loc.line0 = line->line0;
940 state.loc.line1 = line->line1;
941 return state;
942}
943
944static struct Node
945blanknode(const struct PreLine *line)
946{
947 struct Node state;
948
949 memset(&state, 0, sizeof(state));
950 state.kind = NODE_BLANK;
951 state.loc.line0 = line->line0;
952 state.loc.line1 = line->line1;
953 return state;
954}
955
956static int
957ruleinlist(const struct NodeList *out, const struct Node *rule)
958{
959 size_t i;
960
961 for (i = 0; i < out->n; i++) {
962 if (&out->v[i] == rule)
963 return 1;
964 }
965 return 0;
966}
967
968static struct Node *
969branchrule(struct NodeList *out, const struct Node *src, const struct PreLine *line)
970{
971 struct Node state;
972
973 memset(&state, 0, sizeof(state));
974 state.kind = NODE_RULE;
975 state.loc.line0 = line->line0;
976 state.loc.line1 = line->line1;
977 state.data.rule.dcolon = src->data.rule.dcolon;
978 addwords_ast(&state.data.rule.targets, &src->data.rule.targets);
979 if (src->data.rule.target_pattern)
980 state.data.rule.target_pattern = astdup(src->data.rule.target_pattern,
981 strlen(src->data.rule.target_pattern));
982 addnode(out, state);
983 return &out->v[out->n - 1];
984}
985
986static int
987parsedefine(const struct Pre *pre, size_t *i, struct Node *out)
988{
989 struct PreLine *line;
990 char *trim, *name;
991 size_t bodycap, bodylen;
992 int depth;
993 char *body;
994
995 line = &pre->v[*i];
996 trim = trimdup(line->text, strlen(line->text));
997 name = atrimdup(trim + strlen("define"), strlen(trim + strlen("define")));
998 free(trim);
999
1000 memset(out, 0, sizeof(*out));
1001 out->kind = NODE_ASSIGN;
1002 out->loc.line0 = line->line0;
1003 out->loc.line1 = line->line1;
1004 out->data.assign.lhs = name;
1005 out->data.assign.op = ASSIGN_EQ;
1006 out->data.assign.origin = ORIGIN_FILE;
1007 out->data.assign.define_block = 1;
1008
1009 bodycap = 64;
1010 bodylen = 0;
1011 body = xmalloc(bodycap);
1012 body[0] = 0;
1013 depth = 1;
1014 (*i)++;
1015 while (*i < pre->n) {
1016 struct PreLine *cur;
1017 char *curtrim;
1018 size_t n;
1019
1020 cur = &pre->v[*i];
1021 curtrim = trimdup(cur->text, strlen(cur->text));
1022 if (haskw(curtrim, "define")) {
1023 depth++;
1024 free(curtrim);
1025 } else if (haskw(curtrim, "endef")) {
1026 depth--;
1027 free(curtrim);
1028 if (depth == 0) {
1029 out->loc.line1 = cur->line1;
1030 (*i)++;
1031 out->data.assign.rhs = astdup(body, bodylen);
1032 free(body);
1033 return 0;
1034 }
1035 } else {
1036 free(curtrim);
1037 }
1038
1039 n = strlen(cur->text);
1040 if (bodylen + n + 2 > bodycap) {
1041 while (bodycap < bodylen + n + 2)
1042 bodycap *= 2;
1043 body = xrealloc(body, bodycap);
1044 }
1045 memcpy(body + bodylen, cur->text, n);
1046 bodylen += n;
1047 body[bodylen++] = '\n';
1048 body[bodylen] = 0;
1049 out->loc.line1 = cur->line1;
1050 (*i)++;
1051 }
1052
1053 parseerr(&pre->v[*i > 0 ? *i - 1 : 0], "unterminated 'define'", 0);
1054 free(body);
1055 return -1;
1056}
1057
1058static struct Node
1059parseline(const struct PreLine *line, const struct SpecialTargets *targets, enum ShinMode mode)
1060{
1061 struct Node state;
1062 struct AssignScan as;
1063 char *trim;
1064 int dcolon;
1065 int is_override;
1066 int is_export;
1067 int is_unexport;
1068 size_t n;
1069 ptrdiff_t colon;
1070
1071 trim = trimdup(line->text, strlen(line->text));
1072 is_override = 0;
1073 is_export = 0;
1074 is_unexport = 0;
1075 if (haskw(trim, "override")) {
1076 char *rest;
1077
1078 rest = trimdup(trim + 8, strlen(trim + 8));
1079 free(trim);
1080 trim = rest;
1081 is_override = 1;
1082 }
1083 if (haskw(trim, "export")) {
1084 char *rest;
1085
1086 rest = trimdup(trim + 6, strlen(trim + 6));
1087 free(trim);
1088 trim = rest;
1089 is_export = 1;
1090 }
1091 if (haskw(trim, "unexport")) {
1092 char *rest;
1093
1094 rest = trimdup(trim + 8, strlen(trim + 8));
1095 free(trim);
1096 trim = rest;
1097 is_unexport = 1;
1098 }
1099 n = strlen(trim);
1100
1101 if ((is_export || is_unexport) && n == 0) {
1102 state = parseexport(line, trim, is_export && !is_unexport);
1103 free(trim);
1104 return state;
1105 }
1106
1107 if (!n) {
1108 memset(&state, 0, sizeof(state));
1109 state.kind = NODE_BLANK;
1110 state.loc.line0 = line->line0;
1111 state.loc.line1 = line->line1;
1112 free(trim);
1113 return state;
1114 }
1115 if (trim[0] == '#') {
1116 memset(&state, 0, sizeof(state));
1117 state.kind = NODE_COMMENT;
1118 state.loc.line0 = line->line0;
1119 state.loc.line1 = line->line1;
1120 state.data.raw.text = astdup(trim, strlen(trim));
1121 free(trim);
1122 return state;
1123 }
1124 if (strncmp(trim, "ifeq(", 5) == 0 || strncmp(trim, "ifneq(", 6) == 0) {
1125 parseerr(line, "missing separator (ifeq/ifneq must be followed by whitespace)", 0);
1126 free(trim);
1127 return blanknode(line);
1128 }
1129 if (haskw(trim, "ifeq") || haskw(trim, "ifneq") ||
1130 haskw(trim, "ifdef") || haskw(trim, "ifndef") ||
1131 haskw(trim, "else") || haskw(trim, "endif")) {
1132 state = parsecond(line, trim);
1133 free(trim);
1134 return state;
1135 }
1136 if (haskw(trim, "include") || haskw(trim, "-include") || haskw(trim, "sinclude")) {
1137 state = parseinclude(line, trim, mode);
1138 free(trim);
1139 return state;
1140 }
1141 {
1142 size_t k;
1143
1144 for (k = 0; unsupported_kws[k]; k++) {
1145 if (haskw(trim, unsupported_kws[k])) {
1146 parseerr(line, "directive", unsupported_kws[k]);
1147 state = blanknode(line);
1148 free(trim);
1149 return state;
1150 }
1151 }
1152 }
1153
1154 colon = findtop(trim, n, ':');
1155 dcolon = colon >= 0 && (size_t)colon + 1 < n && trim[colon + 1] == ':';
1156 as = findassign(trim, n, 0);
1157 if (colon >= 0 && as.ok && (size_t)colon < as.pos) {
1158 /* some inline rule like 'all: ; @echo hi' */
1159 size_t off = dcolon ? 2 : 1;
1160 size_t base = (size_t)colon + off;
1161 int assign_override = is_override;
1162 ptrdiff_t semi = findtop(trim + colon + off, as.pos - (size_t)colon - off, ';');
1163
1164 while (base < n && isspace((unsigned char)trim[base]))
1165 base++;
1166 if (haskw(trim + base, "override")) {
1167 base += 8;
1168 while (base < n && isspace((unsigned char)trim[base]))
1169 base++;
1170 assign_override = 1;
1171 }
1172 if (semi < 0) {
1173 state = parseassign(line, trim, n, base, as, 1, (size_t)colon);
1174 if (assign_override)
1175 state.data.assign.origin = ORIGIN_OVERRIDE;
1176 if (is_export)
1177 state.data.assign.exported = 1;
1178 if (is_unexport)
1179 state.data.assign.exported = -1;
1180 free(trim);
1181 return state;
1182 }
1183 }
1184 if (as.ok && (colon < 0 || as.pos <= (size_t)colon)) {
1185 char *lhs;
1186
1187 lhs = trimdup(trim, as.pos);
1188 if (hasws(lhs)) {
1189 free(lhs);
1190 parseerr(line, "missing separator", 0);
1191 free(trim);
1192 return blanknode(line);
1193 }
1194 free(lhs);
1195 state = parseassign(line, trim, n, 0, as, 0, 0);
1196 if (is_override)
1197 state.data.assign.origin = ORIGIN_OVERRIDE;
1198 if (is_export)
1199 state.data.assign.exported = 1;
1200 if (is_unexport)
1201 state.data.assign.exported = -1;
1202 free(trim);
1203 return state;
1204 }
1205 if (is_export || is_unexport) {
1206 state = parseexport(line, trim, is_export && !is_unexport);
1207 free(trim);
1208 return state;
1209 }
1210 if (colon >= 0) {
1211 state = parserule(line, trim, n, (size_t)colon, dcolon, mode);
1212 free(trim);
1213 return state;
1214 }
1215 if (trim[0] == '$' && (trim[1] == '(' || trim[1] == '{')) {
1216 state = parseexpr(line, trim);
1217 free(trim);
1218 return state;
1219 }
1220 state = parseraw(line, trim, targets->recipeprefix);
1221 free(trim);
1222 return state;
1223}
1224
1225static int
1226parseblock(const struct Pre *pre, size_t *i, struct NodeList *out, struct Node **last_rulep,
1227 struct SpecialTargets *targets, enum ShinMode mode)
1228{
1229 struct Node state;
1230 struct Node *last_rule;
1231
1232 last_rule = last_rulep ? *last_rulep : 0;
1233 while (*i < pre->n) {
1234 struct PreLine *line;
1235 char *trim;
1236
1237 line = &pre->v[*i];
1238 if (line->isrecipe) {
1239 char *rt;
1240 const char *tabwarn;
1241
1242 rt = trimdup(line->text + 1, strlen(line->text + 1));
1243 tabwarn = 0;
1244 if (iscondkw(rt)) {
1245 tabwarn = "conditional directive lines cannot start with TAB";
1246 } else if (haskw(rt, "-include")) {
1247 tabwarn = "-include lines cannot start with TAB";
1248 } else if (haskw(rt, "sinclude")) {
1249 tabwarn = "sinclude lines cannot start with TAB";
1250 } else if (haskw(rt, "include")) {
1251 tabwarn = "include lines cannot start with TAB";
1252 } else if (haskw(rt, "override") || haskw(rt, "export") ||
1253 haskw(rt, "unexport") || haskw(rt, "define") ||
1254 haskw(rt, "endef") || haskw(rt, "undefine") ||
1255 haskw(rt, "vpath") || haskw(rt, "private") ||
1256 haskw(rt, "load")) {
1257 tabwarn = "directive lines cannot start with TAB";
1258 }
1259 free(rt);
1260 if (tabwarn) {
1261 warnlikemake(line->path, line->line0, tabwarn);
1262 memmove(line->text, line->text + 1, strlen(line->text + 1) + 1);
1263 line->isrecipe = 0;
1264 }
1265 }
1266 if (line->isrecipe) {
1267 if (last_rule) {
1268 if (!ruleinlist(out, last_rule))
1269 last_rule = branchrule(out, last_rule, line);
1270 addrecipe_ast(&last_rule->data.rule.recipes, line->text + 1);
1271 last_rule->loc.line1 = line->line1;
1272 (*i)++;
1273 continue;
1274 }
1275 memmove(line->text, line->text + 1, strlen(line->text + 1) + 1);
1276 }
1277 trim = trimdup(line->text, strlen(line->text));
1278 if (haskw(trim, "else") || haskw(trim, "endif")) {
1279 free(trim);
1280 break;
1281 }
1282 if (haskw(trim, "define")) {
1283 free(trim);
1284 if (parsedefine(pre, i, &state) < 0)
1285 return -1;
1286 addnode(out, state);
1287 last_rule = 0;
1288 continue;
1289 }
1290 if (haskw(trim, "ifeq") || haskw(trim, "ifneq") ||
1291 haskw(trim, "ifdef") || haskw(trim, "ifndef")) {
1292 size_t last_rule_idx;
1293 int last_rule_in_out;
1294
1295 last_rule_idx = 0;
1296 last_rule_in_out = last_rule && ruleinlist(out, last_rule);
1297 if (last_rule_in_out)
1298 last_rule_idx = (size_t)(last_rule - out->v);
1299 state = parsecond(line, trim);
1300 free(trim);
1301 (*i)++;
1302 if (parseblock(pre, i, &state.data.cond.thenpart, &last_rule, targets, mode) < 0)
1303 return -1;
1304 if (*i < pre->n) {
1305 struct PreLine *endline;
1306 char *endtrim;
1307
1308 endline = &pre->v[*i];
1309 endtrim = trimdup(endline->text, strlen(endline->text));
1310 if (haskw(endtrim, "else")) {
1311 const char *rest = endtrim + 4;
1312 while (*rest && isspace((unsigned char)*rest))
1313 rest++;
1314 if (*rest) {
1315 /* else-if form; rewrite the current line
1316 to drop else and recurse, the inner
1317 conditional's endif also closes us */
1318 char *rep = xstrndup(rest, strlen(rest));
1319 free(endline->text);
1320 endline->text = rep;
1321 free(endtrim);
1322 if (parseblock(pre, i, &state.data.cond.elsepart, &last_rule, targets, mode) < 0)
1323 return -1;
1324 state.loc.line1 = endline->line1;
1325 addnode(out, state);
1326 if (last_rule_in_out)
1327 last_rule = &out->v[last_rule_idx];
1328 continue;
1329 }
1330 free(endtrim);
1331 (*i)++;
1332 if (parseblock(pre, i, &state.data.cond.elsepart, &last_rule, targets, mode) < 0)
1333 return -1;
1334 if (*i >= pre->n) {
1335 parseerr(&pre->v[pre->n - 1], "missing 'endif'", 0);
1336 return -1;
1337 }
1338 endline = &pre->v[*i];
1339 endtrim = trimdup(endline->text, strlen(endline->text));
1340 if (haskw(endtrim, "else")) {
1341 free(endtrim);
1342 parseerr(endline, "only one 'else' per conditional", 0);
1343 return -1;
1344 }
1345 }
1346 if (!haskw(endtrim, "endif")) {
1347 free(endtrim);
1348 parseerr(endline, "missing 'endif'", 0);
1349 return -1;
1350 }
1351 state.loc.line1 = endline->line1;
1352 free(endtrim);
1353 (*i)++;
1354 } else {
1355 parseerr(&pre->v[pre->n - 1], "missing 'endif'", 0);
1356 return -1;
1357 }
1358 addnode(out, state);
1359 if (last_rule_in_out)
1360 last_rule = &out->v[last_rule_idx];
1361 continue;
1362 }
1363 free(trim);
1364
1365 state = parseline(line, targets, mode);
1366 if (deadlikemake)
1367 return -1;
1368 if (state.kind == NODE_ASSIGN)
1369 updatespecialassign(targets, state.data.assign.lhs, state.data.assign.rhs);
1370 addnode(out, state);
1371 if (state.kind == NODE_RULE)
1372 last_rule = &out->v[out->n - 1];
1373 else if (state.kind != NODE_BLANK && state.kind != NODE_COMMENT)
1374 last_rule = 0;
1375 (*i)++;
1376 }
1377 if (last_rulep)
1378 *last_rulep = last_rule;
1379 return 0;
1380}
1381
1382int
1383buildast(const char *path, const struct Pre *pre, struct Ast *ast, enum ShinMode mode)
1384{
1385 size_t i;
1386 struct SpecialTargets targets;
1387 int rc;
1388
1389 (void)path;
1390 memset(ast, 0, sizeof(*ast));
1391 arena_init(&ast->arena, 0);
1392 g_ast_arena = &ast->arena;
1393 parseerrs = 0;
1394 deadlikemake = 0;
1395 initspecialtargets(&targets);
1396 i = 0;
1397 rc = parseblock(pre, &i, (struct NodeList *)ast, 0, &targets, mode);
1398 g_ast_arena = 0;
1399 if (rc < 0)
1400 return deadlikemake ? -2 : -1;
1401 if (i < pre->n) {
1402 char *trim;
1403
1404 trim = trimdup(pre->v[i].text, strlen(pre->v[i].text));
1405 if (haskw(trim, "endif"))
1406 parseerr(&pre->v[i], "extraneous 'endif'", 0);
1407 else if (haskw(trim, "else"))
1408 parseerr(&pre->v[i], "extraneous 'else'", 0);
1409 free(trim);
1410 }
1411 if (parseerrs)
1412 return deadlikemake ? -2 : -1;
1413
1414 return 0;
1415}
1416
1417/* preprocess and parse */
1418int
1419parse(const char *path, const char *src, struct Ast *ast, enum ShinMode mode)
1420{
1421 struct Pre pre;
1422 struct Inc inc;
1423 int rc;
1424
1425 memset(&pre, 0, sizeof(pre));
1426 memset(&inc, 0, sizeof(inc));
1427 rc = preprocfile(path, src, &pre, &inc, mode);
1428 free(inc.v);
1429 if (rc < 0) {
1430 freepre(&pre);
1431 return rc;
1432 }
1433 rc = buildast(path, &pre, ast, mode);
1434 freepre(&pre);
1435 return rc;
1436}