main shinobi / src / parse.c
   1#include "shinobi.h"
   2#include "internal.h"
   3
   4#include <ctype.h>
   5#include <stddef.h>
   6#include <stdio.h>
   7#include <stdlib.h>
   8#include <string.h>
   9
/*
 * parse owns the first stage of the pipeline:
 *   preproc reads the file, joins backslash-newline continuations,
 *   strips comments, resolves includes, and passes the result to buildast.
 *   buildast parses that GNU make syntax into nodes.
 *
 * this stage is only about syntax. we don't evaluate variables or
 * execute conditional branches.
 */
  19
/* result of scanning a line for an assignment operator (see findassign) */
struct AssignScan {
	size_t pos;        /* index of the operator's first character */
	size_t len;        /* length of the operator in characters */
	enum AssignOp op;  /* which operator was matched */
	int ok;            /* nonzero when an operator was found; other fields are zero otherwise */
};
  26
/*
 * stack of the paths currently being preprocessed. preprocfile0 pushes a
 * path before reading a file and pops it on the way out; hasinc consults
 * the stack to detect include cycles.
 */
struct Inc {
	char **v;  /* heap-allocated copies of the paths */
	size_t n;  /* number of entries in v */
};
  31
/* number of errors reported through parseerr */
static int parseerrs;
/* set to 1 by parseerr once any parse error has been reported */
static int deadlikemake;
/* arena that astdup (and everything built on it) allocates AST strings from */
static struct Arena *g_ast_arena;

/* the preprocessing functions call each other recursively via includes */
static int preprocfile(const char *path, const char *src_override, struct Pre *pre, struct Inc *inc,
                       enum ShinMode mode);
static int preprocfile0(const char *path, const char *src_override, struct Pre *pre, struct Inc *inc,
                        struct SpecialTargets *targets, enum ShinMode mode);
static int preprocinclude(const char *dir, const char *incarg, struct Pre *pre, struct Inc *inc,
                          struct SpecialTargets *targets, enum ShinMode mode);
  42
  43static char *
  44astdup(const char *s, size_t n)
  45{
  46	return arena_strndup(g_ast_arena, s, n);
  47}
  48
  49static char *
  50atrimdup(const char *s, size_t n)
  51{
  52	size_t i, j;
  53
  54	for (i = 0; i < n && isspace((unsigned char)s[i]); i++)
  55		;
  56	for (j = n; j > i && isspace((unsigned char)s[j - 1]); j--)
  57		;
  58	return astdup(s + i, j - i);
  59}
  60
/* forward declaration: split s[0..n) into words, storing arena copies in out */
static void
astsplitwords(struct StrList *out, const char *s, size_t n);

/* forward declaration: append one recipe line, parsed from raw, to dest */
static void
addrecipe_ast(struct RecipeList *dest, const char *raw);
  66
  67static void
  68addwords_ast(struct StrList *dest, const struct StrList *src)
  69{
  70	size_t i;
  71
  72	if (dest->n + src->n > dest->cap) {
  73		dest->cap = dest->n + src->n;
  74		dest->v = xrealloc(dest->v, dest->cap * sizeof(dest->v[0]));
  75	}
  76	for (i = 0; i < src->n; i++)
  77		dest->v[dest->n++] = src->v[i];
  78}
  79
  80static void
  81parseerr(const struct PreLine *line, const char *msg, const char *detail)
  82{
  83	dielikemake(line->path, line->line0, msg, detail);
  84	deadlikemake = 1;
  85	parseerrs++;
  86}
  87
  88
/*
 * directive keywords that parseline rejects with a "directive" parse
 * error. the list is terminated by a null pointer.
 */
static const char *const unsupported_kws[] = {
    "undefine", "vpath", "private", "load", 0};
  91
  92static char *
  93trimdup(const char *s, size_t n)
  94{
  95	size_t i, j;
  96
  97	for (i = 0; i < n && isspace((unsigned char)s[i]); i++)
  98		;
  99	for (j = n; j > i && isspace((unsigned char)s[j - 1]); j--)
 100		;
 101	return xstrndup(s + i, j - i);
 102}
 103
 104static int
 105haskw(const char *s, const char *kw)
 106{
 107	size_t n;
 108
 109	n = strlen(kw);
 110	if (strncmp(s, kw, n) != 0)
 111		return 0;
 112	return s[n] == 0 || isspace((unsigned char)s[n]);
 113}
 114
 115static int
 116iscondkw(const char *s)
 117{
 118	return haskw(s, "ifeq") || haskw(s, "ifneq") ||
 119	       haskw(s, "ifdef") || haskw(s, "ifndef") ||
 120	       haskw(s, "else") || haskw(s, "endif");
 121}
 122
 123static int
 124hasws(const char *s)
 125{
 126	size_t i;
 127
 128	for (i = 0; s[i]; i++) {
 129		if (isspace((unsigned char)s[i]))
 130			return 1;
 131	}
 132	return 0;
 133}
 134
 135static int
 136hascont(const char *s, size_t n)
 137{
 138	size_t i, count;
 139
 140	if (!n)
 141		return 0;
 142	for (i = n; i > 0 && (s[i - 1] == '\n' || s[i - 1] == '\r'); i--)
 143		;
 144	if (!i)
 145		return 0;
 146	count = 0;
 147	while (i > 0 && s[i - 1] == '\\') {
 148		count++;
 149		i--;
 150	}
 151	return (count & 1) != 0;
 152}
 153
 154static int
 155iscommentline(const char *s, size_t n)
 156{
 157	size_t i;
 158
 159	for (i = 0; i < n && isspace((unsigned char)s[i]); i++)
 160		;
 161	return i < n && s[i] == '#';
 162}
 163
 164static int
 165skipref(const char *s, size_t n, size_t *i)
 166{
 167	size_t j, depth;
 168	char open, close;
 169
 170	j = *i;
 171	if (j + 1 >= n || s[j] != '$')
 172		return 0;
 173	open = s[j + 1];
 174	if (open != '(' && open != '{') {
 175		*i = j + 1;
 176		return 1;
 177	}
 178	close = open == '(' ? ')' : '}';
 179	depth = 1;
 180	j += 2;
 181	while (j < n && depth) {
 182		if (s[j] == '$' && j + 1 < n && (s[j + 1] == '(' || s[j + 1] == '{')) {
 183			depth++;
 184			j += 2;
 185			continue;
 186		}
 187		if (s[j] == close)
 188			depth--;
 189		j++;
 190	}
 191	*i = j;
 192	return 1;
 193}
 194
 195static void
 196stripcomment(char *s)
 197{
 198	size_t i, n, first;
 199
 200	n = strlen(s);
 201	for (first = 0; first < n && isspace((unsigned char)s[first]); first++)
 202		;
 203	if (first < n && s[first] == '#')
 204		return;
 205	for (i = 0; i < n; i++) {
 206		if (s[i] == '$' && skipref(s, n, &i)) {
 207			i--;
 208			continue;
 209		}
 210		if (s[i] == '#') {
 211			size_t cut;
 212
 213			s[i] = 0;
 214			cut = strlen(s);
 215			while (cut > 0 && isspace((unsigned char)s[cut - 1]))
 216				cut--;
 217			s[cut] = 0;
 218			return;
 219		}
 220	}
 221}
 222
 223static struct AssignScan
 224findassign(const char *s, size_t n, size_t start)
 225{
 226	size_t i;
 227	struct AssignScan out;
 228
 229	memset(&out, 0, sizeof(out));
 230	for (i = start; i < n; i++) {
 231		if (s[i] == '$' && skipref(s, n, &i)) {
 232			i--;
 233			continue;
 234		}
 235		if (i + 3 < n && s[i] == ':' && s[i + 1] == ':' && s[i + 2] == ':' && s[i + 3] == '=') {
 236			out.pos = i;
 237			out.len = 4;
 238			out.op = ASSIGN_COLON3_EQ;
 239			out.ok = 1;
 240			return out;
 241		}
 242		if (i + 2 < n && s[i] == ':' && s[i + 1] == ':' && s[i + 2] == '=') {
 243			out.pos = i;
 244			out.len = 3;
 245			out.op = ASSIGN_DCOLON_EQ;
 246			out.ok = 1;
 247			return out;
 248		}
 249		if (i + 1 < n && s[i] == '+' && s[i + 1] == '=') {
 250			out.pos = i;
 251			out.len = 2;
 252			out.op = ASSIGN_PLUS_EQ;
 253			out.ok = 1;
 254			return out;
 255		}
 256		if (i + 1 < n && s[i] == ':' && s[i + 1] == '=') {
 257			out.pos = i;
 258			out.len = 2;
 259			out.op = ASSIGN_COLON_EQ;
 260			out.ok = 1;
 261			return out;
 262		}
 263		if (i + 1 < n && s[i] == '?' && s[i + 1] == '=') {
 264			out.pos = i;
 265			out.len = 2;
 266			out.op = ASSIGN_QMARK_EQ;
 267			out.ok = 1;
 268			return out;
 269		}
 270		if (i + 1 < n && s[i] == '!' && s[i + 1] == '=') {
 271			out.pos = i;
 272			out.len = 2;
 273			out.op = ASSIGN_BANG_EQ;
 274			out.ok = 1;
 275			return out;
 276		}
 277		if (s[i] == '=') {
 278			out.pos = i;
 279			out.len = 1;
 280			out.op = ASSIGN_EQ;
 281			out.ok = 1;
 282			return out;
 283		}
 284	}
 285	return out;
 286}
 287
 288static ptrdiff_t
 289findtop(const char *s, size_t n, char want)
 290{
 291	size_t i;
 292
 293	for (i = 0; i < n; i++) {
 294		if (s[i] == '$' && skipref(s, n, &i)) {
 295			i--;
 296			continue;
 297		}
 298		if (s[i] == want)
 299			return (ptrdiff_t)i;
 300	}
 301	return -1;
 302}
 303
 304void
 305splitwords(struct StrList *out, const char *s, size_t n)
 306{
 307	size_t i, j, start;
 308
 309	for (i = 0; i < n;) {
 310		while (i < n && isspace((unsigned char)s[i]))
 311			i++;
 312		if (i >= n)
 313			break;
 314		start = i;
 315		j = i;
 316		while (j < n) {
 317			if (s[j] == '$' && skipref(s, n, &j))
 318				continue;
 319			if (isspace((unsigned char)s[j]))
 320				break;
 321			j++;
 322		}
 323		if (out->n >= out->cap) {
 324			out->cap = out->cap ? out->cap * 2 : 4;
 325			out->v = xrealloc(out->v, out->cap * sizeof(out->v[0]));
 326		}
 327		out->v[out->n++] = xstrndup(s + start, j - start);
 328		i = j;
 329	}
 330}
 331
 332static void
 333astsplitwords(struct StrList *out, const char *s, size_t n)
 334{
 335	size_t i, j, start;
 336
 337	for (i = 0; i < n;) {
 338		while (i < n && isspace((unsigned char)s[i]))
 339			i++;
 340		if (i >= n)
 341			break;
 342		start = i;
 343		j = i;
 344		while (j < n) {
 345			if (s[j] == '$' && skipref(s, n, &j))
 346				continue;
 347			if (isspace((unsigned char)s[j]))
 348				break;
 349			j++;
 350		}
 351		if (out->n >= out->cap) {
 352			out->cap = out->cap ? out->cap * 2 : 4;
 353			out->v = xrealloc(out->v, out->cap * sizeof(out->v[0]));
 354		}
 355		out->v[out->n++] = astdup(s + start, j - start);
 356		i = j;
 357	}
 358}
 359
 360static void
 361addrecipe_ast(struct RecipeList *dest, const char *raw)
 362{
 363	struct Recipe *r;
 364	const char *s;
 365	size_t n;
 366
 367	s = raw;
 368	while (*s == ' ' || *s == '\t')
 369		s++;
 370	if (dest->n >= dest->cap) {
 371		dest->cap = dest->cap ? dest->cap * 2 : 4;
 372		dest->v = xrealloc(dest->v, dest->cap * sizeof(dest->v[0]));
 373	}
 374	r = &dest->v[dest->n++];
 375	memset(r, 0, sizeof(*r));
 376	while (*s == '@' || *s == '+' || *s == '-') {
 377		if (*s == '@')
 378			r->silent = 1;
 379		else if (*s == '+')
 380			r->recursive = 1;
 381		else if (*s == '-')
 382			r->ignore = 1;
 383		s++;
 384		while (*s == ' ' || *s == '\t')
 385			s++;
 386	}
 387	n = strlen(s);
 388	while (n > 0 && isspace((unsigned char)s[n - 1]))
 389		n--;
 390	r->body = astdup(s, n);
 391	r->submake = parsesubmake(&r->sm, r->body);
 392}
 393
 394static int
 395readline(const char **src, int *lineno, struct PreLine *line, char recipeprefix)
 396{
 397	const char *p, *start;
 398	size_t cap, len, chunk;
 399	char *buf;
 400	int line0, line1;
 401	int comment_cont;
 402	int first;
 403
 404	p = *src;
 405	if (!*p)
 406		return 0;
 407
 408	buf = 0;
 409	cap = 0;
 410	len = 0;
 411	line0 = *lineno;
 412	line1 = *lineno;
 413	comment_cont = 0;
 414	first = 1;
 415	line->isrecipe = 0;
 416	line->recipeprefix = 0;
 417
 418	for (;;) {
 419		start = p;
 420		while (*p && *p != '\n')
 421			p++;
 422		chunk = (size_t)(p - start);
 423		if (*p == '\n')
 424			p++;
 425
 426		if (first && chunk > 0 && start[0] == recipeprefix) {
 427			line->isrecipe = 1;
 428			line->recipeprefix = recipeprefix;
 429		}
 430		/* a line that starts as a comment stays a comment across
 431		   backslash line joins. this is a hack that bearssl uses
 432		   to hide microsoft nmake directives, so nmake sees them
 433		   and unix make doesn't, this is used to get nmake and make
 434		   to include different files depending on what is used. */
 435		if (first && !line->isrecipe && iscommentline(start, chunk))
 436			comment_cont = 1;
 437
 438		if (!(comment_cont && !first)) {
 439			if (len + chunk + 1 > cap) {
 440				cap = (len + chunk + 1) * 2;
 441				buf = xrealloc(buf, cap);
 442			}
 443			memcpy(buf + len, start, chunk);
 444			len += chunk;
 445			buf[len] = 0;
 446		}
 447
 448		if (!(comment_cont ? hascont(start, chunk) : hascont(buf, len)))
 449			break;
 450		if (!comment_cont) {
 451			len--;
 452			buf[len++] = ' ';
 453		}
 454		while (*p == ' ' || *p == '\t')
 455			p++;
 456		line1++;
 457		first = 0;
 458		if (!*p)
 459			break;
 460	}
 461
 462	*lineno = line1 + 1;
 463	*src = p;
 464	line->text = buf ? buf : xstrndup("", 0);
 465	line->line0 = line0;
 466	line->line1 = line1;
 467	return 1;
 468}
 469
 470static void
 471popinc(struct Inc *inc)
 472{
 473	if (!inc->n)
 474		return;
 475	free(inc->v[inc->n - 1]);
 476	inc->n--;
 477}
 478
 479static int
 480hasinc(struct Inc *inc, const char *path)
 481{
 482	size_t i;
 483
 484	for (i = 0; i < inc->n; i++) {
 485		if (strcmp(inc->v[i], path) == 0)
 486			return 1;
 487	}
 488	return 0;
 489}
 490
 491static int
 492isplainpath(const char *s)
 493{
 494	size_t i;
 495
 496	if (!*s)
 497		return 0;
 498	for (i = 0; s[i]; i++) {
 499		if (isspace((unsigned char)s[i]) || s[i] == '$')
 500			return 0;
 501	}
 502	return 1;
 503}
 504
 505static char *
 506dirpart(const char *path)
 507{
 508	const char *slash;
 509
 510	slash = strrchr(path, '/');
 511	if (!slash)
 512		return xstrdup(".");
 513	return xstrndup(path, (size_t)(slash - path));
 514}
 515
 516static int
 517preprocinclude(const char *dir, const char *incarg, struct Pre *pre, struct Inc *inc,
 518               struct SpecialTargets *targets, enum ShinMode mode)
 519{
 520	int rc;
 521
 522	rc = preprocfile0(incarg, 0, pre, inc, targets, mode);
 523	if (rc == 0)
 524		return 0;
 525	if (strcmp(dir, ".") == 0)
 526		return -1;
 527	{
 528		char *full;
 529
 530		full = joinpath(dir, incarg);
 531		rc = preprocfile0(full, 0, pre, inc, targets, mode);
 532		free(full);
 533		return rc;
 534	}
 535}
 536
 537static int
 538preprocfile0(const char *path, const char *src_override, struct Pre *pre, struct Inc *inc,
 539             struct SpecialTargets *targets, enum ShinMode mode)
 540{
 541	char *src, *dir;
 542	const char *p;
 543	int lineno;
 544	struct PreLine line;
 545
 546	if (hasinc(inc, path)) {
 547		dielikemake(path, 0, "include cycle", path);
 548		return -1;
 549	}
 550	if (src_override) {
 551		src = xstrdup(src_override);
 552	} else {
 553		src = readfile(path);
 554		if (!src)
 555			return -1;
 556	}
 557	inc->v = xrealloc(inc->v, (inc->n + 1) * sizeof(inc->v[0]));
 558	inc->v[inc->n++] = xstrdup(path);
 559	dir = dirpart(path);
 560	p = src;
 561	lineno = 1;
 562	while (readline(&p, &lineno, &line, targets->recipeprefix)) {
 563		line.path = xstrdup(path);
 564		if (!line.isrecipe) {
 565			char *trim, *incarg;
 566			int opt;
 567
 568			stripcomment(line.text);
 569			trim = trimdup(line.text, strlen(line.text));
 570				if (haskw(trim, "include") || haskw(trim, "-include") || haskw(trim, "sinclude")) {
 571					int rc;
 572					size_t kwlen;
 573
 574					opt = haskw(trim, "-include") || haskw(trim, "sinclude");
 575					if (mode == MODE_POSIX_2008 && opt) {
 576						dielikemake(path, line.line0,
 577						            "optional includes are not valid in POSIX 2008", 0);
 578						free(trim);
 579						free(line.path);
 580						free(line.text);
 581						free(dir);
 582						free(src);
 583						popinc(inc);
 584						return -2;
 585					}
 586					if (mode == MODE_POSIX_2024 && haskw(trim, "sinclude")) {
 587						dielikemake(path, line.line0,
 588						            "'sinclude' is not valid in POSIX 2024; use '-include'", 0);
 589						free(trim);
 590						free(line.path);
 591						free(line.text);
 592						free(dir);
 593						free(src);
 594						popinc(inc);
 595						return -2;
 596					}
 597					kwlen = (haskw(trim, "-include") || haskw(trim, "sinclude")) ? 8 : 7;
 598				incarg = trimdup(trim + kwlen, strlen(trim + kwlen));
 599				if (isplainpath(incarg)) {
 600					rc = preprocinclude(dir, incarg, pre, inc, targets, mode);
 601					if (rc == 0) {
 602						free(incarg);
 603						free(line.path);
 604						free(line.text);
 605						continue;
 606					}
 607					/* keep unresolved includes in the stream so that eval time
 608					 * include handling can attempt to remake them from rules */
 609					if (opt) {
 610						free(incarg);
 611						free(line.path);
 612						free(line.text);
 613						continue;
 614					}
 615				}
 616				free(incarg);
 617			}
 618			{
 619				size_t n;
 620				ptrdiff_t colon;
 621				struct AssignScan as;
 622
 623				n = strlen(trim);
 624				colon = findtop(trim, n, ':');
 625				as = findassign(trim, n, 0);
 626				if (as.ok && (colon < 0 || as.pos <= (size_t)colon)) {
 627					char *lhs, *rhs;
 628
 629					lhs = trimdup(trim, as.pos);
 630					rhs = trimdup(trim + as.pos + as.len, n - as.pos - as.len);
 631					updatespecialassign(targets, lhs, rhs);
 632					free(lhs);
 633					free(rhs);
 634				}
 635			}
 636			free(trim);
 637		}
 638		pre->v = xrealloc(pre->v, (pre->n + 1) * sizeof(pre->v[0]));
 639		pre->v[pre->n++] = line;
 640	}
 641	free(dir);
 642	free(src);
 643	popinc(inc);
 644	return 0;
 645}
 646
 647static int
 648preprocfile(const char *path, const char *src_override, struct Pre *pre, struct Inc *inc,
 649            enum ShinMode mode)
 650{
 651	struct SpecialTargets targets;
 652
 653	initspecialtargets(&targets);
 654	return preprocfile0(path, src_override, pre, inc, &targets, mode);
 655}
 656
 657int
 658preproc(const char *path, struct Pre *pre, enum ShinMode mode)
 659{
 660	struct Inc inc;
 661
 662	memset(pre, 0, sizeof(*pre));
 663	memset(&inc, 0, sizeof(inc));
 664	if (preprocfile(path, 0, pre, &inc, mode) < 0) {
 665		free(inc.v);
 666		freepre(pre);
 667		return -1;
 668	}
 669	free(inc.v);
 670	return 0;
 671}
 672
 673void
 674freepre(struct Pre *pre)
 675{
 676	size_t i;
 677
 678	for (i = 0; i < pre->n; i++)
 679		free(pre->v[i].path);
 680	for (i = 0; i < pre->n; i++)
 681		free(pre->v[i].text);
 682	free(pre->v);
 683	pre->v = 0;
 684	pre->n = 0;
 685}
 686
 687static struct Node
 688parseinclude(const struct PreLine *line, const char *s, enum ShinMode mode)
 689{
 690	struct Node state;
 691	size_t off;
 692	size_t i, nwords;
 693	const char *p;
 694
 695	memset(&state, 0, sizeof(state));
 696	state.kind = NODE_INCLUDE;
 697	state.loc.line0 = line->line0;
 698	state.loc.line1 = line->line1;
 699
 700	if (haskw(s, "-include")) {
 701		state.data.include.optional = 1;
 702		off = strlen("-include");
 703	} else if (haskw(s, "sinclude")) {
 704		state.data.include.optional = 1;
 705		state.data.include.sinclude = 1;
 706		off = strlen("sinclude");
 707	} else {
 708		off = strlen("include");
 709	}
 710	state.data.include.path = atrimdup(s + off, strlen(s + off));
 711	if (mode == MODE_POSIX_2008) {
 712		p = state.data.include.path;
 713		nwords = 0;
 714		for (i = 0; p[i];) {
 715			while (p[i] && isspace((unsigned char)p[i]))
 716				i++;
 717			if (!p[i])
 718				break;
 719			nwords++;
 720			while (p[i] && !isspace((unsigned char)p[i]))
 721				i++;
 722		}
 723		if (nwords != 1) {
 724			memset(&state, 0, sizeof(state));
 725			state.kind = NODE_BLANK;
 726			state.loc.line0 = line->line0;
 727			state.loc.line1 = line->line1;
 728			parseerr(line, "include in POSIX 2008 must specify exactly one file", 0);
 729			return state;
 730		}
 731	}
 732	return state;
 733}
 734
 735static struct Node
 736parsecond(const struct PreLine *line, const char *s)
 737{
 738	struct Node state;
 739	const char *p;
 740	size_t n, mid;
 741
 742	memset(&state, 0, sizeof(state));
 743	state.kind = NODE_COND;
 744	state.loc.line0 = line->line0;
 745	state.loc.line1 = line->line1;
 746	state.data.cond.raw = astdup(s, strlen(s));
 747
 748	if (haskw(s, "ifeq")) {
 749		state.data.cond.kind = COND_IFEQ;
 750		p = s + 4;
 751	} else if (haskw(s, "ifneq")) {
 752		state.data.cond.kind = COND_IFNEQ;
 753		p = s + 5;
 754	} else if (haskw(s, "ifdef")) {
 755		state.data.cond.kind = COND_IFDEF;
 756		p = s + 5;
 757	} else if (haskw(s, "ifndef")) {
 758		state.data.cond.kind = COND_IFNDEF;
 759		p = s + 6;
 760	} else {
 761		state.data.cond.kind = COND_ENDIF;
 762		return state;
 763	}
 764
 765	while (*p && isspace((unsigned char)*p))
 766		p++;
 767	n = strlen(p);
 768	if (!n) {
 769		state.data.cond.arg1 = astdup("", 0);
 770		state.data.cond.arg2 = astdup("", 0);
 771		return state;
 772	}
 773	if (state.data.cond.kind == COND_IFDEF || state.data.cond.kind == COND_IFNDEF) {
 774		state.data.cond.arg1 = astdup(p, n);
 775		state.data.cond.arg2 = astdup("", 0);
 776		return state;
 777	}
 778	if (p[0] == '(' && p[n - 1] == ')') {
 779		mid = (size_t)(findtop(p + 1, n - 2, ','));
 780		if (mid != (size_t)-1) {
 781			state.data.cond.arg1 = atrimdup(p + 1, mid);
 782			state.data.cond.arg2 = atrimdup(p + 2 + mid, n - 3 - mid);
 783		} else {
 784			parseerr(line, "malformed ifeq/ifneq arguments", p);
 785			state.data.cond.arg1 = astdup("", 0);
 786			state.data.cond.arg2 = astdup("", 0);
 787		}
 788	} else if (p[0] == '"' || p[0] == '\'') {
 789		size_t e1, s2, e2;
 790		char q1 = p[0], q2;
 791		e1 = 1;
 792		while (e1 < n && p[e1] != q1)
 793			e1++;
 794		if (e1 >= n) {
 795			parseerr(line, "malformed ifeq/ifneq arguments", p);
 796			state.data.cond.arg1 = astdup("", 0);
 797			state.data.cond.arg2 = astdup("", 0);
 798			return state;
 799		}
 800		s2 = e1 + 1;
 801		while (s2 < n && isspace((unsigned char)p[s2]))
 802			s2++;
 803		if (s2 >= n || (p[s2] != '"' && p[s2] != '\'')) {
 804			parseerr(line, "malformed ifeq/ifneq arguments", p);
 805			state.data.cond.arg1 = astdup("", 0);
 806			state.data.cond.arg2 = astdup("", 0);
 807			return state;
 808		}
 809		q2 = p[s2];
 810		e2 = s2 + 1;
 811		while (e2 < n && p[e2] != q2)
 812			e2++;
 813		if (e2 >= n || e2 != n - 1) {
 814			parseerr(line, "malformed ifeq/ifneq arguments", p);
 815			state.data.cond.arg1 = astdup("", 0);
 816			state.data.cond.arg2 = astdup("", 0);
 817			return state;
 818		}
 819		state.data.cond.arg1 = astdup(p + 1, e1 - 1);
 820		state.data.cond.arg2 = astdup(p + s2 + 1, e2 - s2 - 1);
 821	} else {
 822		parseerr(line, "malformed ifeq/ifneq arguments", p);
 823		state.data.cond.arg1 = astdup("", 0);
 824		state.data.cond.arg2 = astdup("", 0);
 825	}
 826	return state;
 827}
 828
 829static struct Node
 830parseassign(const struct PreLine *line, const char *s, size_t n, size_t base, struct AssignScan as, int tspec, size_t tend)
 831{
 832	struct Node state;
 833
 834	memset(&state, 0, sizeof(state));
 835	state.kind = NODE_ASSIGN;
 836	state.loc.line0 = line->line0;
 837	state.loc.line1 = line->line1;
 838	state.data.assign.op = as.op;
 839	state.data.assign.exported = 0;
 840	state.data.assign.tspec = tspec;
 841	state.data.assign.lhs = atrimdup(s + base, as.pos - base);
 842	state.data.assign.rhs = atrimdup(s + as.pos + as.len, n - as.pos - as.len);
 843	if (tspec)
 844		astsplitwords(&state.data.assign.targets, s, tend);
 845	return state;
 846}
 847
 848static struct Node
 849parserule(const struct PreLine *line, const char *s, size_t n, size_t colon, int dcolon, enum ShinMode mode)
 850{
 851	struct Node state;
 852	const char *rhs;
 853	size_t off;
 854	size_t rhsn, split, semi;
 855	ptrdiff_t patcolon;
 856	char *recipe;
 857
 858	memset(&state, 0, sizeof(state));
 859	state.kind = NODE_RULE;
 860	state.loc.line0 = line->line0;
 861	state.loc.line1 = line->line1;
 862	state.data.rule.dcolon = dcolon;
 863
 864	off = dcolon ? 2 : 1;
 865	rhs = s + colon + off;
 866	rhsn = n - colon - off;
 867	semi = (size_t)findtop(rhs, rhsn, ';');
 868	if (semi != (size_t)-1) {
 869		recipe = trimdup(rhs + semi + 1, rhsn - semi - 1);
 870		if (recipe[0]) {
 871			addrecipe_ast(&state.data.rule.recipes, recipe);
 872			free(recipe);
 873		} else {
 874			free(recipe);
 875		}
 876		rhsn = semi;
 877	}
 878	astsplitwords(&state.data.rule.targets, s, colon);
 879	patcolon = -1;
 880	if (mode == MODE_GNU && !dcolon)
 881		patcolon = findtop(rhs, rhsn, ':');
 882	if (patcolon >= 0) {
 883		state.data.rule.target_pattern = atrimdup(rhs, (size_t)patcolon);
 884		rhs += (size_t)patcolon + 1;
 885		rhsn -= (size_t)patcolon + 1;
 886	}
 887	split = (size_t)findtop(rhs, rhsn, '|');
 888	if (split != (size_t)-1) {
 889		astsplitwords(&state.data.rule.prereqs, rhs, split);
 890		astsplitwords(&state.data.rule.order_only, rhs + split + 1, rhsn - split - 1);
 891	} else {
 892		astsplitwords(&state.data.rule.prereqs, rhs, rhsn);
 893	}
 894	return state;
 895}
 896
 897static struct Node
 898parseexpr(const struct PreLine *line, const char *s)
 899{
 900	struct Node state;
 901
 902	memset(&state, 0, sizeof(state));
 903	state.kind = NODE_RAW;
 904	state.loc.line0 = line->line0;
 905	state.loc.line1 = line->line1;
 906	state.data.raw.text = astdup(s, strlen(s));
 907	return state;
 908}
 909
 910static struct Node
 911parseexport(const struct PreLine *line, const char *s, int exported)
 912{
 913	struct Node state;
 914
 915	memset(&state, 0, sizeof(state));
 916	state.kind = NODE_EXPORT;
 917	state.loc.line0 = line->line0;
 918	state.loc.line1 = line->line1;
 919	state.data.export.exported = exported;
 920	state.data.export.all = *s == 0;
 921	if (!state.data.export.all)
 922		astsplitwords(&state.data.export.names, s, strlen(s));
 923	return state;
 924}
 925
 926/* unclassified line, treat as unsupported syntax */
 927static struct Node
 928parseraw(const struct PreLine *line, const char *s, char recipeprefix)
 929{
 930	struct Node state;
 931
 932	(void)s;
 933	if (recipeprefix == '\t' && strncmp(line->text, "        ", 8) == 0)
 934		parseerr(line, "missing separator (did you mean TAB instead of 8 spaces?)", 0);
 935	else
 936		parseerr(line, "missing separator", 0);
 937	memset(&state, 0, sizeof(state));
 938	state.kind = NODE_BLANK;
 939	state.loc.line0 = line->line0;
 940	state.loc.line1 = line->line1;
 941	return state;
 942}
 943
 944static struct Node
 945blanknode(const struct PreLine *line)
 946{
 947	struct Node state;
 948
 949	memset(&state, 0, sizeof(state));
 950	state.kind = NODE_BLANK;
 951	state.loc.line0 = line->line0;
 952	state.loc.line1 = line->line1;
 953	return state;
 954}
 955
 956static int
 957ruleinlist(const struct NodeList *out, const struct Node *rule)
 958{
 959	size_t i;
 960
 961	for (i = 0; i < out->n; i++) {
 962		if (&out->v[i] == rule)
 963			return 1;
 964	}
 965	return 0;
 966}
 967
 968static struct Node *
 969branchrule(struct NodeList *out, const struct Node *src, const struct PreLine *line)
 970{
 971	struct Node state;
 972
 973	memset(&state, 0, sizeof(state));
 974	state.kind = NODE_RULE;
 975	state.loc.line0 = line->line0;
 976	state.loc.line1 = line->line1;
 977	state.data.rule.dcolon = src->data.rule.dcolon;
 978	addwords_ast(&state.data.rule.targets, &src->data.rule.targets);
 979	if (src->data.rule.target_pattern)
 980		state.data.rule.target_pattern = astdup(src->data.rule.target_pattern,
 981		                                        strlen(src->data.rule.target_pattern));
 982	addnode(out, state);
 983	return &out->v[out->n - 1];
 984}
 985
 986static int
 987parsedefine(const struct Pre *pre, size_t *i, struct Node *out)
 988{
 989	struct PreLine *line;
 990	char *trim, *name;
 991	size_t bodycap, bodylen;
 992	int depth;
 993	char *body;
 994
 995	line = &pre->v[*i];
 996	trim = trimdup(line->text, strlen(line->text));
 997	name = atrimdup(trim + strlen("define"), strlen(trim + strlen("define")));
 998	free(trim);
 999
1000	memset(out, 0, sizeof(*out));
1001	out->kind = NODE_ASSIGN;
1002	out->loc.line0 = line->line0;
1003	out->loc.line1 = line->line1;
1004	out->data.assign.lhs = name;
1005	out->data.assign.op = ASSIGN_EQ;
1006	out->data.assign.origin = ORIGIN_FILE;
1007	out->data.assign.define_block = 1;
1008
1009	bodycap = 64;
1010	bodylen = 0;
1011	body = xmalloc(bodycap);
1012	body[0] = 0;
1013	depth = 1;
1014	(*i)++;
1015	while (*i < pre->n) {
1016		struct PreLine *cur;
1017		char *curtrim;
1018		size_t n;
1019
1020		cur = &pre->v[*i];
1021		curtrim = trimdup(cur->text, strlen(cur->text));
1022		if (haskw(curtrim, "define")) {
1023			depth++;
1024			free(curtrim);
1025		} else if (haskw(curtrim, "endef")) {
1026			depth--;
1027			free(curtrim);
1028			if (depth == 0) {
1029				out->loc.line1 = cur->line1;
1030				(*i)++;
1031				out->data.assign.rhs = astdup(body, bodylen);
1032				free(body);
1033				return 0;
1034			}
1035		} else {
1036			free(curtrim);
1037		}
1038
1039		n = strlen(cur->text);
1040		if (bodylen + n + 2 > bodycap) {
1041			while (bodycap < bodylen + n + 2)
1042				bodycap *= 2;
1043			body = xrealloc(body, bodycap);
1044		}
1045		memcpy(body + bodylen, cur->text, n);
1046		bodylen += n;
1047		body[bodylen++] = '\n';
1048		body[bodylen] = 0;
1049		out->loc.line1 = cur->line1;
1050		(*i)++;
1051	}
1052
1053	parseerr(&pre->v[*i > 0 ? *i - 1 : 0], "unterminated 'define'", 0);
1054	free(body);
1055	return -1;
1056}
1057
1058static struct Node
1059parseline(const struct PreLine *line, const struct SpecialTargets *targets, enum ShinMode mode)
1060{
1061	struct Node state;
1062	struct AssignScan as;
1063	char *trim;
1064	int dcolon;
1065	int is_override;
1066	int is_export;
1067	int is_unexport;
1068	size_t n;
1069	ptrdiff_t colon;
1070
1071	trim = trimdup(line->text, strlen(line->text));
1072	is_override = 0;
1073	is_export = 0;
1074	is_unexport = 0;
1075	if (haskw(trim, "override")) {
1076		char *rest;
1077
1078		rest = trimdup(trim + 8, strlen(trim + 8));
1079		free(trim);
1080		trim = rest;
1081		is_override = 1;
1082	}
1083	if (haskw(trim, "export")) {
1084		char *rest;
1085
1086		rest = trimdup(trim + 6, strlen(trim + 6));
1087		free(trim);
1088		trim = rest;
1089		is_export = 1;
1090	}
1091	if (haskw(trim, "unexport")) {
1092		char *rest;
1093
1094		rest = trimdup(trim + 8, strlen(trim + 8));
1095		free(trim);
1096		trim = rest;
1097		is_unexport = 1;
1098	}
1099	n = strlen(trim);
1100
1101	if ((is_export || is_unexport) && n == 0) {
1102		state = parseexport(line, trim, is_export && !is_unexport);
1103		free(trim);
1104		return state;
1105	}
1106
1107	if (!n) {
1108		memset(&state, 0, sizeof(state));
1109		state.kind = NODE_BLANK;
1110		state.loc.line0 = line->line0;
1111		state.loc.line1 = line->line1;
1112		free(trim);
1113		return state;
1114	}
1115	if (trim[0] == '#') {
1116		memset(&state, 0, sizeof(state));
1117		state.kind = NODE_COMMENT;
1118		state.loc.line0 = line->line0;
1119		state.loc.line1 = line->line1;
1120		state.data.raw.text = astdup(trim, strlen(trim));
1121		free(trim);
1122		return state;
1123	}
1124	if (strncmp(trim, "ifeq(", 5) == 0 || strncmp(trim, "ifneq(", 6) == 0) {
1125		parseerr(line, "missing separator (ifeq/ifneq must be followed by whitespace)", 0);
1126		free(trim);
1127		return blanknode(line);
1128	}
1129	if (haskw(trim, "ifeq") || haskw(trim, "ifneq") ||
1130	    haskw(trim, "ifdef") || haskw(trim, "ifndef") ||
1131	    haskw(trim, "else") || haskw(trim, "endif")) {
1132		state = parsecond(line, trim);
1133		free(trim);
1134		return state;
1135	}
1136	if (haskw(trim, "include") || haskw(trim, "-include") || haskw(trim, "sinclude")) {
1137		state = parseinclude(line, trim, mode);
1138		free(trim);
1139		return state;
1140	}
1141	{
1142		size_t k;
1143
1144		for (k = 0; unsupported_kws[k]; k++) {
1145			if (haskw(trim, unsupported_kws[k])) {
1146				parseerr(line, "directive", unsupported_kws[k]);
1147				state = blanknode(line);
1148				free(trim);
1149				return state;
1150			}
1151		}
1152	}
1153
1154	colon = findtop(trim, n, ':');
1155	dcolon = colon >= 0 && (size_t)colon + 1 < n && trim[colon + 1] == ':';
1156	as = findassign(trim, n, 0);
1157	if (colon >= 0 && as.ok && (size_t)colon < as.pos) {
1158		/* some inline rule like 'all: ; @echo hi' */
1159		size_t off = dcolon ? 2 : 1;
1160		size_t base = (size_t)colon + off;
1161		int assign_override = is_override;
1162		ptrdiff_t semi = findtop(trim + colon + off, as.pos - (size_t)colon - off, ';');
1163
1164		while (base < n && isspace((unsigned char)trim[base]))
1165			base++;
1166		if (haskw(trim + base, "override")) {
1167			base += 8;
1168			while (base < n && isspace((unsigned char)trim[base]))
1169				base++;
1170			assign_override = 1;
1171		}
1172		if (semi < 0) {
1173			state = parseassign(line, trim, n, base, as, 1, (size_t)colon);
1174			if (assign_override)
1175				state.data.assign.origin = ORIGIN_OVERRIDE;
1176			if (is_export)
1177				state.data.assign.exported = 1;
1178			if (is_unexport)
1179				state.data.assign.exported = -1;
1180			free(trim);
1181			return state;
1182		}
1183	}
1184	if (as.ok && (colon < 0 || as.pos <= (size_t)colon)) {
1185		char *lhs;
1186
1187		lhs = trimdup(trim, as.pos);
1188		if (hasws(lhs)) {
1189			free(lhs);
1190			parseerr(line, "missing separator", 0);
1191			free(trim);
1192			return blanknode(line);
1193		}
1194		free(lhs);
1195		state = parseassign(line, trim, n, 0, as, 0, 0);
1196		if (is_override)
1197			state.data.assign.origin = ORIGIN_OVERRIDE;
1198		if (is_export)
1199			state.data.assign.exported = 1;
1200		if (is_unexport)
1201			state.data.assign.exported = -1;
1202		free(trim);
1203		return state;
1204	}
1205	if (is_export || is_unexport) {
1206		state = parseexport(line, trim, is_export && !is_unexport);
1207		free(trim);
1208		return state;
1209	}
1210	if (colon >= 0) {
1211		state = parserule(line, trim, n, (size_t)colon, dcolon, mode);
1212		free(trim);
1213		return state;
1214	}
1215	if (trim[0] == '$' && (trim[1] == '(' || trim[1] == '{')) {
1216		state = parseexpr(line, trim);
1217		free(trim);
1218		return state;
1219	}
1220	state = parseraw(line, trim, targets->recipeprefix);
1221	free(trim);
1222	return state;
1223}
1224
1225static int
1226parseblock(const struct Pre *pre, size_t *i, struct NodeList *out, struct Node **last_rulep,
1227           struct SpecialTargets *targets, enum ShinMode mode)
1228{
1229	struct Node state;
1230	struct Node *last_rule;
1231
1232	last_rule = last_rulep ? *last_rulep : 0;
1233	while (*i < pre->n) {
1234		struct PreLine *line;
1235		char *trim;
1236
1237		line = &pre->v[*i];
1238		if (line->isrecipe) {
1239			char *rt;
1240			const char *tabwarn;
1241
1242			rt = trimdup(line->text + 1, strlen(line->text + 1));
1243			tabwarn = 0;
1244			if (iscondkw(rt)) {
1245				tabwarn = "conditional directive lines cannot start with TAB";
1246			} else if (haskw(rt, "-include")) {
1247				tabwarn = "-include lines cannot start with TAB";
1248			} else if (haskw(rt, "sinclude")) {
1249				tabwarn = "sinclude lines cannot start with TAB";
1250			} else if (haskw(rt, "include")) {
1251				tabwarn = "include lines cannot start with TAB";
1252			} else if (haskw(rt, "override") || haskw(rt, "export") ||
1253			           haskw(rt, "unexport") || haskw(rt, "define") ||
1254			           haskw(rt, "endef") || haskw(rt, "undefine") ||
1255			           haskw(rt, "vpath") || haskw(rt, "private") ||
1256			           haskw(rt, "load")) {
1257				tabwarn = "directive lines cannot start with TAB";
1258			}
1259			free(rt);
1260			if (tabwarn) {
1261				warnlikemake(line->path, line->line0, tabwarn);
1262				memmove(line->text, line->text + 1, strlen(line->text + 1) + 1);
1263				line->isrecipe = 0;
1264			}
1265		}
1266		if (line->isrecipe) {
1267			if (last_rule) {
1268				if (!ruleinlist(out, last_rule))
1269					last_rule = branchrule(out, last_rule, line);
1270				addrecipe_ast(&last_rule->data.rule.recipes, line->text + 1);
1271				last_rule->loc.line1 = line->line1;
1272				(*i)++;
1273				continue;
1274			}
1275			memmove(line->text, line->text + 1, strlen(line->text + 1) + 1);
1276		}
1277		trim = trimdup(line->text, strlen(line->text));
1278		if (haskw(trim, "else") || haskw(trim, "endif")) {
1279			free(trim);
1280			break;
1281		}
1282		if (haskw(trim, "define")) {
1283			free(trim);
1284			if (parsedefine(pre, i, &state) < 0)
1285				return -1;
1286			addnode(out, state);
1287			last_rule = 0;
1288			continue;
1289		}
1290		if (haskw(trim, "ifeq") || haskw(trim, "ifneq") ||
1291		    haskw(trim, "ifdef") || haskw(trim, "ifndef")) {
1292			size_t last_rule_idx;
1293			int last_rule_in_out;
1294
1295			last_rule_idx = 0;
1296			last_rule_in_out = last_rule && ruleinlist(out, last_rule);
1297			if (last_rule_in_out)
1298				last_rule_idx = (size_t)(last_rule - out->v);
1299			state = parsecond(line, trim);
1300			free(trim);
1301			(*i)++;
1302			if (parseblock(pre, i, &state.data.cond.thenpart, &last_rule, targets, mode) < 0)
1303				return -1;
1304			if (*i < pre->n) {
1305				struct PreLine *endline;
1306				char *endtrim;
1307
1308				endline = &pre->v[*i];
1309				endtrim = trimdup(endline->text, strlen(endline->text));
1310				if (haskw(endtrim, "else")) {
1311					const char *rest = endtrim + 4;
1312					while (*rest && isspace((unsigned char)*rest))
1313						rest++;
1314					if (*rest) {
1315						/* else-if form; rewrite the current line
1316						   to drop else and recurse, the inner
1317						   conditional's endif also closes us */
1318						char *rep = xstrndup(rest, strlen(rest));
1319						free(endline->text);
1320						endline->text = rep;
1321						free(endtrim);
1322						if (parseblock(pre, i, &state.data.cond.elsepart, &last_rule, targets, mode) < 0)
1323							return -1;
1324						state.loc.line1 = endline->line1;
1325						addnode(out, state);
1326						if (last_rule_in_out)
1327							last_rule = &out->v[last_rule_idx];
1328						continue;
1329					}
1330					free(endtrim);
1331					(*i)++;
1332					if (parseblock(pre, i, &state.data.cond.elsepart, &last_rule, targets, mode) < 0)
1333						return -1;
1334					if (*i >= pre->n) {
1335						parseerr(&pre->v[pre->n - 1], "missing 'endif'", 0);
1336						return -1;
1337					}
1338					endline = &pre->v[*i];
1339					endtrim = trimdup(endline->text, strlen(endline->text));
1340					if (haskw(endtrim, "else")) {
1341						free(endtrim);
1342						parseerr(endline, "only one 'else' per conditional", 0);
1343						return -1;
1344					}
1345				}
1346				if (!haskw(endtrim, "endif")) {
1347					free(endtrim);
1348					parseerr(endline, "missing 'endif'", 0);
1349					return -1;
1350				}
1351				state.loc.line1 = endline->line1;
1352				free(endtrim);
1353				(*i)++;
1354			} else {
1355				parseerr(&pre->v[pre->n - 1], "missing 'endif'", 0);
1356				return -1;
1357			}
1358			addnode(out, state);
1359			if (last_rule_in_out)
1360				last_rule = &out->v[last_rule_idx];
1361			continue;
1362		}
1363		free(trim);
1364
1365		state = parseline(line, targets, mode);
1366		if (deadlikemake)
1367			return -1;
1368		if (state.kind == NODE_ASSIGN)
1369			updatespecialassign(targets, state.data.assign.lhs, state.data.assign.rhs);
1370		addnode(out, state);
1371		if (state.kind == NODE_RULE)
1372			last_rule = &out->v[out->n - 1];
1373		else if (state.kind != NODE_BLANK && state.kind != NODE_COMMENT)
1374			last_rule = 0;
1375		(*i)++;
1376	}
1377	if (last_rulep)
1378		*last_rulep = last_rule;
1379	return 0;
1380}
1381
1382int
1383buildast(const char *path, const struct Pre *pre, struct Ast *ast, enum ShinMode mode)
1384{
1385	size_t i;
1386	struct SpecialTargets targets;
1387	int rc;
1388
1389	(void)path;
1390	memset(ast, 0, sizeof(*ast));
1391	arena_init(&ast->arena, 0);
1392	g_ast_arena = &ast->arena;
1393	parseerrs = 0;
1394	deadlikemake = 0;
1395	initspecialtargets(&targets);
1396	i = 0;
1397	rc = parseblock(pre, &i, (struct NodeList *)ast, 0, &targets, mode);
1398	g_ast_arena = 0;
1399	if (rc < 0)
1400		return deadlikemake ? -2 : -1;
1401	if (i < pre->n) {
1402		char *trim;
1403
1404		trim = trimdup(pre->v[i].text, strlen(pre->v[i].text));
1405		if (haskw(trim, "endif"))
1406			parseerr(&pre->v[i], "extraneous 'endif'", 0);
1407		else if (haskw(trim, "else"))
1408			parseerr(&pre->v[i], "extraneous 'else'", 0);
1409		free(trim);
1410	}
1411	if (parseerrs)
1412		return deadlikemake ? -2 : -1;
1413
1414	return 0;
1415}
1416
1417/* preprocess and parse */
1418int
1419parse(const char *path, const char *src, struct Ast *ast, enum ShinMode mode)
1420{
1421	struct Pre pre;
1422	struct Inc inc;
1423	int rc;
1424
1425	memset(&pre, 0, sizeof(pre));
1426	memset(&inc, 0, sizeof(inc));
1427	rc = preprocfile(path, src, &pre, &inc, mode);
1428	free(inc.v);
1429	if (rc < 0) {
1430		freepre(&pre);
1431		return rc;
1432	}
1433	rc = buildast(path, &pre, ast, mode);
1434	freepre(&pre);
1435	return rc;
1436}