master xplshn/aruu / cmd / posix / pax.c
   1
   2
   3/* taken from: https://github.com/michaelforney/pax */
   4#ifndef _GNU_SOURCE
   5#define _GNU_SOURCE  /* needed for major/minor (non-posix) */
   6#endif
   7
   8#include "arg.h"
   9
  10#include <assert.h>
  11#include <cpio.h>
  12#include <ctype.h>
  13#include <dirent.h>
  14#include <errno.h>
  15#include <fcntl.h>
  16#include <fnmatch.h>
  17#include <grp.h>
  18#include <limits.h>
  19#include <pwd.h>
  20#include <regex.h>
  21#include <spawn.h>
  22#include <stdarg.h>
  23#include <stdint.h>
  24#include <stdio.h>
  25#include <stdlib.h>
  26#include <string.h>
  27#include <sys/stat.h>
  28#include <sys/sysmacros.h>
  29#include <sys/types.h>
  30#include <sys/uio.h>
  31#include <sys/wait.h>
  32#include <tar.h>
  33#include <time.h>
  34#include <unistd.h>
  35
  36#ifndef O_SEARCH  /* not present on some bsds */
  37#define O_SEARCH 0
  38#endif
  39
  40#if __APPLE__  /* macos lacks st_*tim from posix.1-2008 */
  41#define st_atim st_atimespec
  42#define st_ctim st_ctimespec
  43#define st_mtim st_mtimespec
  44#endif
  45
  46#ifndef HAVE_REALLOCARRAY
  47static void *
  48pax_reallocarray(void *p, size_t n, size_t m)
  49{
  50	if (m && n > SIZE_MAX / m) {
  51		errno = ENOMEM;
  52		return NULL;
  53	}
  54	return realloc(p, n * m);
  55}
  56#undef reallocarray
  57#define reallocarray pax_reallocarray
  58#endif
  59
  60#ifndef HAVE_PIPE2
  61static int
  62pax_pipe2(int fd[2], int flag)
  63{
  64	assert((flag | O_CLOEXEC) == O_CLOEXEC);
  65	if (pipe(fd) != 0)
  66		return -1;
  67	if (flag & O_CLOEXEC && (fcntl(fd[0], F_SETFD, FD_CLOEXEC) != 0 || fcntl(fd[1], F_SETFD, FD_CLOEXEC) != 0))
  68		return -1;
  69	return 0;
  70}
  71#undef pipe2
  72#define pipe2 pax_pipe2
  73#endif
  74
  75#define LEN(a) (sizeof (a) / sizeof *(a))
  76#define ROUNDUP(x, a) (((x) + ((a) - 1)) & ~((a) - 1))
  77#define MAXTIME 077777777777
  78#define MAXSIZE 077777777777
  79#define MAXUGID 07777777
  80
  81enum mode {
  82	LIST,
  83	READ,
  84	WRITE,
  85	COPY,
  86};
  87
  88enum format {
  89	CPIO,
  90	PAX,
  91	USTAR,
  92	GNUTAR,
  93	V7,
  94};
  95
  96enum field {
  97	ATIME    = 1<<0,
  98	CTIME    = 1<<1,
  99	GID      = 1<<2,
 100	GNAME    = 1<<3,
 101	LINKPATH = 1<<4,
 102	MODE     = 1<<5,
 103	MTIME    = 1<<6,
 104	PATH     = 1<<7,
 105	SIZE     = 1<<8,
 106	UID      = 1<<9,
 107	UNAME    = 1<<10,
 108};
 109
 110struct keyword {
 111	const char *name;
 112	enum field field;
 113};
 114
 115struct strbuf {
 116	char *str;
 117	size_t len, cap;
 118};
 119
 120struct header {
 121	/* keywords present in this header */
 122	enum field fields;
 123	/* keywords ignored because they were overridden by an option */
 124	enum field delete;
 125
 126	char type;
 127
 128	char *path;
 129	size_t pathlen;
 130	dev_t dev;
 131	ino_t ino;
 132	mode_t mode;
 133	uid_t uid;
 134	gid_t gid;
 135	nlink_t nlink;
 136	dev_t rdev;
 137	off_t size;
 138	struct timespec atime, mtime, ctime;
 139	char *link;
 140	size_t linklen;
 141	char *uname;
 142	char *gname;
 143
 144	struct strbuf pathbuf;
 145	struct strbuf linkbuf;
 146	struct strbuf unamebuf;
 147	struct strbuf gnamebuf;
 148
 149	/* tar-specific, pre-calculated split point between name and prefix */
 150	char *slash;
 151	/* read this data instead of stdin */
 152	char *data;
 153	/* source file path and flags for hard link (-l flag) */
 154	char *file;
 155	struct timespec fileatime;
 156	int flag;
 157};
 158
 159struct account {
 160	char *name;
 161	enum field type;
 162	uid_t uid;
 163	gid_t gid;
 164};
 165
 166struct bufio {
 167	int fd, err;
 168	off_t off;
 169	char buf[64 * 1024];
 170	char *pos, *end;
 171};
 172
 173struct replstr {
 174	regex_t old;
 175	char *new;
 176	int global;
 177	int print;
 178	int symlink;
 179	struct replstr *next;
 180};
 181
 182struct file {
 183	size_t namelen;
 184	size_t pathlen;
 185	dev_t dev;
 186	struct file *next;
 187	char name[];
 188};
 189
 190struct filelist {
 191	FILE *input;
 192	struct file *pending;
 193};
 194
 195typedef int readfn(struct bufio *, struct header *);
 196typedef void writefn(FILE *, struct header *);
 197
 198static int exitstatus;
 199static int aflag;
 200static int cflag;
 201static int dflag;
 202static int iflag;
 203static int kflag;
 204static int lflag;
 205static int nflag;
 206static int tflag;
 207static int uflag;
 208static int vflag;
 209static int Xflag;
 210static int follow;
 211static int preserve = ATIME | MTIME;
 212static const struct keyword keywords[] = {
 213	{"atime", ATIME},
 214	{"ctime", CTIME},
 215	{"gid", GID},
 216	{"gname", GNAME},
 217	{"linkpath", LINKPATH},
 218	{"mtime", MTIME},
 219	{"path", PATH},
 220	{"size", SIZE},
 221	{"uid", UID},
 222	{"uname", UNAME},
 223};
 224static struct {
 225	enum field delete;
 226	int linkdata;
 227	const char *listopt;
 228	const char *exthdrname;
 229	const char *globexthdrname;
 230	const char *invalid;
 231	int times;
 232} opt;
 233static struct header exthdr, globexthdr;
 234static struct replstr *replstr;
 235static time_t curtime;
 236static char **pats;
 237static size_t patslen;
 238static int *patsused;
 239static struct filelist files;
 240static struct bufio bioin;
 241static char *dest = "";
 242static int destfd = AT_FDCWD;
 243
 244static void
 245fatal(const char *fmt, ...)
 246{
 247	va_list ap;
 248
 249	if (fmt) {
 250		va_start(ap, fmt);
 251		vfprintf(stderr, fmt, ap);
 252		va_end(ap);
 253		if (fmt[0] && fmt[strlen(fmt) - 1] == ':') {
 254			fputc(' ', stderr);
 255			perror(NULL);
 256		} else {
 257			fputc('\n', stderr);
 258		}
 259	} else {
 260		perror(NULL);
 261	}
 262	exit(1);
 263}
 264
 265static char *
 266sbufalloc(struct strbuf *b, size_t n, size_t a)
 267{
 268	char *s;
 269
 270	if (n > b->cap - b->len) {
 271		if (n > SIZE_MAX - a || n + a > SIZE_MAX - b->len)
 272			fatal("path is too long");
 273		b->cap = ROUNDUP(n, a);
 274		s = malloc(b->cap);
 275		if (!s)
 276			fatal(NULL);
 277		if (b->len)
 278			memcpy(s, b->str, b->len);
 279		free(b->str);
 280		b->str = s;
 281	}
 282	return b->str + b->len;
 283}
 284
 285static int
 286sbuffmtv(struct strbuf *b, size_t a, const char *fmt, va_list ap)
 287{
 288	va_list aptmp;
 289	int n;
 290
 291	va_copy(aptmp, ap);
 292	n = vsnprintf(b->str ? b->str + b->len : NULL, b->cap - b->len, fmt, aptmp);
 293	va_end(aptmp);
 294	if (n < 0)
 295		fatal("vsnprintf:");
 296	if ((size_t)n >= b->cap - b->len) {
 297		sbufalloc(b, (size_t)n + 1, a);
 298		n = vsnprintf(b->str + b->len, b->cap - b->len, fmt, ap);
 299		if (n < 0)
 300			fatal("vsnprintf:");
 301		if ((size_t)n >= b->cap - b->len)
 302			fatal("vsnprintf: formatted size changed");
 303	}
 304	b->len += n;
 305	return n;
 306}
 307
 308static int
 309sbuffmt(struct strbuf *b, size_t a, const char *fmt, ...)
 310{
 311	va_list ap;
 312	int n;
 313
 314	va_start(ap, fmt);
 315	n = sbuffmtv(b, a, fmt, ap);
 316	va_end(ap);
 317	return n;
 318}
 319
 320static void
 321sbufcat(struct strbuf *b, const char *s, size_t n, size_t a)
 322{
 323	char *d;
 324
 325	d = sbufalloc(b, n + 1, a);
 326	memcpy(d, s, n);
 327	d[n] = 0;
 328	b->len += n;
 329}
 330
 331static void
 332bioinit(struct bufio *f, int fd)
 333{
 334	f->fd = fd;
 335	f->pos = f->end = f->buf;
 336	f->off = 0;
 337}
 338
 339static size_t
 340bioread(struct bufio *f, void *p, size_t n)
 341{
 342	size_t l;
 343	unsigned char *d;
 344	struct iovec iov[2];
 345	ssize_t r;
 346
 347	d = p;
 348	if (f->pos != f->end) {
 349		l = f->end - f->pos;
 350		if (n < l)
 351			l = n;
 352		memcpy(d, f->pos, l);
 353		f->pos += l;
 354		n -= l;
 355		d += l;
 356	}
 357	iov[1].iov_base = f->buf;
 358	iov[1].iov_len = sizeof f->buf;
 359	for (; n > 0; n -= r, d += r) {
 360		iov[0].iov_base = d;
 361		iov[0].iov_len = n;
 362		r = readv(f->fd, iov, 2);
 363		if (r < 0)
 364			f->err = errno;
 365		if (r <= 0)
 366			break;
 367		if ((size_t)r >= n) {
 368			f->pos = f->buf;
 369			f->end = f->buf + (r - n);
 370			r = n;
 371		}
 372	}
 373	l = d - (unsigned char *)p;
 374	f->off += l;
 375	return l;
 376}
 377
 378static int
 379bioskip(struct bufio *f, off_t n)
 380{
 381	static int seekfail;
 382	size_t l;
 383	ssize_t r;
 384
 385	if (f->pos != f->end) {
 386		l = f->end - f->pos;
 387		if (n < (off_t)l) {
 388			f->pos += n;
 389			return 0;
 390		}
 391		n -= l;
 392		f->pos = f->end = f->buf;
 393	}
 394	if (!seekfail) {
 395		if (n == 0 || lseek(f->fd, n, SEEK_CUR) >= 0)
 396			return 0;
 397		seekfail = 1;
 398	}
 399	for (; n > 0; n -= r) {
 400		l = sizeof f->buf;
 401		if (n < (off_t)l)
 402			l = n;
 403		r = read(f->fd, f->buf, l);
 404		if (r <= 0)
 405			return -1;
 406	}
 407	return 0;
 408}
 409
 410static void
 411copyblock(char *b, struct bufio *r, size_t nr, FILE *w, size_t nw)
 412{
 413	if (bioread(r, b, nr) != nr) {
 414		if (r->err)
 415			fatal("read: %s", strerror(r->err));
 416		fatal("archive truncated");
 417	}
 418	if (nw > nr)
 419		memset(b + nr, 0, nw - nr);
 420	if (nw && fwrite(b, 1, nw, w) != nw)
 421		fatal("write:");
 422}
 423
 424/* nr and nw must differ by at most 8192 */
 425static void
 426copy(struct bufio *r, off_t nr, FILE *w, off_t nw)
 427{
 428	char b[8192];
 429
 430	assert(nr - nw <= (off_t)sizeof b || nw - nr <= (off_t)sizeof b);
 431	for (; nr > (off_t)sizeof b && nw > (off_t)sizeof b; nr -= (off_t)sizeof b, nw -= (off_t)sizeof b)
 432		copyblock(b, r, sizeof b, w, sizeof b);
 433	copyblock(b, r, nr, w, nw);
 434}
 435
 436static struct account *
 437findaccount(const char *name, uid_t uid, gid_t gid)
 438{
 439	static struct account *accts;
 440	static size_t acctslen;
 441	struct account *a;
 442
 443	for (a = accts; a < accts + acctslen; ++a) {
 444		if ((uid == (uid_t)-1 || uid == a->uid) && (gid == (gid_t)-1 || gid == a->gid)
 445		 && (!name || (a->name && strcmp(a->name, name) == 0)))
 446			return a;
 447	}
 448	if ((acctslen & (acctslen - 1)) == 0) {
 449		accts = reallocarray(accts, acctslen ? acctslen * 2 : 16, sizeof *accts);
 450		if (!accts)
 451			fatal(NULL);
 452	}
 453	a = &accts[acctslen++];
 454	a->name = NULL;
 455	a->type = 0;
 456	a->uid = -1;
 457	a->gid = -1;
 458	if (name) {
 459		a->name = strdup(name);
 460		if (!a->name)
 461			fatal(NULL);
 462	}
 463	return a;
 464}
 465
 466static uid_t
 467unametouid(const char *uname, uid_t fallback)
 468{
 469	struct account *a;
 470	struct passwd *pw;
 471
 472	if (!*uname)
 473		return fallback;
 474	a = findaccount(uname, (uid_t)-1, (gid_t)-1);
 475	if (~a->type & UID) {
 476		a->type |= UID;
 477		pw = getpwnam(uname);
 478		a->uid = pw ? pw->pw_uid : (uid_t)-1;
 479	}
 480	return a->uid != (uid_t)-1 ? a->uid : fallback;
 481}
 482
 483static gid_t
 484gnametogid(const char *gname, gid_t fallback)
 485{
 486	struct account *a;
 487	struct group *gr;
 488
 489	if (!*gname)
 490		return fallback;
 491	a = findaccount(gname, (uid_t)-1, (gid_t)-1);
 492	if (~a->type & GID) {
 493		a->type |= GID;
 494		gr = getgrnam(gname);
 495		a->gid = gr ? gr->gr_gid : (gid_t)-1;
 496	}
 497	return a->gid != (gid_t)-1 ? a->gid : fallback;
 498}
 499
 500static char *
 501uidtouname(uid_t uid, char *fallback)
 502{
 503	struct account *a;
 504	struct passwd *pw;
 505
 506	a = findaccount(NULL, uid, (gid_t)-1);
 507	if (~a->type & UID) {
 508		a->type |= UID;
 509		a->uid = uid;
 510		assert(!a->name);
 511		pw = getpwuid(uid);
 512		if (pw) {
 513			a->name = strdup(pw->pw_name);
 514			if (!a->name)
 515				fatal(NULL);
 516		}
 517	}
 518	return a->name ? a->name : fallback;
 519}
 520
 521static char *
 522gidtogname(gid_t gid, char *fallback)
 523{
 524	struct account *a;
 525	struct group *gr;
 526
 527	a = findaccount(NULL, (uid_t)-1, gid);
 528	if (~a->type & GID) {
 529		a->type |= GID;
 530		a->gid = gid;
 531		assert(!a->name);
 532		gr = getgrgid(gid);
 533		if (gr) {
 534			a->name = strdup(gr->gr_name);
 535			if (!a->name)
 536				fatal(NULL);
 537		}
 538	}
 539	return a->name ? a->name : fallback;
 540}
 541
 542static unsigned long long
 543octnum(const char *str, size_t len)
 544{
 545	const char *end;
 546	unsigned c;
 547	unsigned long long n;
 548
 549	n = 0;
 550	end = str + len;
 551	/* some archives have leading spaces, so skip them */
 552	for (; str != end && *str == ' '; ++str)
 553		;
 554	for (; str != end; ++str) {
 555		c = *str;
 556		if (c == ' ' || c == '\0')
 557			break;
 558		c -= '0';
 559		if (c > 7)
 560			fatal("invalid number field");
 561		n = n * 8 + c;
 562	}
 563	return n;
 564}
 565
 566static unsigned long long
 567decnum(const char *str, size_t len, char **pos)
 568{
 569	const char *end;
 570	unsigned c;
 571	unsigned long long n;
 572
 573	n = 0;
 574	end = str + len;
 575	for (; str != end; ++str) {
 576		c = *str - '0';
 577		if (c > 9)
 578			break;
 579		n = n * 10 + c;
 580	}
 581	if (pos)
 582		*pos = (char *)str;
 583	return n;
 584}
 585
 586static int
 587readustar(struct bufio *f, struct header *h)
 588{
 589	static char buf[512];
 590	static off_t end;
 591	size_t namelen, prefixlen, linklen;
 592	unsigned long sum;
 593	int i;
 594	enum format format;
 595
 596	assert(bioin.off <= end);
 597	if (bioskip(f, end - bioin.off) != 0 || bioread(f, buf, sizeof buf) != sizeof buf) {
 598		if (f->err)
 599			fatal("read: %s", strerror(f->err));
 600		fatal("archive truncated");
 601	}
 602	sum = 0;
 603	for (i = 0; i < 512; ++i)
 604		sum += ((unsigned char *)buf)[i];
 605	if (sum == 0)
 606		return 0;
 607	for (i = 148; i < 156; ++i)
 608		sum += ' ' - ((unsigned char *)buf)[i];
 609	if (sum != octnum(buf + 148, 8))
 610		fatal("invalid tar header: bad checksum");
 611	if (memcmp(buf + 257, "ustar\0" "00", 8) == 0)
 612		format = USTAR;
 613	else if (memcmp(buf + 257, "ustar  ", 8) == 0)
 614		format = GNUTAR;
 615	else
 616		format = V7;
 617	h->fields = PATH | UID | GID | SIZE;
 618	namelen = strnlen(buf, 100);
 619	prefixlen = format == USTAR ? strnlen(buf + 345, 155) : 0;
 620	if (namelen == 100 || prefixlen > 0) {
 621		h->pathbuf.len = 0;
 622		if (prefixlen > 0) {
 623			sbufcat(&h->pathbuf, buf + 345, prefixlen, 1024);
 624			sbufcat(&h->pathbuf, "/", 1, 1024);
 625		}
 626		sbufcat(&h->pathbuf, buf, namelen, 1024);
 627		h->path = h->pathbuf.str;
 628		h->pathlen = h->pathbuf.len;
 629	} else {
 630		h->path = buf;
 631		h->pathlen = namelen;
 632	}
 633	h->dev = 0;
 634	h->ino = 0;
 635	h->mode = octnum(buf + 100, 8);
 636	h->uid = octnum(buf + 108, 8);
 637	h->gid = octnum(buf + 116, 8);
 638	h->nlink = 1;
 639	h->size = octnum(buf + 124, 12);
 640	end = bioin.off + ROUNDUP(h->size, 512);
 641	h->mtime = (struct timespec){.tv_sec = octnum(buf + 136, 12)};
 642	if (format == GNUTAR) {
 643		h->fields |= ATIME | CTIME;
 644		h->atime = (struct timespec){.tv_sec = octnum(buf + 345, 12)};
 645		h->ctime = (struct timespec){.tv_sec = octnum(buf + 357, 12)};
 646	}
 647	h->type = buf[156];
 648	if (h->type == AREGTYPE)
 649		h->type = REGTYPE;
 650
 651	linklen = strnlen(buf + 157, 100);
 652	if (linklen > 0)
 653		h->fields |= LINKPATH;
 654	if (linklen == 100) {
 655		h->linkbuf.len = 0;
 656		sbufcat(&h->linkbuf, buf + 157, 100, 1024);
 657		h->link = h->linkbuf.str;
 658		h->linklen = h->linkbuf.len;
 659	} else {
 660		h->link = buf + 157;
 661	}
 662	h->linklen = linklen;
 663	if (format == V7) {
 664		h->uname = "";
 665		h->gname = "";
 666	} else {
 667		h->fields |= UNAME | GNAME;
 668		h->uname = buf + 265;
 669		if (!memchr(h->uname, '\0', 32))
 670			fatal("uname is not NUL-terminated");
 671		h->gname = buf + 297;
 672		if (!memchr(h->gname, '\0', 32))
 673			fatal("gname is not NUL-terminated");
 674		if (h->type == CHRTYPE || h->type == BLKTYPE) {
 675			unsigned major, minor;
 676
 677			major = octnum(buf + 329, 8);
 678			minor = octnum(buf + 337, 8);
 679			h->rdev = makedev(major, minor);
 680		}
 681	}
 682	return 1;
 683}
 684
 685static void
 686parsetime(struct timespec *ts, const char *field, const char *str, size_t len)
 687{
 688	const char *end = str + len;
 689	char *pos;
 690	unsigned long long subsec;
 691	size_t sublen;
 692
 693	ts->tv_sec = decnum(str, len, &pos);
 694	if (*pos == '.') {
 695		str = ++pos;
 696		subsec = decnum(str, end - str, &pos);
 697		for (sublen = pos - str; sublen < 9; ++sublen)
 698			subsec *= 10;
 699		ts->tv_nsec = subsec % 1000000000;
 700	}
 701	if (pos != end)
 702		fatal("invalid extended header: bad %s", field);
 703}
 704
 705static void
 706extkeyval(struct header *h, const char *key, const char *val, size_t vallen)
 707{
 708	enum field field;
 709	char *end;
 710	const struct keyword *kw;
 711
 712	field = 0;
 713	for (kw = keywords; kw != keywords + LEN(keywords); ++kw) {
 714		if (strcmp(key, kw->name) == 0) {
 715			field = kw->field;
 716			break;
 717		}
 718	}
 719	if (!field) {
 720		if (strcmp(key, "charset") == 0) {
 721		} else if (strcmp(key, "comment") == 0) {
 722			/* ignore */
 723		} else if (strcmp(key, "hdrcharset") == 0) {
 724		} else if (strncmp(key, "realtime.", 9) == 0) {
 725		} else if (strncmp(key, "security.", 9) == 0) {
 726		} else {
 727			fprintf(stderr, "ignoring unknown keyword '%s'\n", key);
 728		}
 729		return;
 730	}
 731	if ((h->delete | opt.delete) & field)
 732		return;
 733
 734	switch (field) {
 735	case ATIME:
 736		parsetime(&h->atime, "atime", val, vallen);
 737		break;
 738	case CTIME:
 739		parsetime(&h->ctime, "ctime", val, vallen);
 740		break;
 741	case GID:
 742		h->gid = decnum(val, vallen, &end);
 743		if (end != val + vallen)
 744			fatal("invalid extended header: bad gid");
 745		break;
 746	case GNAME:
 747		h->gnamebuf.len = 0;
 748		sbufcat(&h->gnamebuf, val, vallen, 256);
 749		h->gname = h->gnamebuf.str;
 750		break;
 751	case LINKPATH:
 752		h->linkbuf.len = 0;
 753		sbufcat(&h->linkbuf, val, vallen, 1024);
 754		h->link = h->linkbuf.str;
 755		h->linklen = h->linkbuf.len;
 756		break;
 757	case MTIME:
 758		parsetime(&h->mtime, "mtime", val, vallen);
 759		break;
 760	case PATH:
 761		h->pathbuf.len = 0;
 762		sbufcat(&h->pathbuf, val, vallen, 1024);
 763		h->path = h->pathbuf.str;
 764		h->pathlen = h->pathbuf.len;
 765		break;
 766	case SIZE:
 767		h->size = decnum(val, vallen, &end);
 768		if (end != val + vallen)
 769			fatal("invalid extended header: bad size");
 770		break;
 771	case UID:
 772		h->uid = decnum(val, vallen, &end);
 773		if (end != val + vallen)
 774			fatal("invalid extended header: bad uid");
 775		break;
 776	case UNAME:
 777		h->unamebuf.len = 0;
 778		sbufcat(&h->unamebuf, val, vallen, 256);
 779		h->uname = h->unamebuf.str;
 780		break;
 781	default:
 782		return;
 783	}
 784	h->fields |= field;
 785}
 786
 787static void
 788readexthdr(struct bufio *f, struct header *h, off_t len)
 789{
 790	static struct strbuf buf;
 791	size_t reclen, vallen;
 792	char *rec, *end, *key, *val;
 793
 794	if (len > (off_t)SIZE_MAX)
 795		fatal("extended header is too large");
 796	buf.len = 0;
 797	sbufalloc(&buf, (size_t)len, 8192);
 798	if (bioread(f, buf.str, (size_t)len) != (size_t)len) {
 799		if (f->err)
 800			fatal("read: %s", strerror(f->err));
 801		fatal("archive truncated");
 802	}
 803	rec = buf.str;
 804	while (len > 0) {
 805		end = memchr(rec, '\n', len);
 806		if (!end)
 807			fatal("invalid extended header: record is missing newline");
 808		*end = '\0';
 809		reclen = decnum(rec, (size_t)(end - rec), &key);
 810		if (*key != ' ' || reclen != (unsigned long long)(end - rec + 1))
 811			fatal("invalid extended header: invalid record");
 812		++key;
 813		val = strchr(key, '=');
 814		if (!val)
 815			fatal("invalid extended header: record has no '='");
 816		*val++ = '\0';
 817		vallen = end - val;
 818		extkeyval(h, key, val, vallen);
 819		len -= reclen;
 820		rec += reclen;
 821	}
 822}
 823
 824static void
 825readgnuhdr(struct bufio *f, struct strbuf *b, off_t len)
 826{
 827	if (len > (off_t)(SIZE_MAX - 1))
 828		fatal("GNU header is too large");
 829	b->len = 0;
 830	sbufalloc(b, (size_t)(len + 1), 1024);
 831	if (bioread(f, b->str, (size_t)len) != (size_t)len) {
 832		if (f->err)
 833			fatal("read: %s", strerror(f->err));
 834		fatal("archive truncated");
 835	}
 836	b->str[len] = '\0';
 837	b->len = len;
 838}
 839
 840static int
 841readpax(struct bufio *f, struct header *h)
 842{
 843	exthdr.fields = exthdr.delete;
 844	while (readustar(f, h)) {
 845		switch (h->type) {
 846		case 'g':
 847			readexthdr(f, &globexthdr, h->size);
 848			break;
 849		// ?man -x: hex format or match whole lines
 850	case 'x':
 851			readexthdr(f, &exthdr, h->size);
 852			break;
 853		// ?man -L: specify option flag
 854	case 'L':
 855			if ((exthdr.delete | opt.delete) & PATH)
 856				break;
 857			readgnuhdr(f, &exthdr.pathbuf, h->size);
 858			exthdr.path = exthdr.pathbuf.str;
 859			exthdr.pathlen = exthdr.pathbuf.len;
 860			exthdr.fields |= PATH;
 861			break;
 862		case 'K':
 863			if ((exthdr.delete | opt.delete) & LINKPATH)
 864				break;
 865			readgnuhdr(f, &exthdr.linkbuf, h->size);
 866			exthdr.link = exthdr.linkbuf.str;
 867			exthdr.linklen = exthdr.linkbuf.len;
 868			exthdr.fields |= LINKPATH;
 869			break;
 870		default:
 871			return 1;
 872		}
 873	}
 874	return 0;
 875}
 876
 877static int
 878readcpio(struct bufio *f, struct header *h)
 879{
 880	static off_t end;
 881	unsigned long type;
 882	char buf[76];
 883
 884	if (bioskip(f, end - bioin.off) != 0 || bioread(f, buf, sizeof buf) != sizeof buf) {
 885		if (f->err)
 886			fatal("read: %s", strerror(f->err));
 887		fatal("archive truncated");
 888	}
 889	if (memcmp(buf, "070707", 6) != 0)
 890		fatal("invalid cpio header: bad magic");
 891	h->pathlen = octnum(buf + 59, 6);
 892	if (h->pathlen == 0)
 893		fatal("invalid cpio header: c_namesize is 0");
 894	h->pathbuf.len = 0;
 895	sbufalloc(&h->pathbuf, h->pathlen, 1024);
 896	h->path = h->pathbuf.str;
 897	if (bioread(f, h->path, h->pathlen) != h->pathlen) {
 898		if (f->err)
 899			fatal("read: %s", strerror(f->err));
 900		fatal("archive truncated");
 901	}
 902	if (h->path[--h->pathlen] != '\0')
 903		fatal("invalid cpio header: name is not NUL-terminated");
 904	if (strcmp(h->path, "TRAILER!!!") == 0)
 905		return 0;
 906
 907	h->fields = PATH | MODE | UID | GID | MTIME | SIZE;
 908	h->dev = octnum(buf + 6, 6);
 909	h->ino = octnum(buf + 12, 6);
 910	type = octnum(buf + 18, 6);
 911	h->mode = type & 07777;
 912	type &= ~07777;
 913	switch (type) {
 914	case C_ISDIR: h->type = DIRTYPE; break;
 915	case C_ISFIFO: h->type = FIFOTYPE; break;
 916	case C_ISREG: h->type = REGTYPE; break;
 917	case C_ISLNK: h->type = SYMTYPE; break;
 918	case C_ISBLK: h->type = BLKTYPE; break;
 919	case C_ISCHR: h->type = CHRTYPE; break;
 920	default: fatal("invalid cpio header: invalid or unsupported file type: %#o", type);
 921	}
 922	h->uid = octnum(buf + 24, 6);
 923	h->gid = octnum(buf + 30, 6);
 924	h->nlink = octnum(buf + 36, 6);
 925	h->rdev = octnum(buf + 42, 6);
 926	h->mtime = (struct timespec){.tv_sec = octnum(buf + 48, 11)};
 927	h->size = octnum(buf + 65, 11);
 928	h->uname = "";
 929	h->gname = "";
 930	if (h->type == SYMTYPE) {
 931		if (h->size > (off_t)(SIZE_MAX - 1))
 932			fatal("symlink target is too long");
 933		h->linklen = h->size;
 934		h->linkbuf.len = 0;
 935		h->link = sbufalloc(&h->linkbuf, h->linklen + 1, 1024);
 936		if (bioread(f, h->link, h->linklen) != h->linklen) {
 937			if (f->err)
 938				fatal("read: %s", strerror(f->err));
 939			fatal("archive truncated");
 940		}
 941		h->link[h->linklen] = '\0';
 942		h->size = 0;
 943		h->fields |= LINKPATH;
 944	} else {
 945		h->link = "";
 946		h->linklen = 0;
 947	}
 948	end = bioin.off + h->size;
 949	return 1;
 950}
 951
 952static int
 953decompress(const char *algo, int fd, pid_t *pid)
 954{
 955	extern char **environ;
 956	posix_spawn_file_actions_t fa;
 957	int p[2], err;
 958	char *argv[3];
 959
 960	if (!algo)
 961		return fd;
 962	argv[0] = (char *)algo;
 963	argv[1] = "-dc";
 964	argv[2] = NULL;
 965	if (pipe2(p, O_CLOEXEC) != 0)
 966		fatal("pipe2:");
 967	err = posix_spawn_file_actions_init(&fa);
 968	if (err)
 969		fatal("posix_spawn_file_actions_init: %s", strerror(errno));
 970	err = posix_spawn_file_actions_adddup2(&fa, fd, 0);
 971	if (err)
 972		fatal("posix_spawn_file_actions_adddup2: %s", strerror(errno));
 973	err = posix_spawn_file_actions_adddup2(&fa, p[1], 1);
 974	if (err)
 975		fatal("posix_spawn_file_actions_adddup2: %s", strerror(errno));
 976	err = posix_spawnp(pid, algo, &fa, NULL, argv, environ);
 977	if (err)
 978		fatal("posix_spawnp %s: %s", algo, strerror(errno));
 979	close(p[1]);
 980	return p[0];
 981}
 982
 983static readfn *
 984detectformat(struct bufio *f, const char *algo, pid_t *pid)
 985{
 986	size_t l, i;
 987	ssize_t n;
 988	unsigned char *b;
 989
 990again:
 991	f->fd = decompress(algo, f->fd, pid);
 992	b = (unsigned char *)f->buf;
 993	for (l = 0; l < 512; l += n) {
 994		n = read(f->fd, b + l, 512 - l);
 995		if (n < 0)
 996			fatal("read:");
 997		if (n == 0)
 998			break;
 999	}
1000	f->pos = f->buf;
1001	f->end = f->buf + l;
1002	if (l == 512) {
1003		unsigned long sum, hdrsum;
1004
1005		sum = 0;
1006		for (i = 0; i < 512; ++i)
1007			sum += b[i];
1008		if (sum == 0)
1009			return readpax;
1010		hdrsum = 0;
1011		for (i = 148; i < 156; ++i) {
1012			sum += ' ' - b[i];
1013			if (b[i] >= '0' && b[i] <= '9')
1014				hdrsum = hdrsum * 8 + (b[i] - '0');
1015		}
1016		if (sum == hdrsum)
1017			return readpax;
1018	}
1019	if (l >= 76) {
1020		if (memcmp(b, "070707", 6) == 0)
1021			return readcpio;
1022	}
1023	if (!algo) {
1024		static const struct command {
1025			char algo[6];
1026			unsigned char magiclen;
1027			unsigned char magic[6];
1028		} cmds[] = {
1029			{"gzip", 2, {0x1F, 0x8B}},
1030			{"bzip2", 2, {'B', 'Z'}},
1031			{"xz", 6, {0xFD, '7', 'z', 'X', 'Z', 0x00}},
1032			{"zstd", 4, {0x28, 0xB5, 0x2F, 0xFD}},
1033			{"lzip", 4, {'L', 'Z', 'I', 'P'}},
1034		};
1035		const struct command *c;
1036
1037		for (c = cmds; c < cmds + LEN(cmds); ++c) {
1038			if (l >= c->magiclen && memcmp(b, c->magic, c->magiclen) == 0) {
1039				if (lseek(f->fd, 0, SEEK_SET) != 0)
1040					fatal("compression detection requires seekable input");
1041				algo = c->algo;
1042				goto again;
1043			}
1044		}
1045	}
1046	return NULL;
1047}
1048
1049static FILE *
1050compress(const char *algo, const char *name, pid_t *pid)
1051{
1052	extern char **environ;
1053	FILE *f;
1054	int fd, p[2], err;
1055	posix_spawn_file_actions_t fa;
1056	char *argv[3];
1057
1058	if (!algo) {
1059		if (name && !freopen(name, aflag ? "r+" : "w", stdout))
1060			fatal("open %s:");
1061		if (aflag && name) {
1062			if (fseek(stdout, 0, SEEK_END) != 0)
1063				fatal("fseek %s:");
1064		}
1065		return stdout;
1066	}
1067	argv[0] = (char *)algo;
1068	argv[1] = "-c";
1069	argv[2] = NULL;
1070	if (name) {
1071		fd = open(name, O_WRONLY | O_CREAT, 0666);
1072		if (fd < 0)
1073			fatal("open %s:");
1074	} else {
1075		fd = 1;
1076	}
1077	if (pipe2(p, O_CLOEXEC) != 0)
1078		fatal("pipe2:");
1079	f = fdopen(p[1], "w");
1080	if (!f)
1081		fatal("fdopen:");
1082	err = posix_spawn_file_actions_init(&fa);
1083	if (err)
1084		fatal("posix_spawn_file_actions_init: %s", strerror(errno));
1085	err = posix_spawn_file_actions_adddup2(&fa, p[0], 0);
1086	if (err)
1087		fatal("posix_spawn_file_actions_adddup2: %s", strerror(errno));
1088	err = posix_spawn_file_actions_adddup2(&fa, fd, 1);
1089	if (err)
1090		fatal("posix_spawn_file_actions_adddup2: %s", strerror(errno));
1091	err = posix_spawnp(pid, algo, &fa, NULL, argv, environ);
1092	if (err)
1093		fatal("posix_spawnp %s: %s", algo, strerror(errno));
1094	close(fd);
1095	close(p[0]);
1096	return f;
1097}
1098
/*
 * Find where a long path can be split into the ustar prefix and name
 * fields.
 *
 * Returns a pointer to a '/' in name such that at most 155 bytes come
 * before it and at most 100 bytes follow it, or NULL when there is no
 * such slash or namelen exceeds 256. The slash itself is stored in
 * neither field.
 */
static char *
splitname(char *name, size_t namelen)
{
	char *slash;
	size_t start;

	if (namelen > 256)
		return NULL;
	/*
	 * A slash at index namelen - 101 still leaves exactly 100 bytes for
	 * the name field, so the search window is the last 101 bytes (or the
	 * whole path when it is shorter than that).
	 */
	start = namelen > 101 ? namelen - 101 : 0;
	slash = memchr(name + start, '/', namelen - start);
	if (!slash || slash - name > 155)
		return NULL;
	return slash;
}
1111
1112static void
1113openfile(struct header *h)
1114{
1115	int fd;
1116
1117	if (h->file) {
1118		fd = open(h->file, O_RDONLY);
1119		if (fd < 0)
1120			fatal("open %s:", h->file);
1121		bioinit(&bioin, fd);
1122	}
1123}
1124
1125static void
1126closefile(struct header *h)
1127{
1128	if (h->file) {
1129		if (tflag)
1130			futimens(bioin.fd, (struct timespec[2]){h->fileatime, {.tv_nsec = UTIME_OMIT}});
1131		close(bioin.fd);
1132	}
1133}
1134
/* finish a tar archive by writing two all-zero 512-byte blocks */
static void
closeustar(FILE *f)
{
	static const char zeros[512];
	int i;

	for (i = 0; i < 2; ++i) {
		if (fwrite(zeros, sizeof zeros, 1, f) != 1)
			fatal("write:");
	}
}
1146
1147static void
1148writeustar(FILE *f, struct header *h)
1149{
1150	char buf[512], *slash, tmp[32];
1151	unsigned long sum;
1152	int i;
1153
1154	if (!h) {
1155		closeustar(f);
1156		return;
1157	}
1158	slash = h->slash;
1159	if (!slash && h->pathlen > 100) {
1160		slash = splitname(h->path, h->pathlen);
1161		if (!slash)
1162			fatal("path is too long: %s\n", h->path);
1163	}
1164	if (slash) {
1165		size_t len;
1166
1167		strncpy(buf, slash + 1, 100);
1168		len = slash - h->path;
1169		memcpy(buf + 345, h->path, len);
1170		memset(buf + 345 + len, 0, 155 - len);
1171	} else {
1172		strncpy(buf, h->path, 100);
1173		memset(buf + 345, 0, 155);
1174	}
1175	if (h->mode > 07777777)
1176		fatal("mode is too large: %ju", (uintmax_t)h->mode);
1177	snprintf(tmp, sizeof(tmp), "%.7lo", (unsigned long)h->mode & 07777777);
1178	memcpy(buf + 100, tmp, 8);
1179	if (h->uid > MAXUGID)
1180		fatal("uid is too large: %ju", (uintmax_t)h->uid);
1181	snprintf(tmp, sizeof(tmp), "%.7lo", (unsigned long)h->uid & 07777777);
1182	memcpy(buf + 108, tmp, 8);
1183	if (h->gid > MAXUGID)
1184		fatal("gid is too large: %ju", (uintmax_t)h->gid);
1185	snprintf(tmp, sizeof(tmp), "%.7lo", (unsigned long)h->gid & 07777777);
1186	memcpy(buf + 116, tmp, 8);
1187	if (h->size < 0 || h->size > MAXSIZE)
1188		fatal("size is too large: %ju", (uintmax_t)h->size);
1189	snprintf(buf + 124, 12, "%.11llo", (unsigned long long)h->size);
1190	if (h->mtime.tv_sec < 0 || h->mtime.tv_sec > MAXTIME)
1191		fatal("mtime is too large: %ju", (uintmax_t)h->mtime.tv_sec);
1192	snprintf(buf + 136, 12, "%.11llo", (unsigned long long)h->mtime.tv_sec);
1193	memset(buf + 148, ' ', 8);
1194	buf[156] = h->type;
1195	if (h->linklen > 100)
1196		fatal("link name is too long: %s\n", h->link);
1197	strncpy(buf + 157, h->link, 100);
1198	memcpy(buf + 257, "ustar", 6);
1199	memcpy(buf + 263, "00", 2);
1200	if (strlen(h->uname) > 31)
1201		fatal("user name is too long: %s\n", h->uname);
1202	strncpy(buf + 265, h->uname, 32);
1203	if (strlen(h->gname) > 31)
1204		fatal("group name is too long: %s\n", h->gname);
1205	strncpy(buf + 297, h->gname, 32);
1206	if (major(h->rdev) > 07777777)
1207		fatal("device major is too large: %ju\n", (uintmax_t)major(h->rdev));
1208	snprintf(tmp, sizeof(tmp), "%.7lo", (unsigned long)major(h->rdev) & 07777777);
1209	memcpy(buf + 329, tmp, 8);
1210	if (minor(h->rdev) > 07777777)
1211		fatal("device minor is too large: %ju\n", (uintmax_t)minor(h->rdev));
1212	snprintf(tmp, sizeof(tmp), "%.7lo", (unsigned long)minor(h->rdev) & 07777777);
1213	memcpy(buf + 337, tmp, 8);
1214	memset(buf + 500, 0, 12);
1215	sum = 0;
1216	for (i = 0; i < 512; ++i)
1217		sum += ((unsigned char *)buf)[i];
1218	snprintf(tmp, sizeof(tmp), "%.7lo", sum & 07777777);
1219	memcpy(buf + 148, tmp, 8);
1220	if (fwrite(buf, 512, 1, f) != 1)
1221		fatal("write:");
1222	if (h->data) {
1223		size_t pad;
1224
1225		if (fwrite(h->data, 1, (size_t)h->size, f) != (size_t)h->size)
1226			fatal("write:");
1227		pad = (size_t)(ROUNDUP(h->size, 512) - h->size);
1228		memset(bioin.buf, 0, pad);
1229		if (fwrite(bioin.buf, 1, pad, f) != pad)
1230			fatal("write:");
1231	} else if (h->size > 0) {
1232		openfile(h);
1233		copy(&bioin, h->size, f, ROUNDUP(h->size, 512));
1234		closefile(h);
1235	}
1236}
1237
1238static void
1239writerec(struct strbuf *ext, const char *fmt, ...)
1240{
1241	static struct strbuf buf;
1242	va_list ap;
1243	int d, n, m, l;
1244
1245	buf.len = 0;
1246	va_start(ap, fmt);
1247	l = sbuffmtv(&buf, 256, fmt, ap);
1248	va_end(ap);
1249
1250	d = 0;
1251	m = 1;
1252	for (n = l; n > 0; n /= 10) {
1253		m *= 10;
1254		++d;
1255	}
1256	n = d + 1 + l + 1;
1257	if (n >= m)
1258		++n;
1259	sbuffmt(ext, 256, "%d %.*s\n", n, l, buf.str);
1260}
1261
/*
 * Append a time record for keyword kw to ext.
 *
 * The seconds are always written; a nine-digit fractional part follows
 * only when tv_nsec is nonzero.
 */
static void
writetimerec(struct strbuf *ext, char *kw, struct timespec *ts)
{
	uintmax_t sec = (uintmax_t)ts->tv_sec;

	if (ts->tv_nsec == 0) {
		writerec(ext, "%s=%ju", kw, sec);
		return;
	}
	writerec(ext, "%s=%ju.%.9ld", kw, sec, ts->tv_nsec % 1000000000);
}
1270
1271static void
1272writeexthdr(FILE *f, int type, struct header *h)
1273{
1274	static struct strbuf ext;
1275	struct header exthdr;
1276
1277	ext.len = 0;
1278	if (h->fields & PATH)
1279		writerec(&ext, "path=%s", h->path);
1280	if (h->fields & UID)
1281		writerec(&ext, "uid=%ju", (uintmax_t)h->uid);
1282	if (h->fields & GID)
1283		writerec(&ext, "gid=%ju", (uintmax_t)h->gid);
1284	if (h->fields & SIZE)
1285		writerec(&ext, "size=%ju", (uintmax_t)h->size);
1286	if (h->fields & MTIME)
1287		writetimerec(&ext, "mtime", &h->mtime);
1288	if (h->fields & ATIME)
1289		writetimerec(&ext, "atime", &h->atime);
1290	if (h->fields & CTIME)
1291		writetimerec(&ext, "ctime", &h->ctime);
1292	if (h->fields & UNAME)
1293		writerec(&ext, "uname=%s", h->uname);
1294	if (h->fields & GNAME)
1295		writerec(&ext, "gname=%s", h->gname);
1296	if (ext.len > 0) {
1297		memset(&exthdr, 0, sizeof exthdr);
1298		exthdr.path = "pax_extended_header";
1299		exthdr.pathlen = 20;
1300		exthdr.mode = 0600;
1301		exthdr.link = "";
1302		exthdr.uname = "";
1303		exthdr.gname = "";
1304		exthdr.size = ext.len;
1305		exthdr.type = type;
1306		exthdr.data = ext.str;
1307		writeustar(f, &exthdr);
1308	}
1309}
1310
1311static void
1312mergehdr(struct header *dst, struct header *src, enum field fields)
1313{
1314	fields &= src->fields;
1315	if (fields & PATH) {
1316		dst->path = src->path;
1317		dst->pathlen = src->pathlen;
1318	}
1319	if (fields & UID)
1320		dst->uid = src->uid;
1321	if (fields & GID)
1322		dst->gid = src->gid;
1323	if (fields & SIZE)
1324		dst->size = src->size;
1325	if (fields & MTIME)
1326		dst->mtime = src->mtime;
1327	if (fields & ATIME)
1328		dst->atime = src->atime;
1329	if (fields & CTIME)
1330		dst->ctime = src->ctime;
1331	if (fields & UNAME)
1332		dst->uname = src->uname;
1333	if (fields & GNAME)
1334		dst->gname = src->gname;
1335	if (fields & LINKPATH) {
1336		dst->link = src->link;
1337		dst->linklen = src->linklen;
1338	}
1339	dst->fields |= fields;
1340}
1341
/*
 * Write one archive member in pax format: an optional 'x' extended
 * header followed by the ustar header produced by writeustar().
 *
 * A NULL h ends the archive through closeustar().  Otherwise the fields
 * of h that exceed the limits tested below are merged into the global
 * exthdr, the extended header is written, and those fields are reset in
 * h before the ustar header is emitted.  h is modified in place.
 */
static void
writepax(FILE *f, struct header *h)
{
	enum field fields;

	if (!h) {
		closeustar(f);
		return;
	}
	if (vflag)
		fprintf(stderr, "%s\n", h->path);
	/* collect the fields that have to go into the extended header */
	fields = 0;
	if (h->pathlen > 100) {
		/* PATH is only needed when splitname() finds no split point */
		h->slash = splitname(h->path, h->pathlen);
		if (!h->slash)
			fields |= PATH;
	}
	if (h->uid > MAXUGID)
		fields |= UID;
	if (h->gid > MAXUGID)
		fields |= GID;
	if (h->size > MAXSIZE)
		fields |= SIZE;
	/* any sub-second part also forces an mtime record */
	if (h->mtime.tv_sec > MAXTIME || h->mtime.tv_nsec != 0)
		fields |= MTIME;
	if (opt.times)
		fields |= ATIME | CTIME;
	if (strlen(h->uname) > 31)
		fields |= UNAME;
	if (strlen(h->gname) > 31)
		fields |= GNAME;
	if (h->linklen > 100)
		fields |= LINKPATH;
	/* leave out fields already set in exthdr and fields in opt.delete */
	fields &= ~(exthdr.fields | opt.delete);
	/*
	 * NOTE(review): mergehdr() additionally masks the request with
	 * h->fields and ORs the result into exthdr.fields, whereas the resets
	 * below use the unmasked `fields`.  Confirm that every requested field
	 * is always marked in h->fields, and where exthdr.fields is restored
	 * between members.
	 */
	mergehdr(&exthdr, h, fields);
	writeexthdr(f, 'x', &exthdr);

	/* reset fields merged into extended header */
	if (fields & PATH)
		h->path = "", h->pathlen = 0;
	if (fields & UID)
		h->uid = 0;
	if (fields & GID)
		h->gid = 0;
	if (fields & SIZE)
		h->size = 0;
	/* times are clamped to MAXTIME and lose their sub-second part */
	if (fields & MTIME) {
		if (h->mtime.tv_sec > MAXTIME)
			h->mtime.tv_sec = MAXTIME;
		h->mtime.tv_nsec = 0;
	}
	if (fields & ATIME) {
		if (h->atime.tv_sec > MAXTIME)
			h->atime.tv_sec = MAXTIME;
		h->atime.tv_nsec = 0;
	}
	if (fields & CTIME) {
		if (h->ctime.tv_sec > MAXTIME)
			h->ctime.tv_sec = MAXTIME;
		h->ctime.tv_nsec = 0;
	}
	if (fields & UNAME)
		h->uname = "";
	if (fields & GNAME)
		h->gname = "";
	if (fields & LINKPATH)
		h->link = "", h->linklen = 0;
	h->fields &= ~fields;
	writeustar(f, h);
}
1412
1413static void
1414writecpio(FILE *f, struct header *h)
1415{
1416	static unsigned long ino;
1417	char buf[77];
1418	unsigned long mode;
1419	uintmax_t size;
1420	size_t namesize;
1421	int len;
1422
1423	if (!h) {
1424		memcpy(buf, "070707", 6);
1425		memset(buf + 6, '0', 70);
1426		memcpy(buf + 59, "000013", 6);
1427		if (fwrite(buf, 1, 76, f) != 76)
1428			fatal("write:");
1429		if (fwrite("TRAILER!!!", 1, 11, f) != 11)
1430			fatal("write:");
1431		return;
1432	}
1433	if (vflag)
1434		fprintf(stderr, "%s\n", h->path);
1435	mode = h->mode;
1436	switch (h->type) {
1437	case DIRTYPE: mode |= S_IFDIR; break;
1438	case FIFOTYPE: mode |= S_IFIFO; break;
1439	case REGTYPE: mode |= S_IFREG; break;
1440	case SYMTYPE: mode |= S_IFLNK; break;
1441	case BLKTYPE: mode |= S_IFBLK; break;
1442	case CHRTYPE: mode |= S_IFCHR; break;
1443	default: fatal("unknown or unsupported header type");
1444	}
1445	if (h->dev > 0777777)
1446		fatal("device is too large: %ju", (uintmax_t)h->dev);
1447	if (++ino > 0777777)
1448		fatal("inode is too large: %lu", ino);
1449	if (mode > 0777777)
1450		fatal("mode is too large: %lu", mode);
1451	if (h->uid > MAXUGID)
1452		fatal("uid is too large: %ju", (uintmax_t)h->uid);
1453	if (h->gid > MAXUGID)
1454		fatal("gid is too large: %ju", (uintmax_t)h->gid);
1455	if (h->nlink > 0777777)
1456		fatal("nlink is too large: %ju", (uintmax_t)h->nlink);
1457	if (h->rdev > 0777777)
1458		fatal("device is too large: %ju", (uintmax_t)h->rdev);
1459	if (h->mtime.tv_sec > MAXTIME)
1460		fatal("mtime is too large: %ju", (uintmax_t)h->mtime.tv_sec);
1461	namesize = h->pathlen;
1462	if (namesize > 0 && h->path[namesize - 1] == '/')
1463		--namesize;
1464	if (namesize > 077777777777 - 1)
1465		fatal("path is too large: %ju", (uintmax_t)h->pathlen + 1);
1466	size = h->type == SYMTYPE ? (uintmax_t)h->linklen : (uintmax_t)h->size;
1467	if (size > MAXSIZE)
1468		fatal("size is too large: %ju", h->size);
1469	len = snprintf(buf, sizeof buf, "070707%.6lo%.6lo%.6lo%.6lo%.6lo%.6lo%.6lo%.11llo%.6lo%.11jo",
1470		(unsigned long)h->dev, ino, mode,
1471		(unsigned long)h->uid, (unsigned long)h->gid,
1472		(unsigned long)h->nlink, (unsigned long)h->rdev,
1473		(unsigned long long)h->mtime.tv_sec, (unsigned long)namesize + 1, size);
1474	assert(len == 76);
1475	if (fwrite(buf, 1, 76, f) != 76)
1476		fatal("write:");
1477	if (fwrite(h->path, 1, namesize, f) != namesize || fputc('\0', f) == EOF)
1478		fatal("write:");
1479	switch (h->type) {
1480	case SYMTYPE:
1481		if (fwrite(h->link, 1, h->linklen, f) != h->linklen)
1482			fatal("write:");
1483		break;
1484	case REGTYPE:
1485		openfile(h);
1486		copy(&bioin, h->size, f, h->size);
1487		closefile(h);
1488		break;
1489	default:
1490		break;
1491	}
1492}
1493
1494static void
1495filepush(struct filelist *files, const char *name, size_t pathlen, dev_t dev)
1496{
1497	struct file *f;
1498	size_t namelen;
1499
1500	namelen = strlen(name);
1501	f = malloc(sizeof *f + namelen + 1);
1502	if (!f)
1503		fatal(NULL);
1504	memcpy(f->name, name, namelen + 1);
1505	f->namelen = namelen;
1506	f->pathlen = pathlen;
1507	f->dev = dev;
1508	f->next = files->pending;
1509	files->pending = f;
1510}
1511
1512static int
1513readfile(struct bufio *f, struct header *h)
1514{
1515	/* use our own path buffer, since we use it for traversal */
1516	static struct strbuf path;
1517	struct stat st;
1518	int flag;
1519	DIR *dir;
1520	struct dirent *d;
1521	ssize_t ret;
1522	dev_t dev;
1523
1524	(void)f;
1525
1526next:
1527	flag = follow == 'L' ? 0 : AT_SYMLINK_NOFOLLOW;
1528	if (files.pending) {
1529		struct file *f;
1530
1531		f = files.pending;
1532		files.pending = f->next;
1533		assert(f->pathlen <= path.len);
1534		path.len = f->pathlen;
1535		sbufcat(&path, f->name, f->namelen, 1024);
1536		if (follow == 'H' && f->pathlen > 0)
1537			flag &= ~AT_SYMLINK_NOFOLLOW;
1538		dev = f->dev;
1539		free(f);
1540	} else {
1541		if (!files.input)
1542			return 0;
1543		ret = getline(&path.str, &path.cap, files.input);
1544		if (ret < 0) {
1545			if (ferror(files.input))
1546				fatal("getline:");
1547			return 0;
1548		}
1549		if (ret > 0 && path.str[ret - 1] == '\n')
1550			path.str[--ret] = '\0';
1551		path.len = ret;
1552		dev = 0;
1553	}
1554
1555	if (fstatat(AT_FDCWD, path.str, &st, flag) != 0)
1556		fatal("stat %s:", path.str);
1557	if (Xflag && dev && st.st_dev != dev)
1558		goto next;
1559	if (S_ISDIR(st.st_mode) && path.str[path.len - 1] != '/')
1560		sbufcat(&path, "/", 1, 1024);
1561	h->fields = PATH | UID | GID | ATIME | MTIME | CTIME;
1562	h->path = path.str;
1563	h->pathlen = path.len;
1564	h->dev = st.st_dev;
1565	h->ino = st.st_ino;
1566	h->mode = st.st_mode & ~S_IFMT;
1567	h->uid = st.st_uid;
1568	h->gid = st.st_gid;
1569	h->nlink = st.st_nlink;
1570	h->rdev = 0;
1571	h->size = 0;
1572	h->atime = st.st_atim;
1573	h->mtime = st.st_mtim;
1574	h->ctime = st.st_ctim;
1575	h->uname = uidtouname(st.st_uid, "");
1576	h->gname = gidtogname(st.st_gid, "");
1577	h->link = "";
1578	h->linklen = 0;
1579	h->slash = NULL;
1580	h->data = NULL;
1581	h->file = h->path;
1582	h->fileatime = st.st_atim;
1583	h->flag = flag;
1584	switch (st.st_mode & S_IFMT) {
1585	case S_IFREG:
1586		h->type = REGTYPE;
1587		h->size = st.st_size;
1588		break;
1589	case S_IFLNK:
1590		h->type = SYMTYPE;
1591		h->linkbuf.len = 0;
1592		sbufalloc(&h->linkbuf, 1024, 1024);
1593		for (;;) {
1594			ret = readlink(h->path, h->linkbuf.str, h->linkbuf.cap - 1);
1595			if (ret < 0)
1596				fatal("readlink %s:", h->path);
1597			if ((size_t)ret < h->linkbuf.cap)
1598				break;
1599			if (h->linkbuf.cap > (size_t)SSIZE_MAX / 2)
1600				fatal("symlink target is too long");
1601			sbufalloc(&h->linkbuf, h->linkbuf.cap * 2, 1024);
1602		}
1603		h->linkbuf.str[ret] = '\0';
1604		h->linkbuf.len = ret;
1605		h->link = h->linkbuf.str;
1606		h->linklen = h->linkbuf.len;
1607		break;
1608	case S_IFCHR:
1609		h->type = CHRTYPE;
1610		h->rdev = st.st_rdev;
1611		break;
1612	case S_IFBLK:
1613		h->type = BLKTYPE;
1614		h->rdev = st.st_rdev;
1615		break;
1616	case S_IFDIR:
1617		h->type = DIRTYPE;
1618		dir = opendir(h->path);
1619		if (!dir)
1620			fatal("opendir %s:", h->path);
1621		for (;;) {
1622			errno = 0;
1623			d = readdir(dir);
1624			if (!d)
1625				break;
1626			if (strcmp(d->d_name, ".") == 0 || strcmp(d->d_name, "..") == 0)
1627				continue;
1628			filepush(&files, d->d_name, path.len, st.st_dev);
1629		}
1630		if (errno != 0)
1631			fatal("readdir %s:", h->path);
1632		closedir(dir);
1633		break;
1634	case S_IFIFO:
1635		h->type = FIFOTYPE;
1636		break;
1637	}
1638	return 1;
1639}
1640
/* Print the usage line on stderr and exit with status 2. */
static void
usage(void)
{
	fputs("usage: pax\n", stderr);
	exit(2);
}
1647
1648static void
1649print_listopt(struct header *h)
1650{
1651	const char *s;
1652	size_t len;
1653
1654	s = opt.listopt;
1655	while (*s) {
1656		if (*s == '%') {
1657			s++;
1658			if (*s == '%') {
1659				putchar('%');
1660				s++;
1661			} else {
1662				len = 0;
1663				while (s[len] && isalnum((unsigned char)s[len]))
1664					len++;
1665				if (len > 0) {
1666					if (strncmp(s, "path", len) == 0 && len == 4)
1667						printf("%s", h->path);
1668					else if (strncmp(s, "size", len) == 0 && len == 4)
1669						printf("%ju", (uintmax_t)h->size);
1670					else if (strncmp(s, "uid", len) == 0 && len == 3)
1671						printf("%ju", (uintmax_t)h->uid);
1672					else if (strncmp(s, "gid", len) == 0 && len == 3)
1673						printf("%ju", (uintmax_t)h->gid);
1674					else if (strncmp(s, "uname", len) == 0 && len == 5)
1675						printf("%s", h->uname);
1676					else if (strncmp(s, "gname", len) == 0 && len == 5)
1677						printf("%s", h->gname);
1678					else if (strncmp(s, "mode", len) == 0 && len == 4)
1679						printf("%04o", (unsigned int)(h->mode & 07777));
1680					else if (strncmp(s, "mtime", len) == 0 && len == 5)
1681						printf("%ju", (uintmax_t)h->mtime.tv_sec);
1682					else if (strncmp(s, "atime", len) == 0 && len == 5)
1683						printf("%ju", (uintmax_t)h->atime.tv_sec);
1684					else if (strncmp(s, "ctime", len) == 0 && len == 5)
1685						printf("%ju", (uintmax_t)h->ctime.tv_sec);
1686					else if (strncmp(s, "linkpath", len) == 0 && len == 8)
1687						printf("%s", h->link);
1688					else {
1689						putchar('%');
1690						fwrite(s, 1, len, stdout);
1691					}
1692					s += len;
1693				} else {
1694					putchar('%');
1695				}
1696			}
1697		} else if (*s == '\\') {
1698			s++;
1699			if (*s == 'n') {
1700				putchar('\n');
1701				s++;
1702			} else if (*s == 't') {
1703				putchar('\t');
1704				s++;
1705			} else if (*s == '\\') {
1706				putchar('\\');
1707				s++;
1708			} else if (*s) {
1709				putchar(*s);
1710				s++;
1711			}
1712		} else {
1713			putchar(*s);
1714			s++;
1715		}
1716	}
1717	putchar('\n');
1718}
1719
1720static void
1721parseopts(char *s)
1722{
1723	char *key, *val, *end, *d;
1724	int ext;
1725
1726	for (;;) {
1727		s += strspn(s, " \t\n\v\f\r");
1728		if (!*s)
1729			break;
1730		key = s;
1731		while (*s && *s != ',' && *s != '=')
1732			++s;
1733		val = NULL;
1734		end = NULL, ext = 0;  /* silence gcc uninitialized warning */
1735		if (*s == '=') {
1736			ext = s > key && s[-1] == ':';
1737			s[-ext] = '\0';
1738			val = ++s;
1739			for (d = s; *s && *s != ','; ++s, ++d) {
1740				if (*s == '\\')
1741					++s;
1742				if (d < s)
1743					*d = *s;
1744			}
1745			end = d;
1746		}
1747		if (*s == ',')
1748			*s++ = '\0';
1749		if (strcmp(key, "linkdata") == 0) {
1750			if (val)
1751				fatal("option 'linkdata' must not have a value");
1752			opt.linkdata = 1;
1753		} else if (strcmp(key, "times") == 0) {
1754			if (val)
1755				fatal("option 'times' must not have a value");
1756			opt.times = 1;
1757		} else if (!val) {
1758			fatal("option '%s' must have a value", key);
1759		} else if (strcmp(key, "delete") == 0) {
1760			const struct keyword *kw;
1761
1762			for (kw = keywords; kw != keywords + LEN(keywords); ++kw) {
1763				switch (fnmatch(val, kw->name, 0)) {
1764				case 0: opt.delete |= kw->field; break;
1765				case FNM_NOMATCH: break;
1766				default: fatal("fnmatch error");
1767				}
1768			}
1769		} else if (strcmp(key, "exthdr.name") == 0) {
1770			opt.exthdrname = val;
1771		} else if (strcmp(key, "globexthdr.name") == 0) {
1772			opt.globexthdrname = val;
1773		} else if (strcmp(key, "invalid") == 0) {
1774			if (strcmp(val, "bypass") != 0 && strcmp(val, "rename") != 0 &&
1775			    strcmp(val, "UTF-8") != 0 && strcmp(val, "write") != 0) {
1776				fatal("invalid action '%s' for option 'invalid'", val);
1777			}
1778			opt.invalid = val;
1779		} else if (strcmp(key, "listopt") == 0) {
1780			opt.listopt = val;
1781		} else {
1782			extkeyval(ext ? &exthdr : &globexthdr, key, val, end - val);
1783		}
1784	}
1785}
1786
1787static void
1788listhdr(FILE *f, struct header *h)
1789{
1790	char mode[11], time[13], info[23];
1791	char unamebuf[(sizeof(uid_t) * CHAR_BIT + 2) / 3 + 1];
1792	char gnamebuf[(sizeof(gid_t) * CHAR_BIT + 2) / 3 + 1];
1793	const char *uname, *gname, *timefmt;
1794	struct tm *tm;
1795
1796	if (!h)
1797		return;
1798	(void)f;
1799	if (opt.listopt) {
1800		print_listopt(h);
1801		return;
1802	}
1803	if (!vflag) {
1804		printf("%s\n", h->path);
1805		return;
1806	}
1807	memset(mode, '-', sizeof mode - 1);
1808	mode[10] = '\0';
1809	switch (h->type) {
1810	case SYMTYPE: mode[0] = 'l'; break;
1811	case CHRTYPE: mode[0] = 'c'; break;
1812	case BLKTYPE: mode[0] = 'b'; break;
1813	case DIRTYPE: mode[0] = 'd'; break;
1814	case FIFOTYPE: mode[0] = 'p'; break;
1815	}
1816	if (h->mode & S_IRUSR) mode[1] = 'r';
1817	if (h->mode & S_IWUSR) mode[2] = 'w';
1818	if (h->mode & S_IXUSR) mode[3] = 'x';
1819	if (h->mode & S_IRGRP) mode[4] = 'r';
1820	if (h->mode & S_IWGRP) mode[5] = 'w';
1821	if (h->mode & S_IXGRP) mode[6] = 'x';
1822	if (h->mode & S_IROTH) mode[7] = 'r';
1823	if (h->mode & S_IWOTH) mode[8] = 'w';
1824	if (h->mode & S_IXOTH) mode[9] = 'x';
1825	if (h->mode & S_ISUID) mode[3] = mode[3] == 'x' ? 's' : 'S';
1826	if (h->mode & S_ISGID) mode[3] = mode[6] == 'x' ? 's' : 'S';
1827	if (h->mode & S_ISVTX) mode[9] = mode[9] == 'x' ? 't' : 'T';
1828	uname = h->uname;
1829	if (!uname[0]) {
1830		snprintf(unamebuf, sizeof unamebuf, "%ju", (uintmax_t)h->uid);
1831		uname = unamebuf;
1832	}
1833	gname = h->gname;
1834	if (!gname[0]) {
1835		snprintf(gnamebuf, sizeof gnamebuf, "%ju", (uintmax_t)h->gid);
1836		gname = gnamebuf;
1837	}
1838	timefmt = h->mtime.tv_sec + 15780000 < curtime || h->mtime.tv_sec > curtime
1839		? "%b %e  %Y" : "%b %e %H:%M";
1840	tm = localtime(&h->mtime.tv_sec);
1841	if (!tm)
1842		fatal("localtime:");
1843	strftime(time, sizeof time, timefmt, tm);
1844	if (h->type == CHRTYPE || h->type == BLKTYPE)
1845		snprintf(info, sizeof info, "%u, %u", major(h->rdev), minor(h->rdev));
1846	else
1847		snprintf(info, sizeof info, "%ju", (uintmax_t)h->size);
1848	printf("%s %2ju %-8s %-8s %9s %s %s", mode, (uintmax_t)h->nlink, uname, gname, info, time, h->path);
1849	switch (h->type) {
1850	case LNKTYPE: printf(" == %s", h->link); break;
1851	case SYMTYPE: printf(" -> %s", h->link); break;
1852	}
1853	putchar('\n');
1854}
1855
/*
 * Create the parent directories of name (len bytes long) relative to
 * the directory descriptor fd.
 *
 * Each '/' strictly between the first and the last character ends one
 * prefix; the slash is temporarily replaced by a NUL, the prefix is
 * created with mkdirat() and the slash is restored.  Directories that
 * already exist are fine; any other failure is reported through fatal().
 */
static void
mkdirp(int fd, char *name, size_t len)
{
	size_t i;

	if (len == 0)
		return;
	for (i = 1; i + 1 < len; ++i) {
		if (name[i] == '/') {
			name[i] = '\0';
			if (mkdirat(fd, name, 0777) != 0 && errno != EEXIST)
				fatal("mkdir %s:", name);
			name[i] = '/';
		}
	}
}
1872
1873static void
1874writefile(FILE *unused, struct header *h)
1875{
1876	FILE *f;
1877	int fd, retry, flags;
1878	struct stat st;
1879	mode_t mode;
1880
1881	(void)unused;
1882	if (!h)
1883		return;
1884	if (uflag && fstatat(destfd, h->path, &st, 0) == 0) {
1885		if (h->mtime.tv_sec < st.st_mtime || (h->mtime.tv_sec == st.st_mtime
1886		 && h->mtime.tv_nsec < st.st_mtim.tv_nsec))
1887			return;
1888	}
1889	if (vflag)
1890		fprintf(stderr, "%s\n", h->path);
1891	if (lflag && h->file && h->type != DIRTYPE) {
1892		if (linkat(AT_FDCWD, h->file, destfd, h->path, h->flag) == 0)
1893			return;
1894	}
1895	retry = 1;
1896	if (0) {
1897	retry:
1898		retry = 0;
1899		mkdirp(destfd, h->path, h->pathlen);
1900	}
1901	mode = h->mode & ~(S_ISUID | S_ISGID);
1902	switch (h->type) {
1903	case REGTYPE:
1904		flags = O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC;
1905		if (kflag)
1906			flags |= O_EXCL;
1907		fd = openat(destfd, h->path, flags, mode);
1908		if (fd < 0) {
1909			if (retry && errno == ENOENT)
1910				goto retry;
1911			fatal("open %s%s:", dest, h->path);
1912		}
1913		f = fdopen(fd, "w");
1914		if (!f)
1915			fatal("open %s:", h->path);
1916		openfile(h);
1917		copy(&bioin, h->size, f, h->size);
1918		closefile(h);
1919		fclose(f);
1920		break;
1921	case LNKTYPE:
1922		if (linkat(destfd, h->link, destfd, h->path, 0) != 0) {
1923			if (retry && errno == ENOENT)
1924				goto retry;
1925			fatal("link %s%s:", dest, h->path);
1926		}
1927		break;
1928	case SYMTYPE:
1929		if (symlinkat(h->link, destfd, h->path) != 0) {
1930			if (retry && errno == ENOENT)
1931				goto retry;
1932			fatal("symlink %s%s:", dest, h->path);
1933		}
1934		break;
1935	case CHRTYPE:
1936	case BLKTYPE:
1937		mode |= h->type == CHRTYPE ? S_IFCHR : S_IFBLK;
1938		if (mknodat(destfd, h->path, mode, h->rdev) != 0) {
1939			if (retry && errno == ENOENT)
1940				goto retry;
1941			fatal("mknod %s%s:", dest, h->path);
1942		}
1943		break;
1944	case DIRTYPE:
1945		if (mkdirat(destfd, h->path, mode) != 0) {
1946			if (retry && errno == ENOENT)
1947				goto retry;
1948			if (errno == EEXIST) {
1949				if (fstatat(destfd, h->path, &st, 0) == 0 && S_ISDIR(st.st_mode))
1950					break;
1951				errno = EEXIST;
1952			}
1953			fatal("mkdir %s%s:", dest, h->path);
1954		}
1955		break;
1956	case FIFOTYPE:
1957		if (mkfifoat(destfd, h->path, mode) != 0) {
1958			if (retry && errno == ENOENT)
1959				goto retry;
1960			if (errno == EEXIST) {
1961				if (fstatat(destfd, h->path, &st, 0) == 0 && S_ISFIFO(st.st_mode))
1962					break;
1963				errno = EEXIST;
1964			}
1965			fatal("mkfifo %s%s:", dest, h->path);
1966		}
1967		break;
1968	}
1969	if (preserve & (ATIME | MTIME)) {
1970		struct timespec ts[2];
1971
1972		ts[0] = preserve & ATIME ? h->atime : (struct timespec){.tv_nsec = UTIME_OMIT};
1973		ts[1] = preserve & MTIME ? h->mtime : (struct timespec){.tv_nsec = UTIME_OMIT};
1974		if (utimensat(destfd, h->path, ts, AT_SYMLINK_NOFOLLOW) != 0) {
1975			fprintf(stderr, "utimens %s%s: %s\n", dest, h->path, strerror(errno));
1976			exitstatus = 1;
1977		}
1978	}
1979	if (preserve & (UID | GID)) {
1980		uid_t uid;
1981		gid_t gid;
1982
1983		uid = preserve & UID ? unametouid(h->uname, h->uid) : (uid_t)-1;
1984		gid = preserve & GID ? gnametogid(h->gname, h->gid) : (gid_t)-1;
1985		if (fchownat(destfd, h->path, uid, gid, 0) != 0) {
1986			fprintf(stderr, "chown %s%s: %s\n", dest, h->path, strerror(errno));
1987			exitstatus = 1;
1988		} else {
1989			/* add back setuid/setgid bits if we preserved the uid/gid */
1990			mode = h->mode;
1991		}
1992	}
1993	if (preserve & MODE && h->type != SYMTYPE) {
1994		if (fchmodat(destfd, h->path, mode, 0) != 0) {
1995			fprintf(stderr, "chmod %s%s: %s\n", dest, h->path, strerror(errno));
1996			exitstatus = 1;
1997		}
1998	}
1999}
2000
2001static int
2002match(struct header *h)
2003{
2004	static struct dir {
2005		char *path;
2006		size_t pathlen;
2007	} *dirs;
2008	static size_t dirslen;
2009	size_t i;
2010
2011	if (patslen == 0)
2012		return 1;
2013	if (!dflag) {
2014		struct dir *d;
2015
2016		for (d = dirs; d < dirs + dirslen; ++d) {
2017			if (h->pathlen >= d->pathlen && memcmp(h->path, d->path, d->pathlen) == 0)
2018				return !cflag;
2019		}
2020	}
2021	for (i = 0; i < patslen; ++i) {
2022		if (nflag && patsused[i])
2023			continue;
2024		switch (fnmatch(pats[i], h->path, FNM_PATHNAME | FNM_PERIOD)) {
2025		case 0:
2026			patsused[i] = 1;
2027			if (!dflag && h->type == DIRTYPE) {
2028				struct dir *d;
2029
2030				if ((dirslen & (dirslen - 1)) == 0) {
2031					dirs = reallocarray(dirs, dirslen ? dirslen * 2 : 32, sizeof *dirs);
2032					if (!dirs)
2033						fatal(NULL);
2034				}
2035				d = &dirs[dirslen++];
2036				d->pathlen = h->pathlen;
2037				d->path = malloc(d->pathlen + 1);
2038				if (!d->path)
2039					fatal(NULL);
2040				memcpy(d->path, h->path, h->pathlen);
2041				/* add trailing slash if not already present */
2042				if (d->path[d->pathlen - 1] != '/')
2043					d->path[d->pathlen++] = '/';
2044			}
2045			return !cflag;
2046		case FNM_NOMATCH:
2047			break;
2048		default:
2049			fatal("fnmatch error");
2050		}
2051	}
2052	return cflag;
2053}
2054
2055static void
2056parsereplstr(char *str)
2057{
2058	static struct replstr **end = &replstr;
2059	struct replstr *r;
2060	char *old, *new, delim;
2061	int err;
2062
2063	delim = str[0];
2064	if (!delim)
2065		usage();
2066	old = str + 1;
2067	str = strchr(old, delim);
2068	if (!str)
2069		usage();
2070	*str = 0;
2071	new = str + 1;
2072	str = strchr(new, delim);
2073	if (!str)
2074		usage();
2075	*str = 0;
2076
2077	r = malloc(sizeof *r);
2078	if (!r)
2079		fatal(NULL);
2080	r->next = NULL;
2081	r->global = 0;
2082	r->print = 0;
2083	r->symlink = 0;
2084	for (;;) {
2085		switch (*++str) {
2086		case 'g': r->global = 1; break;
2087		// ?man -p: preserve file attributes
2088	case 'p': r->print = 1; break;
2089		// ?man -s: silent mode or print summary
2090	case 's': r->symlink = 0; break;
2091		case 'S': r->symlink = 1; break;
2092		case 0: goto done;
2093		}
2094	}
2095done:
2096	err = regcomp(&r->old, old, REG_NEWLINE);
2097	if (err != 0) {
2098		char errbuf[256];
2099
2100		regerror(err, &r->old, errbuf, sizeof errbuf);
2101		fatal("invalid regular expression: %s", errbuf);
2102	}
2103	r->new = new;
2104	*end = r;
2105	end = &r->next;
2106}
2107
2108static int
2109applyrepl(struct replstr *r, struct strbuf *b, const char *old, size_t oldlen)
2110{
2111	regmatch_t match[10];
2112	size_t i, n, l;
2113	const char *s, *p;
2114	char *d;
2115	int flags;
2116
2117	flags = 0;
2118	b->len = 0;
2119	p = old;
2120	while (regexec(&r->old, p, LEN(match), match, flags) == 0) {
2121		n = match[0].rm_so;
2122		for (s = r->new; *s; ++s) {
2123			switch (*s) {
2124			case '&':  i = 0; break;
2125			case '\\': i = *++s - '0'; break;
2126			default:   i = -1; break;
2127			}
2128			n += i <= 9 ? match[i].rm_eo - match[i].rm_so : 1;
2129		}
2130		d = sbufalloc(b, n + 1, 1024);
2131		b->len += n;
2132		memcpy(d, p, match[0].rm_so);
2133		d += match[0].rm_so;
2134		for (s = r->new; *s; ++s) {
2135			switch (*s) {
2136			case '&':  i = 0; break;
2137			case '\\': i = *++s - '0'; break;
2138			default:   i = -1; break;
2139			}
2140			if (i <= 9) {
2141				l = match[i].rm_eo - match[i].rm_so;
2142				memcpy(d, p + match[i].rm_so, l);
2143				d += l;
2144			} else {
2145				*d++ = *s;
2146			}
2147		}
2148		flags |= REG_NOTBOL;
2149		p += match[0].rm_eo;
2150		if (!r->global)
2151			break;
2152	}
2153	if (flags == 0)
2154		return 0;
2155	sbufcat(b, p, oldlen - (p - old), 1024);
2156	if (r->print)
2157		fprintf(stderr, "%s >> %s\n", old, b->str);
2158	return 1;
2159}
2160
2161static void
2162replace(struct header *h)
2163{
2164	static struct strbuf path, link;
2165	struct replstr *r;
2166
2167	for (r = replstr; r; r = r->next) {
2168		if (applyrepl(r, &path, h->path, h->pathlen)) {
2169			h->path = path.str;
2170			h->pathlen = path.len;
2171			break;
2172		}
2173	}
2174	if (h->type != LNKTYPE && h->type != SYMTYPE)
2175		return;
2176	for (r = replstr; r; r = r->next) {
2177		if (h->type == SYMTYPE && !r->symlink)
2178			continue;
2179		if (applyrepl(r, &link, h->link, h->linklen)) {
2180			h->link = link.str;
2181			h->linklen = link.len;
2182			break;
2183		}
2184	}
2185}
2186
/*
 * Return 1 when name is NULL, empty, or contains a byte outside the
 * range 32..126; return 0 otherwise.
 */
static int
is_invalid_name(const char *name)
{
	const unsigned char *p = (const unsigned char *)name;

	if (p == NULL || *p == '\0')
		return 1;
	while (*p != '\0') {
		if (*p < 0x20 || *p > 0x7e)
			return 1;
		++p;
	}
	return 0;
}
2201
2202static void
2203interactiverename(struct header *h)
2204{
2205	static FILE *ttyfp = NULL;
2206	static char ttybuf[1024];
2207	char *res;
2208
2209	if (!iflag)
2210		return;
2211
2212	if (!ttyfp) {
2213		ttyfp = fopen("/dev/tty", "r+");
2214		if (!ttyfp)
2215			ttyfp = stdin;
2216	}
2217
2218	fprintf(stderr, "rename %s? ", h->path);
2219	fflush(stderr);
2220
2221	if (ttyfp == stdin)
2222		res = fgets(ttybuf, sizeof(ttybuf), stdin);
2223	else
2224		res = fgets(ttybuf, sizeof(ttybuf), ttyfp);
2225
2226	if (!res) {
2227		h->path = "";
2228		h->pathlen = 0;
2229		return;
2230	}
2231
2232	ttybuf[strcspn(ttybuf, "\n")] = '\0';
2233
2234	if (ttybuf[0] == '\0')
2235		return;
2236
2237	if (strcmp(ttybuf, ".") == 0) {
2238		h->path = "";
2239		h->pathlen = 0;
2240		return;
2241	}
2242
2243	h->pathbuf.len = 0;
2244	sbufcat(&h->pathbuf, ttybuf, strlen(ttybuf), 1024);
2245	h->path = h->pathbuf.str;
2246	h->pathlen = h->pathbuf.len;
2247}
2248
2249static void
2250checkinvalid(struct header *h)
2251{
2252	int saved_iflag;
2253
2254	if (!opt.invalid)
2255		return;
2256	if (is_invalid_name(h->path)) {
2257		if (strcmp(opt.invalid, "bypass") == 0) {
2258			h->path = "";
2259			h->pathlen = 0;
2260		} else if (strcmp(opt.invalid, "rename") == 0) {
2261			saved_iflag = iflag;
2262			iflag = 1;
2263			interactiverename(h);
2264			iflag = saved_iflag;
2265		}
2266	}
2267}
2268
/*
 * Find the offset at which new members can be appended to the tar
 * archive in filename.
 *
 * The file is read in 512-byte blocks.  The archive ends at the first
 * of two consecutive all-zero blocks, and the offset of that block is
 * returned so the whole end marker is dropped by the truncation.
 * Without a marker, the offset after the last complete block is
 * returned.  Returns 0 when the file does not exist; any other open
 * failure is reported through fatal().
 *
 * NOTE(review): headers are not parsed, so member data holding two
 * consecutive zero blocks would presumably be taken for the end marker
 * -- confirm whether that matters for the supported use.
 * NOTE(review): an archive consisting only of the end marker yields 0,
 * the same value as a missing file.
 */
static off_t
locate_tar_end(const char *filename)
{
	char buf[512];
	off_t offset = 0;
	int zero_blocks = 0;
	int fd, i, is_zero;
	ssize_t r;

	fd = open(filename, O_RDONLY);
	if (fd < 0) {
		if (errno == ENOENT)
			return 0;
		fatal("open %s for append check:", filename);
	}

	/* offset is always the start of the block that was just read */
	while ((r = read(fd, buf, 512)) == 512) {
		is_zero = 1;
		for (i = 0; i < 512; i++) {
			if (buf[i] != 0) {
				is_zero = 0;
				break;
			}
		}
		if (is_zero) {
			zero_blocks++;
			if (zero_blocks == 2) {
				close(fd);
				/*
				 * The current block is the second zero block; the
				 * marker starts one block earlier.
				 */
				return offset - 512;
			}
		} else {
			zero_blocks = 0;
		}
		offset += 512;
	}

	close(fd);
	return offset;
}
2308
2309static void
2310handle_append(const char *filename, const char *algo, const char *format)
2311{
2312	off_t offset;
2313	int fd;
2314
2315	if (!aflag)
2316		return;
2317	if (algo)
2318		fatal("cannot append to compressed archives");
2319	if (strcmp(format, "ustar") != 0 && strcmp(format, "pax") != 0)
2320		fatal("append is only supported for ustar and pax formats");
2321
2322	if (filename) {
2323		offset = locate_tar_end(filename);
2324		if (offset > 0) {
2325			fd = open(filename, O_RDWR);
2326			if (fd >= 0) {
2327				if (ftruncate(fd, offset) != 0)
2328					fatal("ftruncate %s for append:", filename);
2329				close(fd);
2330			}
2331		}
2332	}
2333}
2334
2335// ?man pax: portable archive interchange
2336// ?man read, write, and list member files of archive files
2337int
2338main(int argc, char *argv[])
2339{
2340	const char *name = NULL, *arg, *format = "pax";
2341	const char *algo = NULL;
2342	enum mode mode = LIST;
2343	struct header hdr;
2344	readfn *readhdr = NULL;
2345	writefn *writehdr = listhdr;
2346	FILE *out = NULL;
2347	pid_t pid = -1;
2348	int i;
2349	size_t l;
2350
2351	ARGBEGIN {
2352	// ?man -a: print or show all entries
2353	case 'a':
2354		aflag = 1;
2355		break;
2356	// ?man -b:str: specify block size or base directory
2357	case 'b':
2358		EARGF(usage());
2359		break;
2360	// ?man -c: print count or perform stdout action
2361	case 'c':
2362		cflag = 1;
2363		break;
2364	// ?man -d: specify directory
2365	case 'd':
2366		dflag = 1;
2367		break;
2368	// ?man -f:str: force the operation
2369	case 'f':
2370		name = EARGF(usage());
2371		break;
2372	// ?man -H: specify option flag
2373	case 'H':
2374		follow = 'H';
2375		break;
2376	// ?man -i: interactive mode or prompt for confirmation
2377	case 'i':
2378		iflag = 1;
2379		break;
2380	// ?man -j: specify option flag
2381	case 'j':
2382		algo = "bzip2";
2383		break;
2384	// ?man -J: specify option flag
2385	case 'J':
2386		algo = "xz";
2387		break;
2388	// ?man -k: specify option flag
2389	case 'k':
2390		kflag = 1;
2391		break;
2392	// ?man -l: list in long format
2393	case 'l':
2394		lflag = 1;
2395		break;
2396	// ?man -L: specify option flag
2397	case 'L':
2398		follow = 'L';
2399		break;
2400	// ?man -n: print line numbers or counts
2401	case 'n':
2402		nflag = 1;
2403		break;
2404	// ?man -o:str: specify output file
2405	case 'o':
2406		parseopts(EARGF(usage()));
2407		break;
2408	// ?man -p:str: preserve file attributes
2409	case 'p':
2410		for (arg = EARGF(usage()); *arg; ++arg) {
2411			switch (*arg) {
2412	// ?man -a: print or show all entries
2413	case 'a': preserve &= ~ATIME; break;
2414	// ?man -e: specify expression or pattern
2415	case 'e': preserve = ~0; break;
2416	// ?man -m: specify mode or limit
2417	case 'm': preserve &= ~MTIME; break;
2418	// ?man -o: specify output file
2419	case 'o': preserve |= UID | GID; break;
2420	// ?man -p: preserve file attributes
2421	case 'p': preserve |= MODE; break;
2422			default: fatal("unknown -p option");
2423			}
2424		}
2425		break;
2426	// ?man -r: operate recursively
2427	case 'r':
2428		mode |= READ;
2429		break;
2430	// ?man -s:str: silent mode or print summary
2431	case 's':
2432		parsereplstr(EARGF(usage()));
2433		break;
2434	// ?man -t: sort or specify timestamp
2435	case 't':
2436		tflag = 1;
2437		break;
2438	// ?man -u: unbuffered output
2439	case 'u':
2440		uflag = 1;
2441		break;
2442	// ?man -v: verbose mode; show progress
2443	case 'v':
2444		vflag = 1;
2445		break;
2446	// ?man -w: wait for completion
2447	case 'w':
2448		mode |= WRITE;
2449		break;
2450	// ?man -x:str: specify the output archive format (ustar, pax or cpio)
2451	case 'x':
2452		format = EARGF(usage());
2453		break;
2454	// ?man -X: do not descend into directories on a different device
2455	case 'X':
2456		Xflag = 1;
2457		break;
2458	// ?man -z: use gzip as the archive compression algorithm
2459	case 'z':
2460		algo = "gzip";
2461		break;
2462	default:
2463		usage();
2464	} ARGEND;
2465
2466	curtime = time(NULL);
2467	if (curtime == (time_t)-1)
2468		fatal("time:");
2469	exthdr.fields &= ~opt.delete;
2470	exthdr.delete = exthdr.fields;
2471	globexthdr.fields &= ~opt.delete;
2472	globexthdr.delete = globexthdr.fields;
2473	if ((exthdr.fields | globexthdr.fields | opt.delete) & SIZE)
2474		fatal("field 'size' cannot be overridden or deleted");
2475
2476	switch (mode) {
2477	case READ:
2478		writehdr = writefile;
2479		/* fallthrough */
2480	case LIST:
2481		if (name && strcmp(name, "-") != 0) {
2482			bioin.fd = open(name, O_RDONLY);
2483			if (bioin.fd < 0)
2484				fatal("open %s:", name);
2485		}
2486		readhdr = detectformat(&bioin, algo, &pid);
2487		if (!readhdr)
2488			fatal("could not detect archive format");
2489		if (argc) {
2490			pats = argv;
2491			patslen = argc;
2492			patsused = calloc(1, argc);
2493			if (!patsused)
2494				fatal(NULL);
2495		}
2496		break;
2497	case WRITE:
2498		if (name && strcmp(name, "-") == 0)
2499			name = NULL;
2500		handle_append(name, algo, format);
2501		out = compress(algo, name, &pid);
2502		if (strcmp(format, "ustar") == 0) {
2503			writehdr = writeustar;
2504		} else if (strcmp(format, "pax") == 0) {
2505			writehdr = writepax;
2506			if (globexthdr.fields)
2507				writeexthdr(stdout, 'g', &globexthdr);
2508		} else if (strcmp(format, "cpio") == 0) {
2509			writehdr = writecpio;
2510		} else {
2511			fatal("unsupported archive format '%s'", format);
2512		}
2513		break;
2514	case COPY:
2515		if (name || argc == 0)
2516			usage();
2517		l = strlen(argv[--argc]);
2518		dest = malloc(l + 2);
2519		if (!dest)
2520			fatal(NULL);
2521		memcpy(dest, argv[argc], l);
2522		memcpy(dest + l, "/", 2);
2523		destfd = open(dest, O_SEARCH|O_DIRECTORY);
2524		if (destfd < 0)
2525			fatal("open %s:", dest);
2526		writehdr = writefile;
2527		break;
2528	}
2529	if (mode & WRITE) {
2530		readhdr = readfile;
2531		bioin.fd = -1;
2532		for (i = 0; i < argc; ++i)
2533			filepush(&files, argv[i], 0, 0);
2534		if (argc == 0)
2535			files.input = stdin;
2536	}
2537
2538	memset(&hdr, 0, sizeof hdr);
2539	while (readhdr(&bioin, &hdr)) {
2540		mergehdr(&hdr, &exthdr, ~0);
2541		mergehdr(&hdr, &globexthdr, ~exthdr.fields);
2542		if (match(&hdr)) {
2543			replace(&hdr);
2544			checkinvalid(&hdr);
2545			interactiverename(&hdr);
2546			if (*hdr.path)
2547				writehdr(out, &hdr);
2548		}
2549	}
2550	writehdr(out, NULL);
2551	if (out) {
2552		if (fflush(out) != 0)
2553			fatal("write:");
2554		fclose(out);
2555	}
2556	for (i = 0; i < (int)patslen; ++i) {
2557		if (!patsused[i])
2558			fatal("pattern not matched: %s", pats[i]);
2559	}
2560
2561	if (pid != -1) {
2562		int st;
2563
2564		if (waitpid(pid, &st, 0) == -1)
2565			fatal("waitpid:");
2566		if (WIFEXITED(st) && WEXITSTATUS(st) != 0)
2567			fatal("child exited with status %d", WEXITSTATUS(st));
2568		if (WIFSIGNALED(st))
2569			fatal("child terminated by signal %d", WTERMSIG(st));
2570	}
2571	return exitstatus;
2572}