master xplshn/aruu / cmd / posix / awk / run.c
   1/****************************************************************
   2Copyright (C) Lucent Technologies 1997
   3All Rights Reserved
   4
   5Permission to use, copy, modify, and distribute this software and
   6its documentation for any purpose and without fee is hereby
   7granted, provided that the above copyright notice appear in all
   8copies and that both that the copyright notice and this
   9permission notice and warranty disclaimer appear in supporting
  10documentation, and that the name Lucent Technologies or any of
  11its entities not be used in advertising or publicity pertaining
  12to distribution of the software without specific, written prior
  13permission.
  14
  15LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
  16INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
  17IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
  18SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  19WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
  20IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
  21ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
  22THIS SOFTWARE.
  23****************************************************************/
  24
  25#ifdef __GNUC__
  26#pragma GCC diagnostic ignored "-Wunused-parameter"
  27#endif
  28
  29#define DEBUG
  30#include <stdio.h>
  31#include <ctype.h>
  32#include <wctype.h>
  33#include <fcntl.h>
  34#include <setjmp.h>
  35#include <limits.h>
  36#include <math.h>
  37#include <string.h>
  38#include <stdlib.h>
  39#include <time.h>
  40#include <sys/types.h>
  41#include <sys/stat.h>
  42#include <sys/wait.h>
  43#include "awk.h"
  44#include "awkgram.tab.h"
  45
  46
  47static void stdinit(void);
  48static void flush_all(void);
  49static char *wide_char_to_byte_str(int rune, size_t *outlen);
  50
/*
 * tempfree(x): hand cell x to tfree() when istemp(x) holds, otherwise do
 * nothing.  The macro form is the one compiled (#if 1); note that it
 * mentions its argument twice, so x must not have side effects.
 * The disabled function form additionally warns when an OCELL carries a
 * csub outside the CUNK..CFREE range.
 */
#if 1
#define tempfree(x)	do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0)
#else
void tempfree(Cell *p) {
	if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) {
		WARNING("bad csub %d in Cell %d %s",
			p->csub, p->ctype, p->sval);
	}
	if (istemp(p))
		tfree(p);
}
#endif
  63
  64/* do we really need these? */
  65/* #ifdef _NFILE */
  66/* #ifndef FOPEN_MAX */
  67/* #define FOPEN_MAX _NFILE */
  68/* #endif */
  69/* #endif */
  70
  71/* #ifndef	FOPEN_MAX */
  72/* #define	FOPEN_MAX	40 */	/* max number of open files */
  73/* #endif */
  74
  75
/* jump target for exit: armed by setjmp() in program(), taken by longjmp() in jump() */
jmp_buf env;
extern	int	pairstack[];
extern	Awkfloat	srand_seed;

Node	*winner = NULL;	/* root of parse tree */
Cell	*tmps;		/* free temporary cells for execution */

/*
 * Statically allocated constant cells.  True and False are what the boolean,
 * relational and match operators in this file return; the j* cells are the
 * values returned by jump() for break, continue, next, nextfile and return
 * (jexit is defined alongside them).
 */
static Cell	truecell	={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL, NULL };
Cell	*True	= &truecell;
static Cell	falsecell	={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*False	= &falsecell;
static Cell	breakcell	={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*jbreak	= &breakcell;
static Cell	contcell	={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*jcont	= &contcell;
static Cell	nextcell	={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*jnext	= &nextcell;
static Cell	nextfilecell	={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*jnextfile	= &nextfilecell;
static Cell	exitcell	={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*jexit	= &exitcell;
static Cell	retcell		={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*jret	= &retcell;
/* template that gettemp() copies into every cell it hands out */
static Cell	tempcell	={ OCELL, CTEMP, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };

Node	*curnode = NULL;	/* the node being executed, for debugging */
 102
 103/* buffer memory management */
 104int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr,
 105	const char *whatrtn)
 106/* pbuf:    address of pointer to buffer being managed
 107 * psiz:    address of buffer size variable
 108 * minlen:  minimum length of buffer needed
 109 * quantum: buffer size quantum
 110 * pbptr:   address of movable pointer into buffer, or 0 if none
 111 * whatrtn: name of the calling routine if failure should cause fatal error
 112 *
 113 * return   0 for realloc failure, !=0 for success
 114 */
 115{
 116	if (minlen > *psiz) {
 117		char *tbuf;
 118		int rminlen = quantum ? minlen % quantum : 0;
 119		int boff = pbptr ? *pbptr - *pbuf : 0;
 120		/* round up to next multiple of quantum */
 121		if (rminlen)
 122			minlen += quantum - rminlen;
 123		tbuf = (char *) realloc(*pbuf, minlen);
 124		DPRINTF("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, (void*)*pbuf, (void*)tbuf);
 125		if (tbuf == NULL) {
 126			if (whatrtn)
 127				FATAL("out of memory in %s", whatrtn);
 128			return 0;
 129		}
 130		*pbuf = tbuf;
 131		*psiz = minlen;
 132		if (pbptr)
 133			*pbptr = tbuf + boff;
 134	}
 135	return 1;
 136}
 137
 138void run(Node *a)	/* execution of parse tree starts here */
 139{
 140
 141	stdinit();
 142	execute(a);
 143	closeall();
 144}
 145
 146Cell *execute(Node *u)	/* execute a node of the parse tree */
 147{
 148	Cell *(*proc)(Node **, int);
 149	Cell *x;
 150	Node *a;
 151
 152	if (u == NULL)
 153		return(True);
 154	for (a = u; ; a = a->nnext) {
 155		curnode = a;
 156		if (isvalue(a)) {
 157			x = (Cell *) (a->narg[0]);
 158			if (isfld(x) && !donefld)
 159				fldbld();
 160			else if (isrec(x) && !donerec)
 161				recbld();
 162			return(x);
 163		}
 164		if (notlegal(a->nobj))	/* probably a Cell* but too risky to print */
 165			FATAL("illegal statement");
 166		proc = proctab[a->nobj-FIRSTTOKEN];
 167		x = (*proc)(a->narg, a->nobj);
 168		if (isfld(x) && !donefld)
 169			fldbld();
 170		else if (isrec(x) && !donerec)
 171			recbld();
 172		if (isexpr(a))
 173			return(x);
 174		if (isjump(x))
 175			return(x);
 176		if (a->nnext == NULL)
 177			return(x);
 178		tempfree(x);
 179	}
 180}
 181
 182
/*
 * program: execute an awk program.
 *
 * a[0] = BEGIN, a[1] = body, a[2] = END; any of them may be NULL.
 * Always returns True.
 *
 * exit is implemented in jump() as longjmp(env, 1).  env is armed twice
 * here: the first setjmp() sends an exit raised in BEGIN or in the body to
 * the END section, the second one sends an exit raised inside END straight
 * to the final return.
 */
Cell *program(Node **a, int n)	/* execute an awk program */
{				/* a[0] = BEGIN, a[1] = body, a[2] = END */
	Cell *x;

	if (setjmp(env) != 0)
		goto ex;
	if (a[0]) {		/* BEGIN */
		x = execute(a[0]);
		if (isexit(x))
			return(True);
		if (isjump(x))
			FATAL("illegal break, continue, next or nextfile from BEGIN");
		tempfree(x);
	}
	/* input is read whenever a body or an END section exists */
	if (a[1] || a[2])
		while (getrec(&record, &recsize, true) > 0) {
			x = execute(a[1]);
			if (isexit(x))
				break;
			tempfree(x);
		}
  ex:
	if (setjmp(env) != 0)	/* handles exit within END */
		goto ex1;
	if (a[2]) {		/* END */
		x = execute(a[2]);
		/* NOTE(review): the message mentions nextfile but isnextfile(x) is not tested here -- confirm intent */
		if (isbreak(x) || isnext(x) || iscont(x))
			FATAL("illegal break, continue, next or nextfile from END");
		tempfree(x);
	}
  ex1:
	return(True);
}
 216
/*
 * call() pushes one Frame per active awk function call; arg() and the
 * RETURN case of jump() read the current one through frp.
 */
struct Frame {	/* stack frame for awk function calls */
	int nargs;	/* number of arguments in this call */
	Cell *fcncell;	/* pointer to Cell for function */
	Cell **args;	/* pointer to array of arguments after execute; call() points it at its own local args[] */
	Cell *retval;	/* return value */
};

#define	NARGS	50	/* max args in a call; call() checks ncall + ndef against it */

struct Frame *frame = NULL;	/* base of stack frames; dynamically allocated */
int	nframe = 0;		/* number of frames allocated; grown 100 at a time by call() */
struct Frame *frp = NULL;	/* frame pointer. bottom level unused */
 229
/*
 * call: invoke a user-defined awk function.
 *
 * a[0] evaluates to the function's cell, a[1] is the list of actual
 * argument expressions.  The function body is the Node stored in fcn->sval
 * and the number of declared parameters is kept in fcn->fval.
 *
 * Steps: evaluate the actual arguments (arrays are passed by reference,
 * everything else through copycell()), pad missing parameters with fresh
 * temp cells, push a Frame, execute the body, release the argument cells,
 * pop the Frame and return the cell stored in frp->retval.
 *
 * NOTE(review): when the body ends in exit or next, the function returns
 * early without the frp-- and without touching frp->retval -- confirm
 * whether leaving the frame pushed on that path is intended.
 */
Cell *call(Node **a, int n)	/* function call.  very kludgy and fragile */
{
	/* initial contents of a parameter cell that received no actual argument */
	static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };
	int i, ncall, ndef;
	int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */
	Node *x;
	Cell *args[NARGS], *oargs[NARGS];	/* BUG: fixed size arrays */
	Cell *y, *z, *fcn;
	char *s;

	fcn = execute(a[0]);	/* the function itself */
	s = fcn->nval;
	if (!isfcn(fcn))
		FATAL("calling undefined function %s", s);
	/* first call ever: allocate the initial frame stack */
	if (frame == NULL) {
		frp = frame = (struct Frame *) calloc(nframe += 100, sizeof(*frame));
		if (frame == NULL)
			FATAL("out of space for stack frames calling %s", s);
	}
	for (ncall = 0, x = a[1]; x != NULL; x = x->nnext)	/* args in call */
		ncall++;
	ndef = (int) fcn->fval;			/* args in defn */
	DPRINTF("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame));
	if (ncall > ndef)
		WARNING("function %s called with %d args, uses only %d",
			s, ncall, ndef);
	/* args[] and oargs[] hold NARGS entries each */
	if (ncall + ndef > NARGS)
		FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS);
	for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) {	/* get call args */
		DPRINTF("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame));
		y = execute(x);
		oargs[i] = y;	/* remember the caller's cell for the array write-back below */
		DPRINTF("args[%d]: %s %f <%s>, t=%o\n",
			i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval);
		if (isfcn(y))
			FATAL("can't use function %s as argument in %s", y->nval, s);
		if (isarr(y))
			args[i] = y;	/* arrays by ref */
		else
			args[i] = copycell(y);
		tempfree(y);
	}
	for ( ; i < ndef; i++) {	/* add null args for ones not provided */
		args[i] = gettemp();
		*args[i] = newcopycell;
	}
	frp++;	/* now ok to up frame */
	if (frp >= frame + nframe) {
		/* stack is full: grow it and re-derive frp, since realloc may move the block */
		int dfp = frp - frame;	/* old index */
		frame = (struct Frame *) realloc(frame, (nframe += 100) * sizeof(*frame));
		if (frame == NULL)
			FATAL("out of space for stack frames in %s", s);
		frp = frame + dfp;
	}
	frp->fcncell = fcn;
	frp->args = args;
	frp->nargs = ndef;	/* number defined with (excess are locals) */
	frp->retval = gettemp();

	DPRINTF("start exec of %s, frp=%d\n", s, (int) (frp-frame));
	y = execute((Node *)(fcn->sval));	/* execute body */
	DPRINTF("finished exec of %s, frp=%d\n", s, (int) (frp-frame));

	/* release the parameter cells */
	for (i = 0; i < ndef; i++) {
		Cell *t = frp->args[i];
		if (isarr(t)) {
			if (t->csub == CCOPY) {
				if (i >= ncall) {
					/* a parameter with no actual argument became an array: drop it */
					freesymtab(t);
					t->csub = CTEMP;
					tempfree(t);
				} else {
					/* a copied scalar became an array: pass the table back to the caller's cell */
					oargs[i]->tval = t->tval;
					oargs[i]->tval &= ~(STR|NUM|DONTFREE);
					oargs[i]->sval = t->sval;
					tempfree(t);
				}
			}
		} else if (t != y) {	/* kludge to prevent freeing twice */
			t->csub = CTEMP;
			tempfree(t);
		} else if (t == y && t->csub == CCOPY) {
			t->csub = CTEMP;
			tempfree(t);
			freed = 1;
		}
	}
	tempfree(fcn);
	if (isexit(y) || isnext(y))
		return y;
	if (freed == 0) {
		tempfree(y);	/* don't free twice! */
	}
	z = frp->retval;			/* return value */
	DPRINTF("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval);
	frp--;
	return(z);
}
 328
 329Cell *copycell(Cell *x)	/* make a copy of a cell in a temp */
 330{
 331	Cell *y;
 332
 333	/* copy is not constant or field */
 334
 335	y = gettemp();
 336	y->tval = x->tval & ~(CON|FLD|REC);
 337	y->csub = CCOPY;	/* prevents freeing until call is over */
 338	y->nval = x->nval;	/* BUG? */
 339	if (isstr(x) /* || x->ctype == OCELL */) {
 340		y->sval = tostring(x->sval);
 341		y->tval &= ~DONTFREE;
 342	} else
 343		y->tval |= DONTFREE;
 344	y->fval = x->fval;
 345	return y;
 346}
 347
 348Cell *arg(Node **a, int n)	/* nth argument of a function */
 349{
 350
 351	n = ptoi(a[0]);	/* argument number, counting from 0 */
 352	DPRINTF("arg(%d), frp->nargs=%d\n", n, frp->nargs);
 353	if (n+1 > frp->nargs)
 354		FATAL("argument #%d of function %s was not supplied",
 355			n+1, frp->fcncell->nval);
 356	return frp->args[n];
 357}
 358
 359Cell *jump(Node **a, int n)	/* break, continue, next, nextfile, return */
 360{
 361	Cell *y;
 362
 363	switch (n) {
 364	case EXIT:
 365		if (a[0] != NULL) {
 366			y = execute(a[0]);
 367			errorflag = (int) getfval(y);
 368			tempfree(y);
 369		}
 370		longjmp(env, 1);
 371	case RETURN:
 372		if (a[0] != NULL) {
 373			y = execute(a[0]);
 374			if ((y->tval & (STR|NUM)) == (STR|NUM)) {
 375				setsval(frp->retval, getsval(y));
 376				frp->retval->fval = getfval(y);
 377				frp->retval->tval |= NUM;
 378			}
 379			else if (y->tval & STR)
 380				setsval(frp->retval, getsval(y));
 381			else if (y->tval & NUM)
 382				setfval(frp->retval, getfval(y));
 383			else		/* can't happen */
 384				FATAL("bad type variable %d", y->tval);
 385			tempfree(y);
 386		}
 387		return(jret);
 388	case NEXT:
 389		return(jnext);
 390	case NEXTFILE:
 391		nextfile();
 392		return(jnextfile);
 393	case BREAK:
 394		return(jbreak);
 395	case CONTINUE:
 396		return(jcont);
 397	default:	/* can't happen */
 398		FATAL("illegal jump type %d", n);
 399	}
 400	return 0;	/* not reached */
 401}
 402
 403Cell *awkgetline(Node **a, int n)	/* get next line from specific input */
 404{		/* a[0] is variable, a[1] is operator, a[2] is filename */
 405	Cell *r, *x;
 406	extern Cell **fldtab;
 407	FILE *fp;
 408	char *buf;
 409	int bufsize = recsize;
 410	int mode;
 411	bool newflag;
 412	double result;
 413
 414	if ((buf = (char *) malloc(bufsize)) == NULL)
 415		FATAL("out of memory in getline");
 416
 417	fflush(stdout);	/* in case someone is waiting for a prompt */
 418	r = gettemp();
 419	if (a[1] != NULL) {		/* getline < file */
 420		x = execute(a[2]);		/* filename */
 421		mode = ptoi(a[1]);
 422		if (mode == '|')		/* input pipe */
 423			mode = LE;	/* arbitrary flag */
 424		fp = openfile(mode, getsval(x), &newflag);
 425		tempfree(x);
 426		if (fp == NULL)
 427			n = -1;
 428		else
 429			n = readrec(&buf, &bufsize, fp, newflag);
 430		if (n <= 0) {
 431			;
 432		} else if (a[0] != NULL) {	/* getline var <file */
 433			x = execute(a[0]);
 434			setsval(x, buf);
 435			if (is_number(x->sval, & result)) {
 436				x->fval = result;
 437				x->tval |= NUM;
 438			}
 439			tempfree(x);
 440		} else {			/* getline <file */
 441			setsval(fldtab[0], buf);
 442			if (is_number(fldtab[0]->sval, & result)) {
 443				fldtab[0]->fval = result;
 444				fldtab[0]->tval |= NUM;
 445			}
 446		}
 447	} else {			/* bare getline; use current input */
 448		if (a[0] == NULL)	/* getline */
 449			n = getrec(&record, &recsize, true);
 450		else {			/* getline var */
 451			n = getrec(&buf, &bufsize, false);
 452			if (n > 0) {
 453				x = execute(a[0]);
 454				setsval(x, buf);
 455				if (is_number(x->sval, & result)) {
 456					x->fval = result;
 457					x->tval |= NUM;
 458				}
 459				tempfree(x);
 460			}
 461		}
 462	}
 463	setfval(r, (Awkfloat) n);
 464	free(buf);
 465	return r;
 466}
 467
 468Cell *getnf(Node **a, int n)	/* get NF */
 469{
 470	if (!donefld)
 471		fldbld();
 472	return (Cell *) a[0];
 473}
 474
 475static char *
 476makearraystring(Node *p, const char *func)
 477{
 478	char *buf;
 479	int bufsz = recsize;
 480	size_t blen;
 481
 482	if ((buf = (char *) malloc(bufsz)) == NULL) {
 483		FATAL("%s: out of memory", func);
 484	}
 485
 486	blen = 0;
 487	buf[blen] = '\0';
 488
 489	for (; p; p = p->nnext) {
 490		Cell *x = execute(p);	/* expr */
 491		char *s = getsval(x);
 492		size_t seplen = strlen(getsval(subseploc));
 493		size_t nsub = p->nnext ? seplen : 0;
 494		size_t slen = strlen(s);
 495		size_t tlen = blen + slen + nsub;
 496
 497		if (!adjbuf(&buf, &bufsz, tlen + 1, recsize, 0, func)) {
 498			FATAL("%s: out of memory %s[%s...]",
 499			    func, x->nval, buf);
 500		}
 501		memcpy(buf + blen, s, slen);
 502		if (nsub) {
 503			memcpy(buf + blen + slen, *SUBSEP, nsub);
 504		}
 505		buf[tlen] = '\0';
 506		blen = tlen;
 507		tempfree(x);
 508	}
 509	return buf;
 510}
 511
 512Cell *array(Node **a, int n)	/* a[0] is symtab, a[1] is list of subscripts */
 513{
 514	Cell *x, *z;
 515	char *buf;
 516
 517	x = execute(a[0]);	/* Cell* for symbol table */
 518	buf = makearraystring(a[1], __func__);
 519	if (!isarr(x)) {
 520		DPRINTF("making %s into an array\n", NN(x->nval));
 521		if (freeable(x))
 522			xfree(x->sval);
 523		x->tval &= ~(STR|NUM|DONTFREE);
 524		x->tval |= ARR;
 525		x->sval = (char *) makesymtab(NSYMTAB);
 526	}
 527	z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval);
 528	z->ctype = OCELL;
 529	z->csub = CVAR;
 530	tempfree(x);
 531	free(buf);
 532	return(z);
 533}
 534
 535Cell *awkdelete(Node **a, int n)	/* a[0] is symtab, a[1] is list of subscripts */
 536{
 537	Cell *x;
 538
 539	x = execute(a[0]);	/* Cell* for symbol table */
 540	if (x == symtabloc) {
 541		FATAL("cannot delete SYMTAB or its elements");
 542	}
 543	if (!isarr(x))
 544		return True;
 545	if (a[1] == NULL) {	/* delete the elements, not the table */
 546		freesymtab(x);
 547		x->tval &= ~STR;
 548		x->tval |= ARR;
 549		x->sval = (char *) makesymtab(NSYMTAB);
 550	} else {
 551		char *buf = makearraystring(a[1], __func__);
 552		freeelem(x, buf);
 553		free(buf);
 554	}
 555	tempfree(x);
 556	return True;
 557}
 558
 559Cell *intest(Node **a, int n)	/* a[0] is index (list), a[1] is symtab */
 560{
 561	Cell *ap, *k;
 562	char *buf;
 563
 564	ap = execute(a[1]);	/* array name */
 565	if (!isarr(ap)) {
 566		DPRINTF("making %s into an array\n", ap->nval);
 567		if (freeable(ap))
 568			xfree(ap->sval);
 569		ap->tval &= ~(STR|NUM|DONTFREE);
 570		ap->tval |= ARR;
 571		ap->sval = (char *) makesymtab(NSYMTAB);
 572	}
 573	buf = makearraystring(a[0], __func__);
 574	k = lookup(buf, (Array *) ap->sval);
 575	tempfree(ap);
 576	free(buf);
 577	if (k == NULL)
 578		return(False);
 579	else
 580		return(True);
 581}
 582
 583
 584/* ======== utf-8 code ========== */
 585
 586/*
 587 * Awk strings can contain ascii, random 8-bit items (eg Latin-1),
 588 * or utf-8.  u8_isutf tests whether a string starts with a valid
 589 * utf-8 sequence, and returns 0 if not (e.g., high bit set).
 590 * u8_nextlen returns length of next valid sequence, which is
 591 * 1 for ascii, 2..4 for utf-8, or 1 for high bit non-utf.
 592 * u8_strlen returns length of string in valid utf-8 sequences
 593 * and/or high-bit bytes.  Conversion functions go between byte
 594 * number and character number.
 595 *
 596 * In theory, this behaves the same as before for non-utf8 bytes.
 597 *
 598 * Limited checking! This is a potential security hole.
 599 */
 600
 601/* is s the beginning of a valid utf-8 string? */
 602/* return length 1..4 if yes, 0 if no */
 603int u8_isutf(const char *s)
 604{
 605	int n, ret;
 606	unsigned char c;
 607
 608	c = s[0];
 609	if (c < 128 || awk_mb_cur_max == 1)
 610		return 1; /* what if it's 0? */
 611
 612	n = strlen(s);
 613	if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) {
 614		ret = 2; /* 110xxxxx 10xxxxxx */
 615	} else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80
 616			 && (s[2] & 0xC0) == 0x80) {
 617		ret = 3; /* 1110xxxx 10xxxxxx 10xxxxxx */
 618	} else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80
 619			 && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) {
 620		ret = 4; /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
 621	} else {
 622		ret = 0;
 623	}
 624	return ret;
 625}
 626
 627/* Convert (prefix of) utf8 string to utf-32 rune. */
 628/* Sets *rune to the value, returns the length. */
 629/* No error checking: watch out. */
 630int u8_rune(int *rune, const char *s)
 631{
 632	int n, ret;
 633	unsigned char c;
 634
 635	c = s[0];
 636	if (c < 128 || awk_mb_cur_max == 1) {
 637		*rune = c;
 638		return 1;
 639	}
 640
 641	n = strlen(s);
 642	if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) {
 643		*rune = ((c & 0x1F) << 6) | (s[1] & 0x3F); /* 110xxxxx 10xxxxxx */
 644		ret = 2;
 645	} else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80
 646			  && (s[2] & 0xC0) == 0x80) {
 647		*rune = ((c & 0xF) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F);
 648			/* 1110xxxx 10xxxxxx 10xxxxxx */
 649		ret = 3;
 650	} else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80
 651			  && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) {
 652		*rune = ((c & 0x7) << 18) | ((s[1] & 0x3F) << 12) | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F);
 653			/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
 654		ret = 4;
 655	} else {
 656		*rune = c;
 657		ret = 1;
 658	}
 659	return ret; /* returns one byte if sequence doesn't look like utf */
 660}
 661
 662/* return length of next sequence: 1 for ascii or random, 2..4 for valid utf8 */
 663int u8_nextlen(const char *s)
 664{
 665	int len;
 666
 667	len = u8_isutf(s);
 668	if (len == 0)
 669		len = 1;
 670	return len;
 671}
 672
 673/* return number of utf characters or single non-utf bytes */
 674int u8_strlen(const char *s)
 675{
 676	int i, len, n, totlen;
 677	unsigned char c;
 678
 679	n = strlen(s);
 680	totlen = 0;
 681	for (i = 0; i < n; i += len) {
 682		c = s[i];
 683		if (c < 128 || awk_mb_cur_max == 1) {
 684			len = 1;
 685		} else {
 686			len = u8_nextlen(&s[i]);
 687		}
 688		totlen++;
 689		if (i > n)
 690			FATAL("bad utf count [%s] n=%d i=%d\n", s, n, i);
 691	}
 692	return totlen;
 693}
 694
 695/* convert utf-8 char number in a string to its byte offset */
 696int u8_char2byte(const char *s, int charnum)
 697{
 698	int n;
 699	int bytenum = 0;
 700
 701	while (charnum > 0) {
 702		n = u8_nextlen(s);
 703		s += n;
 704		bytenum += n;
 705		charnum--;
 706	}
 707	return bytenum;
 708}
 709
 710/* convert byte offset in s to utf-8 char number that starts there */
 711int u8_byte2char(const char *s, int bytenum)
 712{
 713	int i, len, b;
 714	int charnum = 0; /* BUG: what origin? */
 715	/* should be 0 to match start==0 which means no match */	
 716
 717	b = strlen(s);
 718	if (bytenum > b) {
 719		return -1; /* ??? */
 720	}
 721	for (i = 0; i <= bytenum; i += len) {
 722		len = u8_nextlen(s+i);
 723		charnum++;
 724	}
 725	return charnum;
 726}
 727
 728/* runetochar() adapted from rune.c in the Plan 9 distribution */
 729
 730enum
 731{
 732	Runeerror = 128, /* from somewhere else */
 733	Runemax = 0x10FFFF,
 734
 735	Bit1    = 7,
 736	Bitx    = 6,
 737	Bit2    = 5,
 738	Bit3    = 4,
 739	Bit4    = 3,
 740	Bit5    = 2,
 741
 742	T1      = ((1<<(Bit1+1))-1) ^ 0xFF,     /* 0000 0000 */
 743	Tx      = ((1<<(Bitx+1))-1) ^ 0xFF,     /* 1000 0000 */
 744	T2      = ((1<<(Bit2+1))-1) ^ 0xFF,     /* 1100 0000 */
 745	T3      = ((1<<(Bit3+1))-1) ^ 0xFF,     /* 1110 0000 */
 746	T4      = ((1<<(Bit4+1))-1) ^ 0xFF,     /* 1111 0000 */
 747	T5      = ((1<<(Bit5+1))-1) ^ 0xFF,     /* 1111 1000 */
 748
 749	Rune1   = (1<<(Bit1+0*Bitx))-1,	 	/* 0000 0000 0000 0000 0111 1111 */
 750	Rune2   = (1<<(Bit2+1*Bitx))-1,	 	/* 0000 0000 0000 0111 1111 1111 */
 751	Rune3   = (1<<(Bit3+2*Bitx))-1,	 	/* 0000 0000 1111 1111 1111 1111 */
 752	Rune4   = (1<<(Bit4+3*Bitx))-1,	 	/* 0011 1111 1111 1111 1111 1111 */
 753
 754	Maskx   = (1<<Bitx)-1,		  	/* 0011 1111 */
 755	Testx   = Maskx ^ 0xFF,		 	/* 1100 0000 */
 756
 757};
 758
 759int runetochar(char *str, int c)
 760{	
 761	/* one character sequence 00000-0007F => 00-7F */     
 762	if (c <= Rune1) {
 763		str[0] = c;
 764		return 1;
 765	}
 766	
 767	/* two character sequence 00080-007FF => T2 Tx */
 768	if (c <= Rune2) {
 769		str[0] = T2 | (c >> 1*Bitx);
 770		str[1] = Tx | (c & Maskx);
 771		return 2;
 772	}
 773
 774	/* three character sequence 00800-0FFFF => T3 Tx Tx */
 775	if (c > Runemax)
 776		c = Runeerror;
 777	if (c <= Rune3) {
 778		str[0] = T3 |  (c >> 2*Bitx);
 779		str[1] = Tx | ((c >> 1*Bitx) & Maskx);
 780		str[2] = Tx |  (c & Maskx);
 781		return 3;
 782	}
 783	
 784	/* four character sequence 010000-1FFFFF => T4 Tx Tx Tx */
 785	str[0] = T4 |  (c >> 3*Bitx);
 786	str[1] = Tx | ((c >> 2*Bitx) & Maskx);
 787	str[2] = Tx | ((c >> 1*Bitx) & Maskx);
 788	str[3] = Tx |  (c & Maskx);
 789	return 4;
 790}               
 791
 792
 793/* ========== end of utf8 code =========== */
 794
 795
 796
/*
 * matchop: the ~ and !~ operators and the match() function.
 *
 * a[1] is the target text.  When a[0] is NULL, a[2] is an already-compiled
 * automaton; otherwise a[2] is an expression whose string value is compiled
 * with makedfa().  n is MATCH, NOTMATCH or MATCHFCN.
 *
 * For MATCH/NOTMATCH the result is True or False.  For MATCHFCN the result
 * is a temp cell holding the 1-based character position of the match (0 if
 * none), and that position and the match length in characters are stored
 * into rstartloc and rlengthloc.  patbeg and patlen are globals,
 * presumably set by pmatch() -- TODO confirm.
 */
Cell *matchop(Node **a, int n)	/* ~ and match() */
{
	Cell *x, *y, *z;
	char *s, *t;
	int i;
	int cstart, cpatlen, len;
	fa *pfa;
	int (*mf)(fa *, const char *) = match, mode = 0;

	if (n == MATCHFCN) {
		mf = pmatch;
		mode = 1;
	}
	x = execute(a[1]);	/* a[1] = target text */
	s = getsval(x);
	if (a[0] == NULL)	/* a[0] == 0: already-compiled reg expr */
		i = (*mf)((fa *) a[2], s);
	else {
		y = execute(a[2]);	/* a[2] = regular expr */
		t = getsval(y);
		pfa = makedfa(t, mode);
		i = (*mf)(pfa, s);
		tempfree(y);
	}
	z = x;	/* keep the target cell so it can be released after x is reused */
	if (n == MATCHFCN) {
		int start = patbeg - s + 1; /* origin 1 */
		if (patlen < 0) {
			start = 0; /* not found */
		} else {
			/* convert byte offset and byte length into character units */
			cstart = u8_byte2char(s, start-1);
			cpatlen = 0;
			for (i = 0; i < patlen; i += len) {
				len = u8_nextlen(patbeg+i);
				cpatlen++;
			}

			start = cstart;
			patlen = cpatlen;
		}

		setfval(rstartloc, (Awkfloat) start);
		setfval(rlengthloc, (Awkfloat) patlen);
		x = gettemp();
		x->tval = NUM;
		x->fval = start;
	} else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0))
		x = True;
	else
		x = False;

	tempfree(z);
	return x;
}
 851
 852
 853Cell *boolop(Node **a, int n)	/* a[0] || a[1], a[0] && a[1], !a[0] */
 854{
 855	Cell *x, *y;
 856	int i;
 857
 858	x = execute(a[0]);
 859	i = istrue(x);
 860	tempfree(x);
 861	switch (n) {
 862	case BOR:
 863		if (i) return(True);
 864		y = execute(a[1]);
 865		i = istrue(y);
 866		tempfree(y);
 867		if (i) return(True);
 868		else return(False);
 869	case AND:
 870		if ( !i ) return(False);
 871		y = execute(a[1]);
 872		i = istrue(y);
 873		tempfree(y);
 874		if (i) return(True);
 875		else return(False);
 876	case NOT:
 877		if (i) return(False);
 878		else return(True);
 879	default:	/* can't happen */
 880		FATAL("unknown boolean operator %d", n);
 881	}
 882	return 0;	/*NOTREACHED*/
 883}
 884
 885Cell *relop(Node **a, int n)	/* a[0 < a[1], etc. */
 886{
 887	int i;
 888	Cell *x, *y;
 889	Awkfloat j;
 890	bool x_is_nan, y_is_nan;
 891
 892	x = execute(a[0]);
 893	y = execute(a[1]);
 894	x_is_nan = isnan(x->fval);
 895	y_is_nan = isnan(y->fval);
 896	if (x->tval&NUM && y->tval&NUM) {
 897		if ((x_is_nan || y_is_nan) && n != NE)
 898			return(False);
 899		j = x->fval - y->fval;
 900		i = j<0? -1: (j>0? 1: 0);
 901	} else {
 902		i = strcmp(getsval(x), getsval(y));
 903	}
 904	tempfree(x);
 905	tempfree(y);
 906	switch (n) {
 907	case LT:	if (i<0) return(True);
 908			else return(False);
 909	case LE:	if (i<=0) return(True);
 910			else return(False);
 911	case NE:	if (x_is_nan && y_is_nan) return(True);
 912			else if (i!=0) return(True);
 913			else return(False);
 914	case EQ:	if (i == 0) return(True);
 915			else return(False);
 916	case GE:	if (i>=0) return(True);
 917			else return(False);
 918	case GT:	if (i>0) return(True);
 919			else return(False);
 920	default:	/* can't happen */
 921		FATAL("unknown relational operator %d", n);
 922	}
 923	return 0;	/*NOTREACHED*/
 924}
 925
 926void tfree(Cell *a)	/* free a tempcell */
 927{
 928	if (freeable(a)) {
 929		DPRINTF("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval);
 930		xfree(a->sval);
 931	}
 932	if (a == tmps)
 933		FATAL("tempcell list is curdled");
 934	a->cnext = tmps;
 935	tmps = a;
 936}
 937
 938Cell *gettemp(void)	/* get a tempcell */
 939{	int i;
 940	Cell *x;
 941
 942	if (!tmps) {
 943		tmps = (Cell *) calloc(100, sizeof(*tmps));
 944		if (!tmps)
 945			FATAL("out of space for temporaries");
 946		for (i = 1; i < 100; i++)
 947			tmps[i-1].cnext = &tmps[i];
 948		tmps[i-1].cnext = NULL;
 949	}
 950	x = tmps;
 951	tmps = x->cnext;
 952	*x = tempcell;
 953	return(x);
 954}
 955
 956Cell *indirect(Node **a, int n)	/* $( a[0] ) */
 957{
 958	Awkfloat val;
 959	Cell *x;
 960	int m;
 961
 962	x = execute(a[0]);
 963	val = getfval(x);	/* freebsd: defend against super large field numbers */
 964	if ((Awkfloat)INT_MAX < val)
 965		FATAL("trying to access out of range field %s", x->nval);
 966	m = (int) val;
 967	tempfree(x);
 968	x = fieldadr(m);
 969	x->ctype = OCELL;	/* BUG?  why are these needed? */
 970	x->csub = CFLD;
 971	return(x);
 972}
 973
 974Cell *substr(Node **a, int nnn)		/* substr(a[0], a[1], a[2]) */
 975{
 976	int k, m, n;
 977	int mb, nb;
 978	char *s;
 979	int temp;
 980	Cell *x, *y, *z = NULL;
 981
 982	x = execute(a[0]);
 983	y = execute(a[1]);
 984	if (a[2] != NULL)
 985		z = execute(a[2]);
 986	s = getsval(x);
 987	k = u8_strlen(s) + 1;
 988	if (k <= 1) {
 989		tempfree(x);
 990		tempfree(y);
 991		if (a[2] != NULL) {
 992			tempfree(z);
 993		}
 994		x = gettemp();
 995		setsval(x, "");
 996		return(x);
 997	}
 998	m = (int) getfval(y);
 999	if (m <= 0)
1000		m = 1;
1001	else if (m > k)
1002		m = k;
1003	tempfree(y);
1004	if (a[2] != NULL) {
1005		n = (int) getfval(z);
1006		tempfree(z);
1007	} else
1008		n = k - 1;
1009	if (n < 0)
1010		n = 0;
1011	else if (n > k - m)
1012		n = k - m;
1013	/* m is start, n is length from there */
1014	DPRINTF("substr: m=%d, n=%d, s=%s\n", m, n, s);
1015	y = gettemp();
1016	mb = u8_char2byte(s, m-1); /* byte offset of start char in s */
1017	nb = u8_char2byte(s, m-1+n);  /* byte offset of end+1 char in s */
1018
1019	temp = s[nb];	/* with thanks to John Linderman */
1020	s[nb] = '\0';
1021	setsval(y, s + mb);
1022	s[nb] = temp;
1023	tempfree(x);
1024	return(y);
1025}
1026
1027Cell *sindex(Node **a, int nnn)		/* index(a[0], a[1]) */
1028{
1029	Cell *x, *y, *z;
1030	char *s1, *s2, *p1, *p2, *q;
1031	Awkfloat v = 0.0;
1032
1033	x = execute(a[0]);
1034	s1 = getsval(x);
1035	y = execute(a[1]);
1036	s2 = getsval(y);
1037
1038	z = gettemp();
1039	for (p1 = s1; *p1 != '\0'; p1++) {
1040		for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++)
1041			continue;
1042		if (*p2 == '\0') {
1043			/* v = (Awkfloat) (p1 - s1 + 1);	 origin 1 */
1044
1045		   /* should be a function: used in match() as well */
1046			int i, len;
1047			v = 0;
1048			for (i = 0; i < p1-s1+1; i += len) {
1049				len = u8_nextlen(s1+i);
1050				v++;
1051			}
1052			break;
1053		}
1054	}
1055	tempfree(x);
1056	tempfree(y);
1057	setfval(z, v);
1058	return(z);
1059}
1060
/*
 * has_utf8: return 1 if s contains any utf-8 (2 bytes or more) character,
 * 0 otherwise.
 */
int has_utf8(char *s)
{
	while (*s != 0) {
		int step = u8_nextlen(s);

		if (step > 1)
			return 1;
		s += step;
	}
	return 0;
}
1072
1073#define	MAXNUMSIZE	50
1074
1075int format(char **pbuf, int *pbufsize, const char *s, Node *a)	/* printf-like conversions */
1076{
1077	char *fmt;
1078	char *p, *t;
1079	const char *os;
1080	Cell *x;
1081	int flag = 0, n;
1082	int fmtwd; /* format width */
1083	int fmtsz = recsize;
1084	char *buf = *pbuf;
1085	int bufsize = *pbufsize;
1086#define FMTSZ(a)   (fmtsz - ((a) - fmt))
1087#define BUFSZ(a)   (bufsize - ((a) - buf))
1088
1089	static bool first = true;
1090	static bool have_a_format = false;
1091
1092	if (first) {
1093		char xbuf[100];
1094
1095		snprintf(xbuf, sizeof(xbuf), "%a", 42.0);
1096		have_a_format = (strcmp(xbuf, "0x1.5p+5") == 0);
1097		first = false;
1098	}
1099
1100	os = s;
1101	p = buf;
1102	if ((fmt = (char *) malloc(fmtsz)) == NULL)
1103		FATAL("out of memory in format()");
1104	while (*s) {
1105		adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1");
1106		if (*s != '%') {
1107			*p++ = *s++;
1108			continue;
1109		}
1110		if (*(s+1) == '%') {
1111			*p++ = '%';
1112			s += 2;
1113			continue;
1114		}
1115		fmtwd = atoi(s+1);
1116		if (fmtwd < 0)
1117			fmtwd = -fmtwd;
1118		adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2");
1119		for (t = fmt; (*t++ = *s) != '\0'; s++) {
1120			if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3"))
1121				FATAL("format item %.30s... ran format() out of memory", os);
1122			/* Ignore size specifiers */
1123			if (strchr("hjLlqtz", *s) != NULL) {	/* the ansi panoply */
1124				t--;
1125				continue;
1126			}
1127			if (isalpha((uschar)*s))
1128				break;
1129			if (*s == '$') {
1130				FATAL("'$' not permitted in awk formats");
1131			}
1132			if (*s == '*') {
1133				if (a == NULL) {
1134					FATAL("not enough args in printf(%s)", os);
1135				}
1136				x = execute(a);
1137				a = a->nnext;
1138				snprintf(t - 1, FMTSZ(t - 1),
1139				    "%d", fmtwd=(int) getfval(x));
1140				if (fmtwd < 0)
1141					fmtwd = -fmtwd;
1142				adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format");
1143				t = fmt + strlen(fmt);
1144				tempfree(x);
1145			}
1146		}
1147		*t = '\0';
1148		if (fmtwd < 0)
1149			fmtwd = -fmtwd;
1150		adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4");
1151		switch (*s) {
1152		case 'a': case 'A':
1153			if (have_a_format)
1154				flag = *s;
1155			else
1156				flag = 'f';
1157			break;
1158		case 'f': case 'e': case 'g': case 'E': case 'G':
1159			flag = 'f';
1160			break;
1161		case 'd': case 'i': case 'o': case 'x': case 'X': case 'u':
1162			flag = (*s == 'd' || *s == 'i') ? 'd' : 'u';
1163			*(t-1) = 'j';
1164			*t = *s;
1165			*++t = '\0';
1166			break;
1167		case 's':
1168			flag = 's';
1169			break;
1170		case 'c':
1171			flag = 'c';
1172			break;
1173		default:
1174			WARNING("weird printf conversion %s", fmt);
1175			flag = '?';
1176			break;
1177		}
1178		if (a == NULL)
1179			FATAL("not enough args in printf(%s)", os);
1180		x = execute(a);
1181		a = a->nnext;
1182		n = MAXNUMSIZE;
1183		if (fmtwd > n)
1184			n = fmtwd;
1185		adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5");
1186		switch (flag) {
1187		case '?':
1188			snprintf(p, BUFSZ(p), "%s", fmt);	/* unknown, so dump it too */
1189			t = getsval(x);
1190			n = strlen(t);
1191			if (fmtwd > n)
1192				n = fmtwd;
1193			adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6");
1194			p += strlen(p);
1195			snprintf(p, BUFSZ(p), "%s", t);
1196			break;
1197		case 'a':
1198		case 'A':
1199		case 'f':	snprintf(p, BUFSZ(p), fmt, getfval(x)); break;
1200		case 'd':	snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break;
1201		case 'u':	snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break;
1202
1203		case 's': {
1204			t = getsval(x);
1205			n = strlen(t);
1206			/* if simple format or no utf-8 in the string, sprintf works */
1207			if (!has_utf8(t) || strcmp(fmt,"%s") == 0) {
1208				if (fmtwd > n)
1209					n = fmtwd;
1210				if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7"))
1211					FATAL("huge string/format (%d chars) in printf %.30s..." \
1212						" ran format() out of memory", n, t);
1213				snprintf(p, BUFSZ(p), fmt, t);
1214				break;
1215			}
1216
1217			/* get here if string has utf-8 chars and fmt is not plain %s */
1218			/* "%-w.ps", where -, w and .p are all optional */
1219			/* '0' before the w is a flag character */
1220			/* fmt points at % */
1221			int ljust = 0, wid = 0, prec = n, pad = 0;
1222			char *f = fmt+1;
1223			if (f[0] == '-') {
1224				ljust = 1;
1225				f++;
1226			}
1227			// flags '0' and '+' are recognized but skipped
1228			if (f[0] == '0') {
1229				f++;
1230				if (f[0] == '+')
1231					f++;
1232			}
1233			if (f[0] == '+') {
1234				f++;
1235				if (f[0] == '0')
1236					f++;
1237			}
1238			if (isdigit(f[0])) { /* there is a wid */
1239				wid = strtol(f, &f, 10);
1240			}
1241			if (f[0] == '.') { /* there is a .prec */
1242				prec = strtol(++f, &f, 10);
1243			}
1244			if (prec > u8_strlen(t))
1245				prec = u8_strlen(t);
1246			pad = wid>prec ? wid - prec : 0;  // has to be >= 0
1247			int i, k, n;
1248			
1249			if (ljust) { // print prec chars from t, then pad blanks
1250				n = u8_char2byte(t, prec);
1251				for (k = 0; k < n; k++) {
1252					//putchar(t[k]);
1253					*p++ = t[k];
1254				}
1255				for (i = 0; i < pad; i++) {
1256					//printf(" ");
1257					*p++ = ' ';
1258				}
1259			} else { // print pad blanks, then prec chars from t
1260				for (i = 0; i < pad; i++) {
1261					//printf(" ");
1262					*p++ = ' ';
1263				}
1264				n = u8_char2byte(t, prec);
1265				for (k = 0; k < n; k++) {
1266					//putchar(t[k]);
1267					*p++ = t[k];
1268				}
1269			}
1270			*p = 0;
1271			break;
1272		}
1273
1274               case 'c': {
1275			/*
1276			 * If a numeric value is given, awk should just turn
1277			 * it into a character and print it:
1278			 *      BEGIN { printf("%c\n", 65) }
1279			 * prints "A".
1280			 *
1281			 * But what if the numeric value is > 128 and
1282			 * represents a valid Unicode code point?!? We do
1283			 * our best to convert it back into UTF-8. If we
1284			 * can't, we output the encoding of the Unicode
1285			 * "invalid character", 0xFFFD.
1286			 */
1287			if (isnum(x)) {
1288				int charval = (int) getfval(x);
1289
1290				if (charval != 0) {
1291					if (charval < 128 || awk_mb_cur_max == 1)
1292						snprintf(p, BUFSZ(p), fmt, charval);
1293					else {
1294						// possible unicode character
1295						size_t count;
1296						char *bs = wide_char_to_byte_str(charval, &count);
1297
1298						if (bs == NULL)	{ // invalid character
1299							// use unicode invalid character, 0xFFFD
1300							static char invalid_char[] = "\357\277\275";
1301							bs = invalid_char;
1302							count = 3;
1303						}
1304						t = bs;
1305						n = count;
1306						goto format_percent_c;
1307					}
1308				} else {
1309					*p++ = '\0'; /* explicit null byte */
1310					*p = '\0';   /* next output will start here */
1311				}
1312				break;
1313			}
1314			t = getsval(x);
1315			n = u8_nextlen(t);
1316		format_percent_c:
1317			if (n < 2) { /* not utf8 */
1318				snprintf(p, BUFSZ(p), fmt, getsval(x)[0]);
1319				break;
1320			}
1321
1322			// utf8 character, almost same song and dance as for %s
1323			int ljust = 0, wid = 0, prec = n, pad = 0;
1324			char *f = fmt+1;
1325			if (f[0] == '-') {
1326				ljust = 1;
1327				f++;
1328			}
1329			// flags '0' and '+' are recognized but skipped
1330			if (f[0] == '0') {
1331				f++;
1332				if (f[0] == '+')
1333					f++;
1334			}
1335			if (f[0] == '+') {
1336				f++;
1337				if (f[0] == '0')
1338					f++;
1339			}
1340			if (isdigit(f[0])) { /* there is a wid */
1341				wid = strtol(f, &f, 10);
1342			}
1343			if (f[0] == '.') { /* there is a .prec */
1344				prec = strtol(++f, &f, 10);
1345			}
1346			if (prec > 1)           // %c --> only one character
1347				prec = 1;
1348			pad = wid>prec ? wid - prec : 0;  // has to be >= 0
1349			int i;
1350
1351			if (ljust) { // print one char from t, then pad blanks
1352				for (i = 0; i < n; i++)
1353					*p++ = t[i];
1354				for (i = 0; i < pad; i++) {
1355					//printf(" ");
1356					*p++ = ' ';
1357				}
1358			} else { // print pad blanks, then prec chars from t
1359				for (i = 0; i < pad; i++) {
1360					//printf(" ");
1361					*p++ = ' ';
1362				}
1363				for (i = 0; i < n; i++)
1364					*p++ = t[i];
1365			}
1366			*p = 0;
1367			break;
1368		}
1369		default:
1370			FATAL("can't happen: bad conversion %c in format()", flag);
1371		}
1372
1373		tempfree(x);
1374		p += strlen(p);
1375		s++;
1376	}
1377	*p = '\0';
1378	free(fmt);
1379	for ( ; a; a = a->nnext) {		/* evaluate any remaining args */
1380		x = execute(a);
1381		tempfree(x);
1382	}
1383	*pbuf = buf;
1384	*pbufsize = bufsize;
1385	return p - buf;
1386}
1387
1388Cell *awksprintf(Node **a, int n)		/* sprintf(a[0]) */
1389{
1390	Cell *x;
1391	Node *y;
1392	char *buf;
1393	int bufsz=3*recsize;
1394
1395	if ((buf = (char *) malloc(bufsz)) == NULL)
1396		FATAL("out of memory in awksprintf");
1397	y = a[0]->nnext;
1398	x = execute(a[0]);
1399	if (format(&buf, &bufsz, getsval(x), y) == -1)
1400		FATAL("sprintf string %.30s... too long.  can't happen.", buf);
1401	tempfree(x);
1402	x = gettemp();
1403	x->sval = buf;
1404	x->tval = STR;
1405	return(x);
1406}
1407
1408Cell *awkprintf(Node **a, int n)		/* printf */
1409{	/* a[0] is list of args, starting with format string */
1410	/* a[1] is redirection operator, a[2] is redirection file */
1411	FILE *fp;
1412	Cell *x;
1413	Node *y;
1414	char *buf;
1415	int len;
1416	int bufsz=3*recsize;
1417
1418	if ((buf = (char *) malloc(bufsz)) == NULL)
1419		FATAL("out of memory in awkprintf");
1420	y = a[0]->nnext;
1421	x = execute(a[0]);
1422	if ((len = format(&buf, &bufsz, getsval(x), y)) == -1)
1423		FATAL("printf string %.30s... too long.  can't happen.", buf);
1424	tempfree(x);
1425	if (a[1] == NULL) {
1426		/* fputs(buf, stdout); */
1427		fwrite(buf, len, 1, stdout);
1428		if (ferror(stdout))
1429			FATAL("write error on stdout");
1430	} else {
1431		fp = redirect(ptoi(a[1]), a[2]);
1432		/* fputs(buf, fp); */
1433		fwrite(buf, len, 1, fp);
1434		fflush(fp);
1435		if (ferror(fp))
1436			FATAL("write error on %s", filename(fp));
1437	}
1438	free(buf);
1439	return(True);
1440}
1441
1442Cell *arith(Node **a, int n)	/* a[0] + a[1], etc.  also -a[0] */
1443{
1444	Awkfloat i, j = 0;
1445	double v;
1446	Cell *x, *y, *z;
1447
1448	x = execute(a[0]);
1449	i = getfval(x);
1450	tempfree(x);
1451	if (n != UMINUS && n != UPLUS) {
1452		y = execute(a[1]);
1453		j = getfval(y);
1454		tempfree(y);
1455	}
1456	z = gettemp();
1457	switch (n) {
1458	case ADD:
1459		i += j;
1460		break;
1461	case MINUS:
1462		i -= j;
1463		break;
1464	case MULT:
1465		i *= j;
1466		break;
1467	case DIVIDE:
1468		if (j == 0)
1469			FATAL("division by zero");
1470		i /= j;
1471		break;
1472	case MOD:
1473		if (j == 0)
1474			FATAL("division by zero in mod");
1475		modf(i/j, &v);
1476		i = i - j * v;
1477		break;
1478	case UMINUS:
1479		i = -i;
1480		break;
1481	case UPLUS: /* handled by getfval(), above */
1482		break;
1483	case POWER:
1484		if (j >= 0 && modf(j, &v) == 0.0)	/* pos integer exponent */
1485			i = ipow(i, (int) j);
1486               else
1487			i = pow_errcheck(i, j);
1488		break;
1489	default:	/* can't happen */
1490		FATAL("illegal arithmetic operator %d", n);
1491	}
1492	setfval(z, i);
1493	return(z);
1494}
1495
/*
 * ipow: raise x to the integer power n by repeated squaring.
 * Returns 1 for any n <= 0.
 *
 * The exponent's bits are consumed from the most significant end so
 * that the multiplications happen in the same order as in the
 * halving recursion ipow(x, n) = [x *] ipow(x, n/2)^2.
 */
double ipow(double x, int n)	/* x**n.  ought to be done by pow, but isn't always */
{
	double acc = 1;
	int bit;

	if (n <= 0)
		return 1;

	/* locate the highest set bit of n */
	for (bit = 0; (n >> bit) > 1; bit++)
		;

	for ( ; bit >= 0; bit--) {
		if ((n >> bit) & 1)
			acc = x * acc * acc;
		else
			acc = acc * acc;
	}
	return acc;
}
1508
1509Cell *incrdecr(Node **a, int n)		/* a[0]++, etc. */
1510{
1511	Cell *x, *z;
1512	int k;
1513	Awkfloat xf;
1514
1515	x = execute(a[0]);
1516	xf = getfval(x);
1517	k = (n == PREINCR || n == POSTINCR) ? 1 : -1;
1518	if (n == PREINCR || n == PREDECR) {
1519		setfval(x, xf + k);
1520		return(x);
1521	}
1522	z = gettemp();
1523	setfval(z, xf);
1524	setfval(x, xf + k);
1525	tempfree(x);
1526	return(z);
1527}
1528
1529Cell *assign(Node **a, int n)	/* a[0] = a[1], a[0] += a[1], etc. */
1530{		/* this is subtle; don't muck with it. */
1531	Cell *x, *y;
1532	Awkfloat xf, yf;
1533	double v;
1534
1535	y = execute(a[1]);
1536	x = execute(a[0]);
1537	if (n == ASSIGN) {	/* ordinary assignment */
1538		if (x == y && !(x->tval & (FLD|REC)) && x != nfloc)
1539			;	/* self-assignment: leave alone unless it's a field or NF */
1540		else if ((y->tval & (STR|NUM)) == (STR|NUM)) {
1541			yf = getfval(y);
1542			setsval(x, getsval(y));
1543			x->fval = yf;
1544			x->tval |= NUM;
1545		}
1546		else if (isstr(y))
1547			setsval(x, getsval(y));
1548		else if (isnum(y))
1549			setfval(x, getfval(y));
1550		else
1551			funnyvar(y, "read value of");
1552		tempfree(y);
1553		return(x);
1554	}
1555	xf = getfval(x);
1556	yf = getfval(y);
1557	switch (n) {
1558	case ADDEQ:
1559		xf += yf;
1560		break;
1561	case SUBEQ:
1562		xf -= yf;
1563		break;
1564	case MULTEQ:
1565		xf *= yf;
1566		break;
1567	case DIVEQ:
1568		if ((x->tval & CON) != 0)
1569			FATAL("non-constant required for left side of /=");
1570		if (yf == 0)
1571			FATAL("division by zero in /=");
1572		xf /= yf;
1573		break;
1574	case MODEQ:
1575		if (yf == 0)
1576			FATAL("division by zero in %%=");
1577		modf(xf/yf, &v);
1578		xf = xf - yf * v;
1579		break;
1580	case POWEQ:
1581		if (yf >= 0 && modf(yf, &v) == 0.0)	/* pos integer exponent */
1582			xf = ipow(xf, (int) yf);
1583               else
1584			xf = pow_errcheck(xf, yf);
1585		break;
1586	default:
1587		FATAL("illegal assignment operator %d", n);
1588		break;
1589	}
1590	tempfree(y);
1591	setfval(x, xf);
1592	return(x);
1593}
1594
1595Cell *cat(Node **a, int q)	/* a[0] cat a[1] */
1596{
1597	Cell *x, *y, *z;
1598	int n1, n2;
1599	char *s = NULL;
1600	int ssz = 0;
1601
1602	x = execute(a[0]);
1603	n1 = strlen(getsval(x));
1604	adjbuf(&s, &ssz, n1 + 1, recsize, 0, "cat1");
1605	memcpy(s, x->sval, n1);
1606
1607	tempfree(x);
1608
1609	y = execute(a[1]);
1610	n2 = strlen(getsval(y));
1611	adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2");
1612	memcpy(s + n1, y->sval, n2);
1613	s[n1 + n2] = '\0';
1614
1615	tempfree(y);
1616
1617	z = gettemp();
1618	z->sval = s;
1619	z->tval = STR;
1620
1621	return(z);
1622}
1623
1624Cell *pastat(Node **a, int n)	/* a[0] { a[1] } */
1625{
1626	Cell *x;
1627
1628	if (a[0] == NULL)
1629		x = execute(a[1]);
1630	else {
1631		x = execute(a[0]);
1632		if (istrue(x)) {
1633			tempfree(x);
1634			x = execute(a[1]);
1635		}
1636	}
1637	return x;
1638}
1639
1640Cell *dopa2(Node **a, int n)	/* a[0], a[1] { a[2] } */
1641{
1642	Cell *x;
1643	int pair;
1644
1645	pair = ptoi(a[3]);
1646	if (pairstack[pair] == 0) {
1647		x = execute(a[0]);
1648		if (istrue(x))
1649			pairstack[pair] = 1;
1650		tempfree(x);
1651	}
1652	if (pairstack[pair] == 1) {
1653		x = execute(a[1]);
1654		if (istrue(x))
1655			pairstack[pair] = 0;
1656		tempfree(x);
1657		x = execute(a[2]);
1658		return(x);
1659	}
1660	return(False);
1661}
1662
1663Cell *split(Node **a, int nnn)	/* split(a[0], a[1], a[2]); a[3] is type */
1664{
1665	Cell *x = NULL, *y, *ap;
1666	const char *s, *origs, *t;
1667	const char *fs = NULL;
1668	char *origfs = NULL;
1669	int sep;
1670	char temp, num[50];
1671	int n, tempstat, arg3type;
1672	int j;
1673	double result;
1674
1675	y = execute(a[0]);	/* source string */
1676	origs = s = strdup(getsval(y));
1677	tempfree(y);
1678	arg3type = ptoi(a[3]);
1679	if (a[2] == NULL) {		/* BUG: CSV should override implicit fs but not explicit */
1680		fs = getsval(fsloc);
1681	} else if (arg3type == STRING) {	/* split(str,arr,"string") */
1682		x = execute(a[2]);
1683		fs = origfs = strdup(getsval(x));
1684		tempfree(x);
1685	} else if (arg3type == REGEXPR) {
1686		fs = "(regexpr)";	/* split(str,arr,/regexpr/) */
1687	} else {
1688		FATAL("illegal type of split");
1689	}
1690	sep = *fs;
1691	ap = execute(a[1]);	/* array name */
1692/* BUG 7/26/22: this appears not to reset array: see C1/asplit */
1693	freesymtab(ap);
1694	DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs);
1695	ap->tval &= ~STR;
1696	ap->tval |= ARR;
1697	ap->sval = (char *) makesymtab(NSYMTAB);
1698
1699	n = 0;
1700        if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) {
1701		/* split(s, a, //); have to arrange that it looks like empty sep */
1702		arg3type = 0;
1703		fs = "";
1704		sep = 0;
1705	}
1706	if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) {	/* reg expr */
1707		fa *pfa;
1708		if (arg3type == REGEXPR) {	/* it's ready already */
1709			pfa = (fa *) a[2];
1710		} else {
1711			pfa = makedfa(fs, 1);
1712		}
1713		if (nematch(pfa,s)) {
1714			tempstat = pfa->initstat;
1715			pfa->initstat = 2;
1716			do {
1717				n++;
1718				snprintf(num, sizeof(num), "%d", n);
1719				temp = *patbeg;
1720				setptr(patbeg, '\0');
1721				if (is_number(s, & result))
1722					setsymtab(num, s, result, STR|NUM, (Array *) ap->sval);
1723				else
1724					setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1725				setptr(patbeg, temp);
1726				s = patbeg + patlen;
1727				if (*(patbeg+patlen-1) == '\0' || *s == '\0') {
1728					n++;
1729					snprintf(num, sizeof(num), "%d", n);
1730					setsymtab(num, "", 0.0, STR, (Array *) ap->sval);
1731					pfa->initstat = tempstat;
1732					goto spdone;
1733				}
1734			} while (nematch(pfa,s));
1735			pfa->initstat = tempstat; 	/* bwk: has to be here to reset */
1736							/* cf gsub and refldbld */
1737		}
1738		n++;
1739		snprintf(num, sizeof(num), "%d", n);
1740		if (is_number(s, & result))
1741			setsymtab(num, s, result, STR|NUM, (Array *) ap->sval);
1742		else
1743			setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1744  spdone:
1745		pfa = NULL;
1746
1747	} else if (a[2] == NULL && CSV) {	/* CSV only if no explicit separator */
1748		char *newt = (char *) malloc(strlen(s) + 1); /* for building new string; reuse for each field */
1749		if (newt == NULL)
1750			FATAL("out of space in split");
1751		for (;;) {
1752			char *fr = newt;
1753			n++;
1754			if (*s == '"' ) { /* start of "..." */
1755				for (s++ ; *s != '\0'; ) {
1756					if (*s == '"' && s[1] != '\0' && s[1] == '"') {
1757						s += 2; /* doubled quote */
1758						*fr++ = '"';
1759					} else if (*s == '"' && (s[1] == '\0' || s[1] == ',')) {
1760						s++; /* skip over closing quote */
1761						break;
1762					} else {
1763						*fr++ = *s++;
1764					}
1765				}
1766				*fr++ = 0;
1767			} else {	/* unquoted field */
1768				while (*s != ',' && *s != '\0')
1769					*fr++ = *s++;
1770				*fr++ = 0;
1771			}
1772			snprintf(num, sizeof(num), "%d", n);
1773			if (is_number(newt, &result))
1774				setsymtab(num, newt, result, STR|NUM, (Array *) ap->sval);
1775			else
1776				setsymtab(num, newt, 0.0, STR, (Array *) ap->sval);
1777			if (*s++ == '\0')
1778				break;
1779		}
1780		free(newt);
1781
1782	} else if (!CSV && sep == ' ') { /* usual case: split on white space */
1783		for (n = 0; ; ) {
1784#define ISWS(c)	((c) == ' ' || (c) == '\t' || (c) == '\n')
1785			while (ISWS(*s))
1786				s++;
1787			if (*s == '\0')
1788				break;
1789			n++;
1790			t = s;
1791			do
1792				s++;
1793			while (*s != '\0' && !ISWS(*s));
1794			temp = *s;
1795			setptr(s, '\0');
1796			snprintf(num, sizeof(num), "%d", n);
1797			if (is_number(t, & result))
1798				setsymtab(num, t, result, STR|NUM, (Array *) ap->sval);
1799			else
1800				setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1801			setptr(s, temp);
1802			if (*s != '\0')
1803				s++;
1804		}
1805
1806	} else if (sep == 0) {	/* new: split(s, a, "") => 1 char/elem */
1807		for (n = 0; *s != '\0'; s += u8_nextlen(s)) {
1808			char buf[10];
1809			n++;
1810			snprintf(num, sizeof(num), "%d", n);
1811
1812			for (j = 0; j < u8_nextlen(s); j++) {
1813				buf[j] = s[j];
1814			}
1815			buf[j] = '\0';
1816
1817			if (isdigit((uschar)buf[0]))
1818				setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval);
1819			else
1820				setsymtab(num, buf, 0.0, STR, (Array *) ap->sval);
1821		}
1822
1823	} else if (*s != '\0') {  /* some random single character */
1824		for (;;) {
1825			n++;
1826			t = s;
1827			while (*s != sep && *s != '\0')
1828				s++;
1829			temp = *s;
1830			setptr(s, '\0');
1831			snprintf(num, sizeof(num), "%d", n);
1832			if (is_number(t, & result))
1833				setsymtab(num, t, result, STR|NUM, (Array *) ap->sval);
1834			else
1835				setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1836			setptr(s, temp);
1837			if (*s++ == '\0')
1838				break;
1839		}
1840	}
1841	tempfree(ap);
1842	xfree(origs);
1843	xfree(origfs);
1844	x = gettemp();
1845	x->tval = NUM;
1846	x->fval = n;
1847	return(x);
1848}
1849
1850Cell *condexpr(Node **a, int n)	/* a[0] ? a[1] : a[2] */
1851{
1852	Cell *x;
1853
1854	x = execute(a[0]);
1855	if (istrue(x)) {
1856		tempfree(x);
1857		x = execute(a[1]);
1858	} else {
1859		tempfree(x);
1860		x = execute(a[2]);
1861	}
1862	return(x);
1863}
1864
1865Cell *ifstat(Node **a, int n)	/* if (a[0]) a[1]; else a[2] */
1866{
1867	Cell *x;
1868
1869	x = execute(a[0]);
1870	if (istrue(x)) {
1871		tempfree(x);
1872		x = execute(a[1]);
1873	} else if (a[2] != NULL) {
1874		tempfree(x);
1875		x = execute(a[2]);
1876	}
1877	return(x);
1878}
1879
1880Cell *whilestat(Node **a, int n)	/* while (a[0]) a[1] */
1881{
1882	Cell *x;
1883
1884	for (;;) {
1885		x = execute(a[0]);
1886		if (!istrue(x))
1887			return(x);
1888		tempfree(x);
1889		x = execute(a[1]);
1890		if (isbreak(x)) {
1891			x = True;
1892			return(x);
1893		}
1894		if (isnext(x) || isexit(x) || isret(x))
1895			return(x);
1896		tempfree(x);
1897	}
1898}
1899
1900Cell *dostat(Node **a, int n)	/* do a[0]; while(a[1]) */
1901{
1902	Cell *x;
1903
1904	for (;;) {
1905		x = execute(a[0]);
1906		if (isbreak(x))
1907			return True;
1908		if (isnext(x) || isexit(x) || isret(x))
1909			return(x);
1910		tempfree(x);
1911		x = execute(a[1]);
1912		if (!istrue(x))
1913			return(x);
1914		tempfree(x);
1915	}
1916}
1917
1918Cell *forstat(Node **a, int n)	/* for (a[0]; a[1]; a[2]) a[3] */
1919{
1920	Cell *x;
1921
1922	x = execute(a[0]);
1923	tempfree(x);
1924	for (;;) {
1925		if (a[1]!=NULL) {
1926			x = execute(a[1]);
1927			if (!istrue(x)) return(x);
1928			else tempfree(x);
1929		}
1930		x = execute(a[3]);
1931		if (isbreak(x))		/* turn off break */
1932			return True;
1933		if (isnext(x) || isexit(x) || isret(x))
1934			return(x);
1935		tempfree(x);
1936		x = execute(a[2]);
1937		tempfree(x);
1938	}
1939}
1940
1941Cell *instat(Node **a, int n)	/* for (a[0] in a[1]) a[2] */
1942{
1943	Cell *x, *vp, *arrayp, *cp, *ncp;
1944	Array *tp;
1945	int i;
1946
1947	vp = execute(a[0]);
1948	arrayp = execute(a[1]);
1949	if (!isarr(arrayp)) {
1950		return True;
1951	}
1952	tp = (Array *) arrayp->sval;
1953	tempfree(arrayp);
1954	for (i = 0; i < tp->size; i++) {	/* this routine knows too much */
1955		for (cp = tp->tab[i]; cp != NULL; cp = ncp) {
1956			setsval(vp, cp->nval);
1957			ncp = cp->cnext;
1958			x = execute(a[2]);
1959			if (isbreak(x)) {
1960				tempfree(vp);
1961				return True;
1962			}
1963			if (isnext(x) || isexit(x) || isret(x)) {
1964				tempfree(vp);
1965				return(x);
1966			}
1967			tempfree(x);
1968		}
1969	}
1970	return True;
1971}
1972
1973static char *nawk_convert(const char *s, int (*fun_c)(int),
1974    wint_t (*fun_wc)(wint_t))
1975{
1976	char *buf      = NULL;
1977	char *pbuf     = NULL;
1978	const char *ps = NULL;
1979	size_t n       = 0;
1980	wchar_t wc;
1981	const size_t sz = awk_mb_cur_max;
1982	int unused;
1983
1984	if (sz == 1) {
1985		buf = tostring(s);
1986
1987		for (pbuf = buf; *pbuf; pbuf++)
1988			*pbuf = fun_c((uschar)*pbuf);
1989
1990		return buf;
1991	} else {
1992		/* upper/lower character may be shorter/longer */
1993		buf = tostringN(s, strlen(s) * sz + 1);
1994
1995		(void) mbtowc(NULL, NULL, 0);	/* reset internal state */
1996		/*
1997		 * Reset internal state here too.
1998		 * Assign result to avoid a compiler warning. (Casting to void
1999		 * doesn't work.)
2000		 * Increment said variable to avoid a different warning.
2001		 */
2002		unused = wctomb(NULL, L'\0');
2003		unused++;
2004
2005		ps   = s;
2006		pbuf = buf;
2007		while (n = mbtowc(&wc, ps, sz),
2008		       n > 0 && n != (size_t)-1 && n != (size_t)-2)
2009		{
2010			ps += n;
2011
2012			n = wctomb(pbuf, fun_wc(wc));
2013			if (n == (size_t)-1)
2014				FATAL("illegal wide character %s", s);
2015
2016			pbuf += n;
2017		}
2018
2019		*pbuf = '\0';
2020
2021		if (n)
2022			FATAL("illegal byte sequence %s", s);
2023
2024		return buf;
2025	}
2026}
2027
#ifdef __DJGPP__
/*
 * Replacements for towupper()/towlower(), compiled only under DJGPP.
 * Values in 0..255 are mapped through the byte-oriented
 * toupper()/tolower(); anything else is returned unchanged.
 */
static wint_t towupper(wint_t wc)
{
	if (!(wc >= 0 && wc < 256))
		return wc;

	return toupper(wc & 0xFF);
}

static wint_t towlower(wint_t wc)
{
	if (!(wc >= 0 && wc < 256))
		return wc;

	return tolower(wc & 0xFF);
}
#endif
2045
/* nawk_toupper: return a newly allocated upper-case copy of s. */
static char *nawk_toupper(const char *s)
{
	char *upper = nawk_convert(s, toupper, towupper);

	return upper;
}
2050
/* nawk_tolower: return a newly allocated lower-case copy of s. */
static char *nawk_tolower(const char *s)
{
	char *lower = nawk_convert(s, tolower, towlower);

	return lower;
}
2055
2056
2057
2058Cell *bltin(Node **a, int n)	/* builtin functions. a[0] is type, a[1] is arg list */
2059{
2060	Cell *x, *y;
2061	Awkfloat u = 0;
2062	int t;
2063	Awkfloat tmp;
2064	char *buf;
2065	Node *nextarg;
2066	FILE *fp;
2067	int status = 0;
2068	int estatus = 0;
2069
2070	t = ptoi(a[0]);
2071	x = execute(a[1]);
2072	nextarg = a[1]->nnext;
2073	switch (t) {
2074	case FLENGTH:
2075		if (isarr(x))
2076			u = ((Array *) x->sval)->nelem;	/* GROT.  should be function*/
2077		else
2078			u = u8_strlen(getsval(x));
2079		break;
2080	case FLOG:
2081		u = log_errcheck(getfval(x));
2082		break;
2083	case FINT:
2084		modf(getfval(x), &u); break;
2085	case FEXP:
2086		u = exp_errcheck(getfval(x));
2087		break;
2088	case FSQRT:
2089		u = sqrt_errcheck(getfval(x));
2090		break;
2091	case FSIN:
2092		u = sin(getfval(x)); break;
2093	case FCOS:
2094		u = cos(getfval(x)); break;
2095	case FATAN:
2096		if (nextarg == NULL) {
2097			WARNING("atan2 requires two arguments; returning 1.0");
2098			u = 1.0;
2099		} else {
2100			y = execute(a[1]->nnext);
2101			u = atan2(getfval(x), getfval(y));
2102			tempfree(y);
2103			nextarg = nextarg->nnext;
2104		}
2105		break;
2106	case FSYSTEM:
2107		fflush(stdout);		/* in case something is buffered already */
2108		estatus = status = system(getsval(x));
2109		if (status != -1) {
2110			if (WIFEXITED(status)) {
2111				estatus = WEXITSTATUS(status);
2112			} else if (WIFSIGNALED(status)) {
2113				estatus = WTERMSIG(status) + 256;
2114#ifdef WCOREDUMP
2115				if (WCOREDUMP(status))
2116					estatus += 256;
2117#endif
2118			} else	/* something else?!? */
2119				estatus = 0;
2120		}
2121		/* else estatus was set to -1 */
2122		u = estatus;
2123		break;
2124	case FRAND:
2125		/* random() returns numbers in [0..2^31-1]
2126		 * in order to get a number in [0, 1), divide it by 2^31
2127		 */
2128		do {
2129			/* exact if Awkfloat wide enough */
2130			u = (Awkfloat) random();
2131			u /= 0x80000000;  /* should be exact */
2132		} while (u >= 1.0);	  /* in case Awkfloat is narrow */
2133		break;
2134	case FSRAND:
2135		if (isrec(x))	/* no argument provided */
2136			u = time((time_t *)0);
2137		else
2138			u = getfval(x);
2139		tmp = u;
2140		srandom((unsigned long) u);
2141		u = srand_seed;
2142		srand_seed = tmp;
2143		break;
2144	case FTOUPPER:
2145	case FTOLOWER:
2146		if (t == FTOUPPER)
2147			buf = nawk_toupper(getsval(x));
2148		else
2149			buf = nawk_tolower(getsval(x));
2150		tempfree(x);
2151		x = gettemp();
2152		setsval(x, buf);
2153		free(buf);
2154		return x;
2155	case FFLUSH:
2156		if (isrec(x) || strlen(getsval(x)) == 0) {
2157			flush_all();	/* fflush() or fflush("") -> all */
2158			u = 0;
2159		} else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL)
2160			u = EOF;
2161		else
2162			u = fflush(fp);
2163		break;
2164	default:	/* can't happen */
2165		FATAL("illegal function type %d", t);
2166		break;
2167	}
2168	tempfree(x);
2169	x = gettemp();
2170	setfval(x, u);
2171	if (nextarg != NULL) {
2172		WARNING("warning: function has too many arguments");
2173		for ( ; nextarg; nextarg = nextarg->nnext) {
2174			y = execute(nextarg);
2175			tempfree(y);
2176		}
2177	}
2178	return(x);
2179}
2180
2181Cell *printstat(Node **a, int n)	/* print a[0] */
2182{
2183	Node *x;
2184	Cell *y;
2185	FILE *fp;
2186
2187	if (a[1] == NULL)	/* a[1] is redirection operator, a[2] is file */
2188		fp = stdout;
2189	else
2190		fp = redirect(ptoi(a[1]), a[2]);
2191	for (x = a[0]; x != NULL; x = x->nnext) {
2192		y = execute(x);
2193		fputs(getpssval(y), fp);
2194		tempfree(y);
2195		if (x->nnext == NULL)
2196			fputs(getsval(orsloc), fp);
2197		else
2198			fputs(getsval(ofsloc), fp);
2199	}
2200	if (a[1] != NULL)
2201		fflush(fp);
2202	if (ferror(fp))
2203		FATAL("write error on %s", filename(fp));
2204	return(True);
2205}
2206
2207Cell *nullproc(Node **a, int n)
2208{
2209	return 0;
2210}
2211
2212
2213FILE *redirect(int a, Node *b)	/* set up all i/o redirections */
2214{
2215	FILE *fp;
2216	Cell *x;
2217	char *fname;
2218
2219	x = execute(b);
2220	fname = getsval(x);
2221	fp = openfile(a, fname, NULL);
2222	if (fp == NULL)
2223		FATAL("can't open file %s", fname);
2224	tempfree(x);
2225	return fp;
2226}
2227
/*
 * Table of the streams awk currently has open.  It is allocated by
 * stdinit() and enlarged by openfile() when every slot is in use.
 * A slot whose fp is NULL is free.
 */
struct files {
	FILE *fp;		/* the open stream; NULL marks an unused slot */
	const char	*fname;	/* name it was opened under (heap copy made with tostring()) */
	int	mode;	/* '|', 'a', 'w' => LE/LT, GT */
			/* openfile() stores the redirection operator here, with APPEND recorded as GT */
} *files;

size_t nfiles;	/* number of slots in files[] */
2235
2236static void stdinit(void)	/* in case stdin, etc., are not constants */
2237{
2238	nfiles = FOPEN_MAX;
2239	files = (struct files *) calloc(nfiles, sizeof(*files));
2240	if (files == NULL)
2241		FATAL("can't allocate file memory for %zu files", nfiles);
2242        files[0].fp = stdin;
2243	files[0].fname = tostring("/dev/stdin");
2244	files[0].mode = LT;
2245        files[1].fp = stdout;
2246	files[1].fname = tostring("/dev/stdout");
2247	files[1].mode = GT;
2248        files[2].fp = stderr;
2249	files[2].fname = tostring("/dev/stderr");
2250	files[2].mode = GT;
2251}
2252
2253FILE *openfile(int a, const char *us, bool *pnewflag)
2254{
2255	const char *s = us;
2256	size_t i;
2257	int m;
2258	FILE *fp = NULL;
2259	struct stat sbuf;
2260
2261	if (*s == '\0')
2262		FATAL("null file name in print or getline");
2263
2264	for (i = 0; i < nfiles; i++)
2265		if (files[i].fname && strcmp(s, files[i].fname) == 0 &&
2266		    (a == files[i].mode || (a==APPEND && files[i].mode==GT) ||
2267		     a == FFLUSH)) {
2268			if (pnewflag)
2269				*pnewflag = false;
2270			return files[i].fp;
2271		}
2272	if (a == FFLUSH)	/* didn't find it, so don't create it! */
2273		return NULL;
2274	for (i = 0; i < nfiles; i++)
2275		if (files[i].fp == NULL)
2276			break;
2277	if (i >= nfiles) {
2278		struct files *nf;
2279		size_t nnf = nfiles + FOPEN_MAX;
2280		nf = (struct files *) realloc(files, nnf * sizeof(*nf));
2281		if (nf == NULL)
2282			FATAL("cannot grow files for %s and %zu files", s, nnf);
2283		memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf));
2284		nfiles = nnf;
2285		files = nf;
2286	}
2287
2288	fflush(stdout);	/* force a semblance of order */
2289
2290	/* don't try to read or write a directory */
2291	if (a == LT || a == GT || a == APPEND)
2292		if (stat(s, &sbuf) == 0 && S_ISDIR(sbuf.st_mode))
2293				return NULL;
2294
2295	m = a;
2296	if (a == GT) {
2297		fp = fopen(s, "w");
2298	} else if (a == APPEND) {
2299		fp = fopen(s, "a");
2300		m = GT;	/* so can mix > and >> */
2301	} else if (a == '|') {	/* output pipe */
2302		fp = popen(s, "w");
2303	} else if (a == LE) {	/* input pipe */
2304		fp = popen(s, "r");
2305	} else if (a == LT) {	/* getline <file */
2306		fp = strcmp(s, "-") == 0 ? stdin : fopen(s, "r");	/* "-" is stdin */
2307	} else	/* can't happen */
2308		FATAL("illegal redirection %d", a);
2309	if (fp != NULL) {
2310		files[i].fname = tostring(s);
2311		files[i].fp = fp;
2312		files[i].mode = m;
2313		if (pnewflag)
2314			*pnewflag = true;
2315		if (fp != stdin && fp != stdout && fp != stderr)
2316			(void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC);
2317	}
2318	return fp;
2319}
2320
2321const char *filename(FILE *fp)
2322{
2323	size_t i;
2324
2325	for (i = 0; i < nfiles; i++)
2326		if (fp == files[i].fp)
2327			return files[i].fname;
2328	return "???";
2329}
2330
2331Cell *closefile(Node **a, int n)
2332{
2333 	Cell *x;
2334	size_t i;
2335	bool stat;
2336
2337 	x = execute(a[0]);
2338 	getsval(x);
2339	stat = true;
2340 	for (i = 0; i < nfiles; i++) {
2341		if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0)
2342			continue;
2343		if (files[i].mode == GT || files[i].mode == '|')
2344			fflush(files[i].fp);
2345		if (ferror(files[i].fp)) {
2346			if ((files[i].mode == GT && files[i].fp != stderr)
2347			  || files[i].mode == '|')
2348				FATAL("write error on %s", files[i].fname);
2349			else
2350				WARNING("i/o error occurred on %s", files[i].fname);
2351		}
2352		if (files[i].fp == stdin || files[i].fp == stdout ||
2353		    files[i].fp == stderr)
2354			stat = freopen("/dev/null", "r+", files[i].fp) == NULL;
2355		else if (files[i].mode == '|' || files[i].mode == LE)
2356			stat = pclose(files[i].fp) == -1;
2357		else
2358			stat = fclose(files[i].fp) == EOF;
2359		if (stat)
2360			WARNING("i/o error occurred closing %s", files[i].fname);
2361		xfree(files[i].fname);
2362		files[i].fname = NULL;	/* watch out for ref thru this */
2363		files[i].fp = NULL;
2364		break;
2365 	}
2366 	tempfree(x);
2367 	x = gettemp();
2368	setfval(x, (Awkfloat) (stat ? -1 : 0));
2369 	return(x);
2370}
2371
2372void closeall(void)
2373{
2374	size_t i;
2375	bool stat = false;
2376
2377	for (i = 0; i < nfiles; i++) {
2378		if (! files[i].fp)
2379			continue;
2380		if (files[i].mode == GT || files[i].mode == '|')
2381			fflush(files[i].fp);
2382		if (ferror(files[i].fp)) {
2383			if ((files[i].mode == GT && files[i].fp != stderr)
2384			  || files[i].mode == '|')
2385				FATAL("write error on %s", files[i].fname);
2386			else
2387				WARNING("i/o error occurred on %s", files[i].fname);
2388		}
2389		if (files[i].fp == stdin || files[i].fp == stdout ||
2390		    files[i].fp == stderr)
2391			continue;
2392		if (files[i].mode == '|' || files[i].mode == LE)
2393			stat = pclose(files[i].fp) == -1;
2394		else
2395			stat = fclose(files[i].fp) == EOF;
2396		if (stat)
2397			WARNING("i/o error occurred while closing %s", files[i].fname);
2398	}
2399}
2400
2401static void flush_all(void)
2402{
2403	size_t i;
2404
2405	for (i = 0; i < nfiles; i++)
2406		if (files[i].fp)
2407			fflush(files[i].fp);
2408}
2409
2410void backsub(char **pb_ptr, const char **sptr_ptr);
2411
2412Cell *dosub(Node **a, int subop)        /* sub and gsub */
2413{
2414	fa *pfa;
2415	int tempstat = 0;
2416	char *repl;
2417	Cell *x;
2418
2419	char *buf = NULL;
2420	char *pb = NULL;
2421	int bufsz = recsize;
2422
2423	const char *r, *s;
2424	const char *start;
2425	const char *noempty = NULL;      /* empty match disallowed here */
2426	size_t m = 0;                    /* match count */
2427	size_t whichm = 0;               /* which match to select, 0 = global */
2428	int mtype;                       /* match type */
2429
2430	if (a[0] == NULL) {	/* 0 => a[1] is already-compiled regexpr */
2431		pfa = (fa *) a[1];
2432	} else {
2433		x = execute(a[1]);
2434		pfa = makedfa(getsval(x), 1);
2435		tempfree(x);
2436	}
2437
2438	x = execute(a[2]);	/* replacement string */
2439	repl = tostring(getsval(x));
2440	tempfree(x);
2441
2442	switch (subop) {
2443	case SUB:
2444		whichm = 1;
2445		x = execute(a[3]);    /* source string */
2446		break;
2447	case GSUB:
2448		whichm = 0;
2449		x = execute(a[3]);    /* source string */
2450		break;
2451	default:
2452		FATAL("dosub: unrecognized subop: %d", subop);
2453	}
2454
2455	start = getsval(x);
2456	while (pmatch(pfa, start)) {
2457		if (buf == NULL) {
2458			if ((pb = buf = (char *) malloc(bufsz)) == NULL)
2459				FATAL("out of memory in dosub");
2460			tempstat = pfa->initstat;
2461			pfa->initstat = 2;
2462		}
2463
2464		/* match types */
2465		#define	MT_IGNORE  0  /* unselected or invalid */
2466		#define MT_INSERT  1  /* selected, empty */
2467		#define MT_REPLACE 2  /* selected, not empty */
2468
2469		/* an empty match just after replacement is invalid */
2470
2471		if (patbeg == noempty && patlen == 0) {
2472			mtype = MT_IGNORE;    /* invalid, not counted */
2473		} else if (whichm == ++m || whichm == 0) {
2474			mtype = patlen ? MT_REPLACE : MT_INSERT;
2475		} else {
2476			mtype = MT_IGNORE;    /* unselected, but counted */
2477		}
2478
2479		/* leading text: */
2480		if (patbeg > start) {
2481			adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - start),
2482				recsize, &pb, "dosub");
2483			s = start;
2484			while (s < patbeg)
2485				*pb++ = *s++;
2486		}
2487
2488		if (mtype == MT_IGNORE)
2489			goto matching_text;  /* skip replacement text */
2490
2491		r = repl;
2492		while (*r != 0) {
2493			adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "dosub");
2494			if (*r == '\\') {
2495				backsub(&pb, &r);
2496			} else if (*r == '&') {
2497				r++;
2498				adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize,
2499					&pb, "dosub");
2500				for (s = patbeg; s < patbeg+patlen; )
2501					*pb++ = *s++;
2502			} else {
2503				*pb++ = *r++;
2504			}
2505		}
2506
2507matching_text:
2508		if (mtype == MT_REPLACE || *patbeg == '\0')
2509			goto next_search;  /* skip matching text */
2510		
2511		if (patlen == 0)
2512			patlen = u8_nextlen(patbeg);
2513		adjbuf(&buf, &bufsz, (pb-buf) + patlen, recsize, &pb, "dosub");
2514		s = patbeg;
2515		while (s < patbeg + patlen)
2516			*pb++ = *s++;
2517
2518next_search:
2519		start = patbeg + patlen;
2520		if (m == whichm || *patbeg == '\0')
2521			break;
2522		if (mtype == MT_REPLACE)
2523			noempty = start;
2524
2525		#undef MT_IGNORE
2526		#undef MT_INSERT
2527		#undef MT_REPLACE
2528	}
2529
2530	if (repl) {
2531		free(repl);
2532	}
2533
2534	if (buf != NULL) {
2535		pfa->initstat = tempstat;
2536
2537		/* trailing text */
2538		adjbuf(&buf, &bufsz, 1+strlen(start)+pb-buf, 0, &pb, "dosub");
2539		while ((*pb++ = *start++) != '\0')
2540			;
2541
2542		setsval(x, buf);
2543		free(buf);
2544	}
2545
2546	tempfree(x);
2547	x = gettemp();
2548	x->tval = NUM;
2549	x->fval = m;
2550	return x;
2551}
2552
/*
 * backsub: translate one backslash sequence of a sub()/gsub()
 * replacement string.
 *
 * On entry (*sptr_ptr)[0] is expected to be a backslash.  The translated
 * bytes (at most two) are stored at *pb_ptr, and both pointers are
 * advanced past what was written and consumed:
 *
 *   \\\&   writes \&        consumes 4
 *   \\&    writes \         consumes 2 (the & is left for the caller)
 *   \\x    writes \         consumes 2 when POSIXLY_CORRECT is set
 *   \\x    writes \\        consumes 2 otherwise
 *   \&     writes &         consumes 2
 *   \x     writes \         consumes 1
 *
 * The environment is consulted only on the first call; the answer is
 * remembered in a static flag.
 */
void backsub(char **pb_ptr, const char **sptr_ptr)	/* handle \\& variations */
{
	static bool env_checked = false;
	static bool posix_mode = false;
	const char *in = *sptr_ptr;	/* in[0] == '\\' */
	char *out = *pb_ptr;

	if (!env_checked) {
		env_checked = true;
		posix_mode = (getenv("POSIXLY_CORRECT") != NULL);
	}

	switch (in[1]) {
	case '\\':
		if (in[2] == '\\' && in[3] == '&') {	/* \\\& -> \& */
			*out++ = '\\';
			*out++ = '&';
			in += 4;
		} else if (in[2] == '&') {		/* \\& -> \ + matched */
			*out++ = '\\';
			in += 2;
		} else if (posix_mode) {		/* \\x -> \x */
			*out++ = in[1];
			in += 2;
		} else {				/* \\x -> \\x */
			*out++ = in[0];
			*out++ = in[1];
			in += 2;
		}
		break;
	case '&':					/* literal & */
		*out++ = in[1];
		in += 2;
		break;
	default:					/* literal \ */
		*out++ = in[0];
		in += 1;
		break;
	}

	*sptr_ptr = in;
	*pb_ptr = out;
}
2589
/*
 * wide_char_to_byte_str: encode one code point as a UTF-8 byte string.
 *
 *   rune    code point to encode; must be in 0 .. 0x10FFFF
 *   outlen  receives the number of encoded bytes (1 to 4), not counting
 *           the terminating NUL; set to 0 when the rune is rejected
 *
 * Returns a pointer to a NUL-terminated static buffer that is
 * overwritten by the next call, or NULL when rune is out of range.
 * Values in the surrogate range are not rejected; they are encoded
 * with the three-byte pattern like any other value below 0x10000.
 */
static char *wide_char_to_byte_str(int rune, size_t *outlen)
{
	static char buf[5];
	size_t len = 0;

	if (rune < 0 || rune > 0x10FFFF) {
		/* never leave the caller's length indeterminate */
		*outlen = 0;
		return NULL;
	}

	memset(buf, 0, sizeof(buf));

	if (rune <= 0x0000007F) {
		/* 0xxxxxxx */
		buf[len++] = (char) rune;
	} else if (rune <= 0x000007FF) {
		/* 110xxxxx 10xxxxxx */
		buf[len++] = (char) (0xC0 | (rune >> 6));
		buf[len++] = (char) (0x80 | (rune & 0x3F));
	} else if (rune <= 0x0000FFFF) {
		/* 1110xxxx 10xxxxxx 10xxxxxx */
		buf[len++] = (char) (0xE0 | (rune >> 12));
		buf[len++] = (char) (0x80 | ((rune >> 6) & 0x3F));
		buf[len++] = (char) (0x80 | (rune & 0x3F));
	} else {
		/* 0x00010000 - 0x10FFFF */
		/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
		buf[len++] = (char) (0xF0 | (rune >> 18));
		buf[len++] = (char) (0x80 | ((rune >> 12) & 0x3F));
		buf[len++] = (char) (0x80 | ((rune >> 6) & 0x3F));
		buf[len++] = (char) (0x80 | (rune & 0x3F));
	}

	*outlen = len;
	buf[len] = '\0';

	return buf;
}