1/****************************************************************
2Copyright (C) Lucent Technologies 1997
3All Rights Reserved
4
5Permission to use, copy, modify, and distribute this software and
6its documentation for any purpose and without fee is hereby
7granted, provided that the above copyright notice appear in all
8copies and that both that the copyright notice and this
9permission notice and warranty disclaimer appear in supporting
10documentation, and that the name Lucent Technologies or any of
11its entities not be used in advertising or publicity pertaining
12to distribution of the software without specific, written prior
13permission.
14
15LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22THIS SOFTWARE.
23****************************************************************/
24
25#ifdef __GNUC__
26#pragma GCC diagnostic ignored "-Wunused-parameter"
27#endif
28
29#define DEBUG
30#include <stdio.h>
31#include <ctype.h>
32#include <wctype.h>
33#include <fcntl.h>
34#include <setjmp.h>
35#include <limits.h>
36#include <math.h>
37#include <string.h>
38#include <stdlib.h>
39#include <time.h>
40#include <sys/types.h>
41#include <sys/stat.h>
42#include <sys/wait.h>
43#include "awk.h"
44#include "awkgram.tab.h"
45
46
/* forward declarations of file-local helpers */
static void stdinit(void);
static void flush_all(void);
static char *wide_char_to_byte_str(int rune, size_t *outlen);

/*
 * tempfree(x): give cell x to tfree() when istemp(x) holds, otherwise
 * do nothing.  The macro form is the one that is compiled; the function
 * form kept under #else additionally warns when an OCELL cell carries a
 * csub outside CUNK..CFREE.
 */
#if 1
#define	tempfree(x)	do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0)
#else
void tempfree(Cell *p) {
	if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) {
		WARNING("bad csub %d in Cell %d %s",
			p->csub, p->ctype, p->sval);
	}
	if (istemp(p))
		tfree(p);
}
#endif
63
64/* do we really need these? */
65/* #ifdef _NFILE */
66/* #ifndef FOPEN_MAX */
67/* #define FOPEN_MAX _NFILE */
68/* #endif */
69/* #endif */
70
71/* #ifndef FOPEN_MAX */
72/* #define FOPEN_MAX 40 */ /* max number of open files */
73/* #endif */
74
75
76jmp_buf env;
77extern int pairstack[];
78extern Awkfloat srand_seed;
79
80Node *winner = NULL; /* root of parse tree */
81Cell *tmps; /* free temporary cells for execution */
82
83static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL, NULL };
84Cell *True = &truecell;
85static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL, NULL };
86Cell *False = &falsecell;
87static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL, NULL };
88Cell *jbreak = &breakcell;
89static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL, NULL };
90Cell *jcont = &contcell;
91static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL, NULL };
92Cell *jnext = &nextcell;
93static Cell nextfilecell ={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL, NULL };
94Cell *jnextfile = &nextfilecell;
95static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL, NULL };
96Cell *jexit = &exitcell;
97static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL, NULL };
98Cell *jret = &retcell;
99static Cell tempcell ={ OCELL, CTEMP, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };
100
101Node *curnode = NULL; /* the node being executed, for debugging */
102
103/* buffer memory management */
/*
 * adjbuf: make sure the managed buffer holds at least minlen bytes.
 *
 * pbuf:    address of pointer to buffer being managed
 * psiz:    address of buffer size variable
 * minlen:  minimum length of buffer needed
 * quantum: buffer size quantum; when non-zero the new size is rounded
 *          up to the next multiple of it
 * pbptr:   address of movable pointer into buffer, or 0 if none; it is
 *          re-based onto the new buffer after a successful realloc
 * whatrtn: name of the calling routine if failure should cause fatal
 *          error, or NULL to get a 0 return instead
 *
 * Returns 0 for realloc failure (only possible when whatrtn is NULL),
 * !=0 for success.  Nothing is touched when the buffer is already big
 * enough.
 */
int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr,
	const char *whatrtn)
{
	char *tbuf;
	int rminlen, boff;

	if (minlen <= *psiz)
		return 1;	/* already large enough */

	rminlen = quantum ? minlen % quantum : 0;
	boff = pbptr ? *pbptr - *pbuf : 0;	/* offset survives the move */
	/* round up to next multiple of quantum */
	if (rminlen)
		minlen += quantum - rminlen;
	tbuf = (char *) realloc(*pbuf, minlen);
	/* whatrtn is allowed to be NULL, so never hand it to %s directly */
	DPRINTF("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", NN(whatrtn), *psiz, minlen, (void*)*pbuf, (void*)tbuf);
	if (tbuf == NULL) {
		if (whatrtn)
			FATAL("out of memory in %s", whatrtn);
		return 0;
	}
	*pbuf = tbuf;
	*psiz = minlen;
	if (pbptr)
		*pbptr = tbuf + boff;
	return 1;
}
137
138void run(Node *a) /* execution of parse tree starts here */
139{
140
141 stdinit();
142 execute(a);
143 closeall();
144}
145
146Cell *execute(Node *u) /* execute a node of the parse tree */
147{
148 Cell *(*proc)(Node **, int);
149 Cell *x;
150 Node *a;
151
152 if (u == NULL)
153 return(True);
154 for (a = u; ; a = a->nnext) {
155 curnode = a;
156 if (isvalue(a)) {
157 x = (Cell *) (a->narg[0]);
158 if (isfld(x) && !donefld)
159 fldbld();
160 else if (isrec(x) && !donerec)
161 recbld();
162 return(x);
163 }
164 if (notlegal(a->nobj)) /* probably a Cell* but too risky to print */
165 FATAL("illegal statement");
166 proc = proctab[a->nobj-FIRSTTOKEN];
167 x = (*proc)(a->narg, a->nobj);
168 if (isfld(x) && !donefld)
169 fldbld();
170 else if (isrec(x) && !donerec)
171 recbld();
172 if (isexpr(a))
173 return(x);
174 if (isjump(x))
175 return(x);
176 if (a->nnext == NULL)
177 return(x);
178 tempfree(x);
179 }
180}
181
182
183Cell *program(Node **a, int n) /* execute an awk program */
184{ /* a[0] = BEGIN, a[1] = body, a[2] = END */
185 Cell *x;
186
187 if (setjmp(env) != 0)
188 goto ex;
189 if (a[0]) { /* BEGIN */
190 x = execute(a[0]);
191 if (isexit(x))
192 return(True);
193 if (isjump(x))
194 FATAL("illegal break, continue, next or nextfile from BEGIN");
195 tempfree(x);
196 }
197 if (a[1] || a[2])
198 while (getrec(&record, &recsize, true) > 0) {
199 x = execute(a[1]);
200 if (isexit(x))
201 break;
202 tempfree(x);
203 }
204 ex:
205 if (setjmp(env) != 0) /* handles exit within END */
206 goto ex1;
207 if (a[2]) { /* END */
208 x = execute(a[2]);
209 if (isbreak(x) || isnext(x) || iscont(x))
210 FATAL("illegal break, continue, next or nextfile from END");
211 tempfree(x);
212 }
213 ex1:
214 return(True);
215}
216
217struct Frame { /* stack frame for awk function calls */
218 int nargs; /* number of arguments in this call */
219 Cell *fcncell; /* pointer to Cell for function */
220 Cell **args; /* pointer to array of arguments after execute */
221 Cell *retval; /* return value */
222};
223
224#define NARGS 50 /* max args in a call */
225
226struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */
227int nframe = 0; /* number of frames allocated */
228struct Frame *frp = NULL; /* frame pointer. bottom level unused */
229
230Cell *call(Node **a, int n) /* function call. very kludgy and fragile */
231{
232 static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };
233 int i, ncall, ndef;
234 int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */
235 Node *x;
236 Cell *args[NARGS], *oargs[NARGS]; /* BUG: fixed size arrays */
237 Cell *y, *z, *fcn;
238 char *s;
239
240 fcn = execute(a[0]); /* the function itself */
241 s = fcn->nval;
242 if (!isfcn(fcn))
243 FATAL("calling undefined function %s", s);
244 if (frame == NULL) {
245 frp = frame = (struct Frame *) calloc(nframe += 100, sizeof(*frame));
246 if (frame == NULL)
247 FATAL("out of space for stack frames calling %s", s);
248 }
249 for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */
250 ncall++;
251 ndef = (int) fcn->fval; /* args in defn */
252 DPRINTF("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame));
253 if (ncall > ndef)
254 WARNING("function %s called with %d args, uses only %d",
255 s, ncall, ndef);
256 if (ncall + ndef > NARGS)
257 FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS);
258 for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) { /* get call args */
259 DPRINTF("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame));
260 y = execute(x);
261 oargs[i] = y;
262 DPRINTF("args[%d]: %s %f <%s>, t=%o\n",
263 i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval);
264 if (isfcn(y))
265 FATAL("can't use function %s as argument in %s", y->nval, s);
266 if (isarr(y))
267 args[i] = y; /* arrays by ref */
268 else
269 args[i] = copycell(y);
270 tempfree(y);
271 }
272 for ( ; i < ndef; i++) { /* add null args for ones not provided */
273 args[i] = gettemp();
274 *args[i] = newcopycell;
275 }
276 frp++; /* now ok to up frame */
277 if (frp >= frame + nframe) {
278 int dfp = frp - frame; /* old index */
279 frame = (struct Frame *) realloc(frame, (nframe += 100) * sizeof(*frame));
280 if (frame == NULL)
281 FATAL("out of space for stack frames in %s", s);
282 frp = frame + dfp;
283 }
284 frp->fcncell = fcn;
285 frp->args = args;
286 frp->nargs = ndef; /* number defined with (excess are locals) */
287 frp->retval = gettemp();
288
289 DPRINTF("start exec of %s, frp=%d\n", s, (int) (frp-frame));
290 y = execute((Node *)(fcn->sval)); /* execute body */
291 DPRINTF("finished exec of %s, frp=%d\n", s, (int) (frp-frame));
292
293 for (i = 0; i < ndef; i++) {
294 Cell *t = frp->args[i];
295 if (isarr(t)) {
296 if (t->csub == CCOPY) {
297 if (i >= ncall) {
298 freesymtab(t);
299 t->csub = CTEMP;
300 tempfree(t);
301 } else {
302 oargs[i]->tval = t->tval;
303 oargs[i]->tval &= ~(STR|NUM|DONTFREE);
304 oargs[i]->sval = t->sval;
305 tempfree(t);
306 }
307 }
308 } else if (t != y) { /* kludge to prevent freeing twice */
309 t->csub = CTEMP;
310 tempfree(t);
311 } else if (t == y && t->csub == CCOPY) {
312 t->csub = CTEMP;
313 tempfree(t);
314 freed = 1;
315 }
316 }
317 tempfree(fcn);
318 if (isexit(y) || isnext(y))
319 return y;
320 if (freed == 0) {
321 tempfree(y); /* don't free twice! */
322 }
323 z = frp->retval; /* return value */
324 DPRINTF("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval);
325 frp--;
326 return(z);
327}
328
329Cell *copycell(Cell *x) /* make a copy of a cell in a temp */
330{
331 Cell *y;
332
333 /* copy is not constant or field */
334
335 y = gettemp();
336 y->tval = x->tval & ~(CON|FLD|REC);
337 y->csub = CCOPY; /* prevents freeing until call is over */
338 y->nval = x->nval; /* BUG? */
339 if (isstr(x) /* || x->ctype == OCELL */) {
340 y->sval = tostring(x->sval);
341 y->tval &= ~DONTFREE;
342 } else
343 y->tval |= DONTFREE;
344 y->fval = x->fval;
345 return y;
346}
347
348Cell *arg(Node **a, int n) /* nth argument of a function */
349{
350
351 n = ptoi(a[0]); /* argument number, counting from 0 */
352 DPRINTF("arg(%d), frp->nargs=%d\n", n, frp->nargs);
353 if (n+1 > frp->nargs)
354 FATAL("argument #%d of function %s was not supplied",
355 n+1, frp->fcncell->nval);
356 return frp->args[n];
357}
358
359Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */
360{
361 Cell *y;
362
363 switch (n) {
364 case EXIT:
365 if (a[0] != NULL) {
366 y = execute(a[0]);
367 errorflag = (int) getfval(y);
368 tempfree(y);
369 }
370 longjmp(env, 1);
371 case RETURN:
372 if (a[0] != NULL) {
373 y = execute(a[0]);
374 if ((y->tval & (STR|NUM)) == (STR|NUM)) {
375 setsval(frp->retval, getsval(y));
376 frp->retval->fval = getfval(y);
377 frp->retval->tval |= NUM;
378 }
379 else if (y->tval & STR)
380 setsval(frp->retval, getsval(y));
381 else if (y->tval & NUM)
382 setfval(frp->retval, getfval(y));
383 else /* can't happen */
384 FATAL("bad type variable %d", y->tval);
385 tempfree(y);
386 }
387 return(jret);
388 case NEXT:
389 return(jnext);
390 case NEXTFILE:
391 nextfile();
392 return(jnextfile);
393 case BREAK:
394 return(jbreak);
395 case CONTINUE:
396 return(jcont);
397 default: /* can't happen */
398 FATAL("illegal jump type %d", n);
399 }
400 return 0; /* not reached */
401}
402
403Cell *awkgetline(Node **a, int n) /* get next line from specific input */
404{ /* a[0] is variable, a[1] is operator, a[2] is filename */
405 Cell *r, *x;
406 extern Cell **fldtab;
407 FILE *fp;
408 char *buf;
409 int bufsize = recsize;
410 int mode;
411 bool newflag;
412 double result;
413
414 if ((buf = (char *) malloc(bufsize)) == NULL)
415 FATAL("out of memory in getline");
416
417 fflush(stdout); /* in case someone is waiting for a prompt */
418 r = gettemp();
419 if (a[1] != NULL) { /* getline < file */
420 x = execute(a[2]); /* filename */
421 mode = ptoi(a[1]);
422 if (mode == '|') /* input pipe */
423 mode = LE; /* arbitrary flag */
424 fp = openfile(mode, getsval(x), &newflag);
425 tempfree(x);
426 if (fp == NULL)
427 n = -1;
428 else
429 n = readrec(&buf, &bufsize, fp, newflag);
430 if (n <= 0) {
431 ;
432 } else if (a[0] != NULL) { /* getline var <file */
433 x = execute(a[0]);
434 setsval(x, buf);
435 if (is_number(x->sval, & result)) {
436 x->fval = result;
437 x->tval |= NUM;
438 }
439 tempfree(x);
440 } else { /* getline <file */
441 setsval(fldtab[0], buf);
442 if (is_number(fldtab[0]->sval, & result)) {
443 fldtab[0]->fval = result;
444 fldtab[0]->tval |= NUM;
445 }
446 }
447 } else { /* bare getline; use current input */
448 if (a[0] == NULL) /* getline */
449 n = getrec(&record, &recsize, true);
450 else { /* getline var */
451 n = getrec(&buf, &bufsize, false);
452 if (n > 0) {
453 x = execute(a[0]);
454 setsval(x, buf);
455 if (is_number(x->sval, & result)) {
456 x->fval = result;
457 x->tval |= NUM;
458 }
459 tempfree(x);
460 }
461 }
462 }
463 setfval(r, (Awkfloat) n);
464 free(buf);
465 return r;
466}
467
468Cell *getnf(Node **a, int n) /* get NF */
469{
470 if (!donefld)
471 fldbld();
472 return (Cell *) a[0];
473}
474
475static char *
476makearraystring(Node *p, const char *func)
477{
478 char *buf;
479 int bufsz = recsize;
480 size_t blen;
481
482 if ((buf = (char *) malloc(bufsz)) == NULL) {
483 FATAL("%s: out of memory", func);
484 }
485
486 blen = 0;
487 buf[blen] = '\0';
488
489 for (; p; p = p->nnext) {
490 Cell *x = execute(p); /* expr */
491 char *s = getsval(x);
492 size_t seplen = strlen(getsval(subseploc));
493 size_t nsub = p->nnext ? seplen : 0;
494 size_t slen = strlen(s);
495 size_t tlen = blen + slen + nsub;
496
497 if (!adjbuf(&buf, &bufsz, tlen + 1, recsize, 0, func)) {
498 FATAL("%s: out of memory %s[%s...]",
499 func, x->nval, buf);
500 }
501 memcpy(buf + blen, s, slen);
502 if (nsub) {
503 memcpy(buf + blen + slen, *SUBSEP, nsub);
504 }
505 buf[tlen] = '\0';
506 blen = tlen;
507 tempfree(x);
508 }
509 return buf;
510}
511
512Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
513{
514 Cell *x, *z;
515 char *buf;
516
517 x = execute(a[0]); /* Cell* for symbol table */
518 buf = makearraystring(a[1], __func__);
519 if (!isarr(x)) {
520 DPRINTF("making %s into an array\n", NN(x->nval));
521 if (freeable(x))
522 xfree(x->sval);
523 x->tval &= ~(STR|NUM|DONTFREE);
524 x->tval |= ARR;
525 x->sval = (char *) makesymtab(NSYMTAB);
526 }
527 z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval);
528 z->ctype = OCELL;
529 z->csub = CVAR;
530 tempfree(x);
531 free(buf);
532 return(z);
533}
534
535Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
536{
537 Cell *x;
538
539 x = execute(a[0]); /* Cell* for symbol table */
540 if (x == symtabloc) {
541 FATAL("cannot delete SYMTAB or its elements");
542 }
543 if (!isarr(x))
544 return True;
545 if (a[1] == NULL) { /* delete the elements, not the table */
546 freesymtab(x);
547 x->tval &= ~STR;
548 x->tval |= ARR;
549 x->sval = (char *) makesymtab(NSYMTAB);
550 } else {
551 char *buf = makearraystring(a[1], __func__);
552 freeelem(x, buf);
553 free(buf);
554 }
555 tempfree(x);
556 return True;
557}
558
559Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */
560{
561 Cell *ap, *k;
562 char *buf;
563
564 ap = execute(a[1]); /* array name */
565 if (!isarr(ap)) {
566 DPRINTF("making %s into an array\n", ap->nval);
567 if (freeable(ap))
568 xfree(ap->sval);
569 ap->tval &= ~(STR|NUM|DONTFREE);
570 ap->tval |= ARR;
571 ap->sval = (char *) makesymtab(NSYMTAB);
572 }
573 buf = makearraystring(a[0], __func__);
574 k = lookup(buf, (Array *) ap->sval);
575 tempfree(ap);
576 free(buf);
577 if (k == NULL)
578 return(False);
579 else
580 return(True);
581}
582
583
584/* ======== utf-8 code ========== */
585
586/*
587 * Awk strings can contain ascii, random 8-bit items (eg Latin-1),
588 * or utf-8. u8_isutf tests whether a string starts with a valid
589 * utf-8 sequence, and returns 0 if not (e.g., high bit set).
590 * u8_nextlen returns length of next valid sequence, which is
591 * 1 for ascii, 2..4 for utf-8, or 1 for high bit non-utf.
592 * u8_strlen returns length of string in valid utf-8 sequences
593 * and/or high-bit bytes. Conversion functions go between byte
594 * number and character number.
595 *
596 * In theory, this behaves the same as before for non-utf8 bytes.
597 *
598 * Limited checking! This is a potential security hole.
599 */
600
601/* is s the beginning of a valid utf-8 string? */
602/* return length 1..4 if yes, 0 if no */
603int u8_isutf(const char *s)
604{
605 int n, ret;
606 unsigned char c;
607
608 c = s[0];
609 if (c < 128 || awk_mb_cur_max == 1)
610 return 1; /* what if it's 0? */
611
612 n = strlen(s);
613 if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) {
614 ret = 2; /* 110xxxxx 10xxxxxx */
615 } else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80
616 && (s[2] & 0xC0) == 0x80) {
617 ret = 3; /* 1110xxxx 10xxxxxx 10xxxxxx */
618 } else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80
619 && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) {
620 ret = 4; /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
621 } else {
622 ret = 0;
623 }
624 return ret;
625}
626
627/* Convert (prefix of) utf8 string to utf-32 rune. */
628/* Sets *rune to the value, returns the length. */
629/* No error checking: watch out. */
630int u8_rune(int *rune, const char *s)
631{
632 int n, ret;
633 unsigned char c;
634
635 c = s[0];
636 if (c < 128 || awk_mb_cur_max == 1) {
637 *rune = c;
638 return 1;
639 }
640
641 n = strlen(s);
642 if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) {
643 *rune = ((c & 0x1F) << 6) | (s[1] & 0x3F); /* 110xxxxx 10xxxxxx */
644 ret = 2;
645 } else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80
646 && (s[2] & 0xC0) == 0x80) {
647 *rune = ((c & 0xF) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F);
648 /* 1110xxxx 10xxxxxx 10xxxxxx */
649 ret = 3;
650 } else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80
651 && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) {
652 *rune = ((c & 0x7) << 18) | ((s[1] & 0x3F) << 12) | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F);
653 /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
654 ret = 4;
655 } else {
656 *rune = c;
657 ret = 1;
658 }
659 return ret; /* returns one byte if sequence doesn't look like utf */
660}
661
/*
 * u8_nextlen: return length of next sequence: 1 for ascii or random,
 * 2..4 for valid utf8.  A u8_isutf() result of 0 is mapped to 1.
 */
int u8_nextlen(const char *s)
{
	int len = u8_isutf(s);

	return (len != 0) ? len : 1;
}
672
673/* return number of utf characters or single non-utf bytes */
674int u8_strlen(const char *s)
675{
676 int i, len, n, totlen;
677 unsigned char c;
678
679 n = strlen(s);
680 totlen = 0;
681 for (i = 0; i < n; i += len) {
682 c = s[i];
683 if (c < 128 || awk_mb_cur_max == 1) {
684 len = 1;
685 } else {
686 len = u8_nextlen(&s[i]);
687 }
688 totlen++;
689 if (i > n)
690 FATAL("bad utf count [%s] n=%d i=%d\n", s, n, i);
691 }
692 return totlen;
693}
694
/*
 * u8_char2byte: convert utf-8 char number in a string to its byte
 * offset.  charnum counts from 0; a value <= 0 gives offset 0.
 *
 * The walk stops at the terminating NUL, so a charnum larger than the
 * number of characters in s yields the byte length of s instead of
 * reading beyond the end of the string.
 */
int u8_char2byte(const char *s, int charnum)
{
	int n;
	int bytenum = 0;

	while (charnum > 0 && *s != '\0') {
		n = u8_nextlen(s);
		s += n;
		bytenum += n;
		charnum--;
	}
	return bytenum;
}
709
/*
 * u8_byte2char: convert byte offset in s to utf-8 char number that
 * starts there.  The count is 1-origin: offset 0 maps to 1.  A negative
 * offset gives 0, and an offset beyond the end of s gives -1.
 */
int u8_byte2char(const char *s, int bytenum)
{
	int slen = strlen(s);
	int off = 0;
	int charnum = 0;	/* BUG: what origin? */
				/* should be 0 to match start==0 which means no match */

	if (bytenum > slen)
		return -1;	/* ??? */

	while (off <= bytenum) {
		off += u8_nextlen(s + off);
		charnum++;
	}
	return charnum;
}
727
/* runetochar() adapted from rune.c in the Plan 9 distribution */

enum
{
	Runeerror = 0xFFFD,	/* U+FFFD REPLACEMENT CHARACTER; stands in for bad runes */
	Runemax = 0x10FFFF,	/* largest rune that is encoded as itself */

	Bit1	= 7,
	Bitx	= 6,
	Bit2	= 5,
	Bit3	= 4,
	Bit4	= 3,
	Bit5	= 2,

	T1	= ((1<<(Bit1+1))-1) ^ 0xFF,	/* 0000 0000 */
	Tx	= ((1<<(Bitx+1))-1) ^ 0xFF,	/* 1000 0000 */
	T2	= ((1<<(Bit2+1))-1) ^ 0xFF,	/* 1100 0000 */
	T3	= ((1<<(Bit3+1))-1) ^ 0xFF,	/* 1110 0000 */
	T4	= ((1<<(Bit4+1))-1) ^ 0xFF,	/* 1111 0000 */
	T5	= ((1<<(Bit5+1))-1) ^ 0xFF,	/* 1111 1000 */

	Rune1	= (1<<(Bit1+0*Bitx))-1,		/* 0000 0000 0000 0000 0111 1111 */
	Rune2	= (1<<(Bit2+1*Bitx))-1,		/* 0000 0000 0000 0111 1111 1111 */
	Rune3	= (1<<(Bit3+2*Bitx))-1,		/* 0000 0000 1111 1111 1111 1111 */
	Rune4	= (1<<(Bit4+3*Bitx))-1,		/* 0001 1111 1111 1111 1111 1111 */

	Maskx	= (1<<Bitx)-1,			/* 0011 1111 */
	Testx	= Maskx ^ 0xFF,			/* 1100 0000 */

};

/*
 * runetochar: store the utf-8 encoding of rune c in str and return the
 * number of bytes written (1..4).  str must have room for 4 bytes; no
 * terminating NUL is written.
 *
 * A rune above Runemax is replaced by Runeerror, which encodes as the
 * three bytes EF BF BD.
 */
int runetochar(char *str, int c)
{
	/* one character sequence 00000-0007F => 00-7F */
	if (c <= Rune1) {
		str[0] = (char) c;
		return 1;
	}

	/* two character sequence 00080-007FF => T2 Tx */
	if (c <= Rune2) {
		str[0] = (char) (T2 | (c >> 1*Bitx));
		str[1] = (char) (Tx | (c & Maskx));
		return 2;
	}

	/*
	 * Substitute the error rune for out-of-range values here: it
	 * needs three bytes, so the test is not needed any earlier.
	 */
	if (c > Runemax)
		c = Runeerror;

	/* three character sequence 00800-0FFFF => T3 Tx Tx */
	if (c <= Rune3) {
		str[0] = (char) (T3 | (c >> 2*Bitx));
		str[1] = (char) (Tx | ((c >> 1*Bitx) & Maskx));
		str[2] = (char) (Tx | (c & Maskx));
		return 3;
	}

	/* four character sequence 010000-10FFFF => T4 Tx Tx Tx */
	str[0] = (char) (T4 | (c >> 3*Bitx));
	str[1] = (char) (Tx | ((c >> 2*Bitx) & Maskx));
	str[2] = (char) (Tx | ((c >> 1*Bitx) & Maskx));
	str[3] = (char) (Tx | (c & Maskx));
	return 4;
}
791
792
793/* ========== end of utf8 code =========== */
794
795
796
797Cell *matchop(Node **a, int n) /* ~ and match() */
798{
799 Cell *x, *y, *z;
800 char *s, *t;
801 int i;
802 int cstart, cpatlen, len;
803 fa *pfa;
804 int (*mf)(fa *, const char *) = match, mode = 0;
805
806 if (n == MATCHFCN) {
807 mf = pmatch;
808 mode = 1;
809 }
810 x = execute(a[1]); /* a[1] = target text */
811 s = getsval(x);
812 if (a[0] == NULL) /* a[1] == 0: already-compiled reg expr */
813 i = (*mf)((fa *) a[2], s);
814 else {
815 y = execute(a[2]); /* a[2] = regular expr */
816 t = getsval(y);
817 pfa = makedfa(t, mode);
818 i = (*mf)(pfa, s);
819 tempfree(y);
820 }
821 z = x;
822 if (n == MATCHFCN) {
823 int start = patbeg - s + 1; /* origin 1 */
824 if (patlen < 0) {
825 start = 0; /* not found */
826 } else {
827 cstart = u8_byte2char(s, start-1);
828 cpatlen = 0;
829 for (i = 0; i < patlen; i += len) {
830 len = u8_nextlen(patbeg+i);
831 cpatlen++;
832 }
833
834 start = cstart;
835 patlen = cpatlen;
836 }
837
838 setfval(rstartloc, (Awkfloat) start);
839 setfval(rlengthloc, (Awkfloat) patlen);
840 x = gettemp();
841 x->tval = NUM;
842 x->fval = start;
843 } else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0))
844 x = True;
845 else
846 x = False;
847
848 tempfree(z);
849 return x;
850}
851
852
853Cell *boolop(Node **a, int n) /* a[0] || a[1], a[0] && a[1], !a[0] */
854{
855 Cell *x, *y;
856 int i;
857
858 x = execute(a[0]);
859 i = istrue(x);
860 tempfree(x);
861 switch (n) {
862 case BOR:
863 if (i) return(True);
864 y = execute(a[1]);
865 i = istrue(y);
866 tempfree(y);
867 if (i) return(True);
868 else return(False);
869 case AND:
870 if ( !i ) return(False);
871 y = execute(a[1]);
872 i = istrue(y);
873 tempfree(y);
874 if (i) return(True);
875 else return(False);
876 case NOT:
877 if (i) return(False);
878 else return(True);
879 default: /* can't happen */
880 FATAL("unknown boolean operator %d", n);
881 }
882 return 0; /*NOTREACHED*/
883}
884
885Cell *relop(Node **a, int n) /* a[0 < a[1], etc. */
886{
887 int i;
888 Cell *x, *y;
889 Awkfloat j;
890 bool x_is_nan, y_is_nan;
891
892 x = execute(a[0]);
893 y = execute(a[1]);
894 x_is_nan = isnan(x->fval);
895 y_is_nan = isnan(y->fval);
896 if (x->tval&NUM && y->tval&NUM) {
897 if ((x_is_nan || y_is_nan) && n != NE)
898 return(False);
899 j = x->fval - y->fval;
900 i = j<0? -1: (j>0? 1: 0);
901 } else {
902 i = strcmp(getsval(x), getsval(y));
903 }
904 tempfree(x);
905 tempfree(y);
906 switch (n) {
907 case LT: if (i<0) return(True);
908 else return(False);
909 case LE: if (i<=0) return(True);
910 else return(False);
911 case NE: if (x_is_nan && y_is_nan) return(True);
912 else if (i!=0) return(True);
913 else return(False);
914 case EQ: if (i == 0) return(True);
915 else return(False);
916 case GE: if (i>=0) return(True);
917 else return(False);
918 case GT: if (i>0) return(True);
919 else return(False);
920 default: /* can't happen */
921 FATAL("unknown relational operator %d", n);
922 }
923 return 0; /*NOTREACHED*/
924}
925
926void tfree(Cell *a) /* free a tempcell */
927{
928 if (freeable(a)) {
929 DPRINTF("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval);
930 xfree(a->sval);
931 }
932 if (a == tmps)
933 FATAL("tempcell list is curdled");
934 a->cnext = tmps;
935 tmps = a;
936}
937
938Cell *gettemp(void) /* get a tempcell */
939{ int i;
940 Cell *x;
941
942 if (!tmps) {
943 tmps = (Cell *) calloc(100, sizeof(*tmps));
944 if (!tmps)
945 FATAL("out of space for temporaries");
946 for (i = 1; i < 100; i++)
947 tmps[i-1].cnext = &tmps[i];
948 tmps[i-1].cnext = NULL;
949 }
950 x = tmps;
951 tmps = x->cnext;
952 *x = tempcell;
953 return(x);
954}
955
956Cell *indirect(Node **a, int n) /* $( a[0] ) */
957{
958 Awkfloat val;
959 Cell *x;
960 int m;
961
962 x = execute(a[0]);
963 val = getfval(x); /* freebsd: defend against super large field numbers */
964 if ((Awkfloat)INT_MAX < val)
965 FATAL("trying to access out of range field %s", x->nval);
966 m = (int) val;
967 tempfree(x);
968 x = fieldadr(m);
969 x->ctype = OCELL; /* BUG? why are these needed? */
970 x->csub = CFLD;
971 return(x);
972}
973
974Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */
975{
976 int k, m, n;
977 int mb, nb;
978 char *s;
979 int temp;
980 Cell *x, *y, *z = NULL;
981
982 x = execute(a[0]);
983 y = execute(a[1]);
984 if (a[2] != NULL)
985 z = execute(a[2]);
986 s = getsval(x);
987 k = u8_strlen(s) + 1;
988 if (k <= 1) {
989 tempfree(x);
990 tempfree(y);
991 if (a[2] != NULL) {
992 tempfree(z);
993 }
994 x = gettemp();
995 setsval(x, "");
996 return(x);
997 }
998 m = (int) getfval(y);
999 if (m <= 0)
1000 m = 1;
1001 else if (m > k)
1002 m = k;
1003 tempfree(y);
1004 if (a[2] != NULL) {
1005 n = (int) getfval(z);
1006 tempfree(z);
1007 } else
1008 n = k - 1;
1009 if (n < 0)
1010 n = 0;
1011 else if (n > k - m)
1012 n = k - m;
1013 /* m is start, n is length from there */
1014 DPRINTF("substr: m=%d, n=%d, s=%s\n", m, n, s);
1015 y = gettemp();
1016 mb = u8_char2byte(s, m-1); /* byte offset of start char in s */
1017 nb = u8_char2byte(s, m-1+n); /* byte offset of end+1 char in s */
1018
1019 temp = s[nb]; /* with thanks to John Linderman */
1020 s[nb] = '\0';
1021 setsval(y, s + mb);
1022 s[nb] = temp;
1023 tempfree(x);
1024 return(y);
1025}
1026
1027Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */
1028{
1029 Cell *x, *y, *z;
1030 char *s1, *s2, *p1, *p2, *q;
1031 Awkfloat v = 0.0;
1032
1033 x = execute(a[0]);
1034 s1 = getsval(x);
1035 y = execute(a[1]);
1036 s2 = getsval(y);
1037
1038 z = gettemp();
1039 for (p1 = s1; *p1 != '\0'; p1++) {
1040 for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++)
1041 continue;
1042 if (*p2 == '\0') {
1043 /* v = (Awkfloat) (p1 - s1 + 1); origin 1 */
1044
1045 /* should be a function: used in match() as well */
1046 int i, len;
1047 v = 0;
1048 for (i = 0; i < p1-s1+1; i += len) {
1049 len = u8_nextlen(s1+i);
1050 v++;
1051 }
1052 break;
1053 }
1054 }
1055 tempfree(x);
1056 tempfree(y);
1057 setfval(z, v);
1058 return(z);
1059}
1060
/* has_utf8: return 1 if s contains any utf-8 (2 bytes or more) character, else 0 */
int has_utf8(char *s)
{
	while (*s != 0) {
		int len = u8_nextlen(s);

		if (len > 1)
			return 1;
		s += len;
	}
	return 0;
}
1072
1073#define MAXNUMSIZE 50
1074
1075int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like conversions */
1076{
1077 char *fmt;
1078 char *p, *t;
1079 const char *os;
1080 Cell *x;
1081 int flag = 0, n;
1082 int fmtwd; /* format width */
1083 int fmtsz = recsize;
1084 char *buf = *pbuf;
1085 int bufsize = *pbufsize;
1086#define FMTSZ(a) (fmtsz - ((a) - fmt))
1087#define BUFSZ(a) (bufsize - ((a) - buf))
1088
1089 static bool first = true;
1090 static bool have_a_format = false;
1091
1092 if (first) {
1093 char xbuf[100];
1094
1095 snprintf(xbuf, sizeof(xbuf), "%a", 42.0);
1096 have_a_format = (strcmp(xbuf, "0x1.5p+5") == 0);
1097 first = false;
1098 }
1099
1100 os = s;
1101 p = buf;
1102 if ((fmt = (char *) malloc(fmtsz)) == NULL)
1103 FATAL("out of memory in format()");
1104 while (*s) {
1105 adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1");
1106 if (*s != '%') {
1107 *p++ = *s++;
1108 continue;
1109 }
1110 if (*(s+1) == '%') {
1111 *p++ = '%';
1112 s += 2;
1113 continue;
1114 }
1115 fmtwd = atoi(s+1);
1116 if (fmtwd < 0)
1117 fmtwd = -fmtwd;
1118 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2");
1119 for (t = fmt; (*t++ = *s) != '\0'; s++) {
1120 if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3"))
1121 FATAL("format item %.30s... ran format() out of memory", os);
1122 /* Ignore size specifiers */
1123 if (strchr("hjLlqtz", *s) != NULL) { /* the ansi panoply */
1124 t--;
1125 continue;
1126 }
1127 if (isalpha((uschar)*s))
1128 break;
1129 if (*s == '$') {
1130 FATAL("'$' not permitted in awk formats");
1131 }
1132 if (*s == '*') {
1133 if (a == NULL) {
1134 FATAL("not enough args in printf(%s)", os);
1135 }
1136 x = execute(a);
1137 a = a->nnext;
1138 snprintf(t - 1, FMTSZ(t - 1),
1139 "%d", fmtwd=(int) getfval(x));
1140 if (fmtwd < 0)
1141 fmtwd = -fmtwd;
1142 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format");
1143 t = fmt + strlen(fmt);
1144 tempfree(x);
1145 }
1146 }
1147 *t = '\0';
1148 if (fmtwd < 0)
1149 fmtwd = -fmtwd;
1150 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4");
1151 switch (*s) {
1152 case 'a': case 'A':
1153 if (have_a_format)
1154 flag = *s;
1155 else
1156 flag = 'f';
1157 break;
1158 case 'f': case 'e': case 'g': case 'E': case 'G':
1159 flag = 'f';
1160 break;
1161 case 'd': case 'i': case 'o': case 'x': case 'X': case 'u':
1162 flag = (*s == 'd' || *s == 'i') ? 'd' : 'u';
1163 *(t-1) = 'j';
1164 *t = *s;
1165 *++t = '\0';
1166 break;
1167 case 's':
1168 flag = 's';
1169 break;
1170 case 'c':
1171 flag = 'c';
1172 break;
1173 default:
1174 WARNING("weird printf conversion %s", fmt);
1175 flag = '?';
1176 break;
1177 }
1178 if (a == NULL)
1179 FATAL("not enough args in printf(%s)", os);
1180 x = execute(a);
1181 a = a->nnext;
1182 n = MAXNUMSIZE;
1183 if (fmtwd > n)
1184 n = fmtwd;
1185 adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5");
1186 switch (flag) {
1187 case '?':
1188 snprintf(p, BUFSZ(p), "%s", fmt); /* unknown, so dump it too */
1189 t = getsval(x);
1190 n = strlen(t);
1191 if (fmtwd > n)
1192 n = fmtwd;
1193 adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6");
1194 p += strlen(p);
1195 snprintf(p, BUFSZ(p), "%s", t);
1196 break;
1197 case 'a':
1198 case 'A':
1199 case 'f': snprintf(p, BUFSZ(p), fmt, getfval(x)); break;
1200 case 'd': snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break;
1201 case 'u': snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break;
1202
1203 case 's': {
1204 t = getsval(x);
1205 n = strlen(t);
1206 /* if simple format or no utf-8 in the string, sprintf works */
1207 if (!has_utf8(t) || strcmp(fmt,"%s") == 0) {
1208 if (fmtwd > n)
1209 n = fmtwd;
1210 if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7"))
1211 FATAL("huge string/format (%d chars) in printf %.30s..." \
1212 " ran format() out of memory", n, t);
1213 snprintf(p, BUFSZ(p), fmt, t);
1214 break;
1215 }
1216
1217 /* get here if string has utf-8 chars and fmt is not plain %s */
1218 /* "%-w.ps", where -, w and .p are all optional */
1219 /* '0' before the w is a flag character */
1220 /* fmt points at % */
1221 int ljust = 0, wid = 0, prec = n, pad = 0;
1222 char *f = fmt+1;
1223 if (f[0] == '-') {
1224 ljust = 1;
1225 f++;
1226 }
1227 // flags '0' and '+' are recognized but skipped
1228 if (f[0] == '0') {
1229 f++;
1230 if (f[0] == '+')
1231 f++;
1232 }
1233 if (f[0] == '+') {
1234 f++;
1235 if (f[0] == '0')
1236 f++;
1237 }
1238 if (isdigit(f[0])) { /* there is a wid */
1239 wid = strtol(f, &f, 10);
1240 }
1241 if (f[0] == '.') { /* there is a .prec */
1242 prec = strtol(++f, &f, 10);
1243 }
1244 if (prec > u8_strlen(t))
1245 prec = u8_strlen(t);
1246 pad = wid>prec ? wid - prec : 0; // has to be >= 0
1247 int i, k, n;
1248
1249 if (ljust) { // print prec chars from t, then pad blanks
1250 n = u8_char2byte(t, prec);
1251 for (k = 0; k < n; k++) {
1252 //putchar(t[k]);
1253 *p++ = t[k];
1254 }
1255 for (i = 0; i < pad; i++) {
1256 //printf(" ");
1257 *p++ = ' ';
1258 }
1259 } else { // print pad blanks, then prec chars from t
1260 for (i = 0; i < pad; i++) {
1261 //printf(" ");
1262 *p++ = ' ';
1263 }
1264 n = u8_char2byte(t, prec);
1265 for (k = 0; k < n; k++) {
1266 //putchar(t[k]);
1267 *p++ = t[k];
1268 }
1269 }
1270 *p = 0;
1271 break;
1272 }
1273
1274 case 'c': {
1275 /*
1276 * If a numeric value is given, awk should just turn
1277 * it into a character and print it:
1278 * BEGIN { printf("%c\n", 65) }
1279 * prints "A".
1280 *
1281 * But what if the numeric value is > 128 and
1282 * represents a valid Unicode code point?!? We do
1283 * our best to convert it back into UTF-8. If we
1284 * can't, we output the encoding of the Unicode
1285 * "invalid character", 0xFFFD.
1286 */
1287 if (isnum(x)) {
1288 int charval = (int) getfval(x);
1289
1290 if (charval != 0) {
1291 if (charval < 128 || awk_mb_cur_max == 1)
1292 snprintf(p, BUFSZ(p), fmt, charval);
1293 else {
1294 // possible unicode character
1295 size_t count;
1296 char *bs = wide_char_to_byte_str(charval, &count);
1297
1298 if (bs == NULL) { // invalid character
1299 // use unicode invalid character, 0xFFFD
1300 static char invalid_char[] = "\357\277\275";
1301 bs = invalid_char;
1302 count = 3;
1303 }
1304 t = bs;
1305 n = count;
1306 goto format_percent_c;
1307 }
1308 } else {
1309 *p++ = '\0'; /* explicit null byte */
1310 *p = '\0'; /* next output will start here */
1311 }
1312 break;
1313 }
1314 t = getsval(x);
1315 n = u8_nextlen(t);
1316 format_percent_c:
1317 if (n < 2) { /* not utf8 */
1318 snprintf(p, BUFSZ(p), fmt, getsval(x)[0]);
1319 break;
1320 }
1321
1322 // utf8 character, almost same song and dance as for %s
1323 int ljust = 0, wid = 0, prec = n, pad = 0;
1324 char *f = fmt+1;
1325 if (f[0] == '-') {
1326 ljust = 1;
1327 f++;
1328 }
1329 // flags '0' and '+' are recognized but skipped
1330 if (f[0] == '0') {
1331 f++;
1332 if (f[0] == '+')
1333 f++;
1334 }
1335 if (f[0] == '+') {
1336 f++;
1337 if (f[0] == '0')
1338 f++;
1339 }
1340 if (isdigit(f[0])) { /* there is a wid */
1341 wid = strtol(f, &f, 10);
1342 }
1343 if (f[0] == '.') { /* there is a .prec */
1344 prec = strtol(++f, &f, 10);
1345 }
1346 if (prec > 1) // %c --> only one character
1347 prec = 1;
1348 pad = wid>prec ? wid - prec : 0; // has to be >= 0
1349 int i;
1350
1351 if (ljust) { // print one char from t, then pad blanks
1352 for (i = 0; i < n; i++)
1353 *p++ = t[i];
1354 for (i = 0; i < pad; i++) {
1355 //printf(" ");
1356 *p++ = ' ';
1357 }
1358 } else { // print pad blanks, then prec chars from t
1359 for (i = 0; i < pad; i++) {
1360 //printf(" ");
1361 *p++ = ' ';
1362 }
1363 for (i = 0; i < n; i++)
1364 *p++ = t[i];
1365 }
1366 *p = 0;
1367 break;
1368 }
1369 default:
1370 FATAL("can't happen: bad conversion %c in format()", flag);
1371 }
1372
1373 tempfree(x);
1374 p += strlen(p);
1375 s++;
1376 }
1377 *p = '\0';
1378 free(fmt);
1379 for ( ; a; a = a->nnext) { /* evaluate any remaining args */
1380 x = execute(a);
1381 tempfree(x);
1382 }
1383 *pbuf = buf;
1384 *pbufsize = bufsize;
1385 return p - buf;
1386}
1387
1388Cell *awksprintf(Node **a, int n) /* sprintf(a[0]) */
1389{
1390 Cell *x;
1391 Node *y;
1392 char *buf;
1393 int bufsz=3*recsize;
1394
1395 if ((buf = (char *) malloc(bufsz)) == NULL)
1396 FATAL("out of memory in awksprintf");
1397 y = a[0]->nnext;
1398 x = execute(a[0]);
1399 if (format(&buf, &bufsz, getsval(x), y) == -1)
1400 FATAL("sprintf string %.30s... too long. can't happen.", buf);
1401 tempfree(x);
1402 x = gettemp();
1403 x->sval = buf;
1404 x->tval = STR;
1405 return(x);
1406}
1407
1408Cell *awkprintf(Node **a, int n) /* printf */
1409{ /* a[0] is list of args, starting with format string */
1410 /* a[1] is redirection operator, a[2] is redirection file */
1411 FILE *fp;
1412 Cell *x;
1413 Node *y;
1414 char *buf;
1415 int len;
1416 int bufsz=3*recsize;
1417
1418 if ((buf = (char *) malloc(bufsz)) == NULL)
1419 FATAL("out of memory in awkprintf");
1420 y = a[0]->nnext;
1421 x = execute(a[0]);
1422 if ((len = format(&buf, &bufsz, getsval(x), y)) == -1)
1423 FATAL("printf string %.30s... too long. can't happen.", buf);
1424 tempfree(x);
1425 if (a[1] == NULL) {
1426 /* fputs(buf, stdout); */
1427 fwrite(buf, len, 1, stdout);
1428 if (ferror(stdout))
1429 FATAL("write error on stdout");
1430 } else {
1431 fp = redirect(ptoi(a[1]), a[2]);
1432 /* fputs(buf, fp); */
1433 fwrite(buf, len, 1, fp);
1434 fflush(fp);
1435 if (ferror(fp))
1436 FATAL("write error on %s", filename(fp));
1437 }
1438 free(buf);
1439 return(True);
1440}
1441
1442Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */
1443{
1444 Awkfloat i, j = 0;
1445 double v;
1446 Cell *x, *y, *z;
1447
1448 x = execute(a[0]);
1449 i = getfval(x);
1450 tempfree(x);
1451 if (n != UMINUS && n != UPLUS) {
1452 y = execute(a[1]);
1453 j = getfval(y);
1454 tempfree(y);
1455 }
1456 z = gettemp();
1457 switch (n) {
1458 case ADD:
1459 i += j;
1460 break;
1461 case MINUS:
1462 i -= j;
1463 break;
1464 case MULT:
1465 i *= j;
1466 break;
1467 case DIVIDE:
1468 if (j == 0)
1469 FATAL("division by zero");
1470 i /= j;
1471 break;
1472 case MOD:
1473 if (j == 0)
1474 FATAL("division by zero in mod");
1475 modf(i/j, &v);
1476 i = i - j * v;
1477 break;
1478 case UMINUS:
1479 i = -i;
1480 break;
1481 case UPLUS: /* handled by getfval(), above */
1482 break;
1483 case POWER:
1484 if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */
1485 i = ipow(i, (int) j);
1486 else
1487 i = pow_errcheck(i, j);
1488 break;
1489 default: /* can't happen */
1490 FATAL("illegal arithmetic operator %d", n);
1491 }
1492 setfval(z, i);
1493 return(z);
1494}
1495
/*
 * ipow - raise x to the integer power n by repeated squaring.
 * Returns 1 for n <= 0.
 * The exponent bits are consumed from the most significant one down, which
 * performs the same multiplications, in the same order, as the recursive
 * definition  ipow(x,n) = [x *] ipow(x,n/2) * ipow(x,n/2).
 */
double ipow(double x, int n)	/* x**n.  ought to be done by pow, but isn't always */
{
	double acc = 1;
	unsigned int e, bit;

	if (n <= 0)
		return 1;
	e = (unsigned int) n;
	/* locate the highest set bit of the exponent */
	for (bit = 1; bit <= e / 2; bit <<= 1)
		continue;
	for ( ; bit != 0; bit >>= 1) {
		if (e & bit)
			acc = x * acc * acc;
		else
			acc = acc * acc;
	}
	return acc;
}
1508
1509Cell *incrdecr(Node **a, int n) /* a[0]++, etc. */
1510{
1511 Cell *x, *z;
1512 int k;
1513 Awkfloat xf;
1514
1515 x = execute(a[0]);
1516 xf = getfval(x);
1517 k = (n == PREINCR || n == POSTINCR) ? 1 : -1;
1518 if (n == PREINCR || n == PREDECR) {
1519 setfval(x, xf + k);
1520 return(x);
1521 }
1522 z = gettemp();
1523 setfval(z, xf);
1524 setfval(x, xf + k);
1525 tempfree(x);
1526 return(z);
1527}
1528
1529Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */
1530{ /* this is subtle; don't muck with it. */
1531 Cell *x, *y;
1532 Awkfloat xf, yf;
1533 double v;
1534
1535 y = execute(a[1]);
1536 x = execute(a[0]);
1537 if (n == ASSIGN) { /* ordinary assignment */
1538 if (x == y && !(x->tval & (FLD|REC)) && x != nfloc)
1539 ; /* self-assignment: leave alone unless it's a field or NF */
1540 else if ((y->tval & (STR|NUM)) == (STR|NUM)) {
1541 yf = getfval(y);
1542 setsval(x, getsval(y));
1543 x->fval = yf;
1544 x->tval |= NUM;
1545 }
1546 else if (isstr(y))
1547 setsval(x, getsval(y));
1548 else if (isnum(y))
1549 setfval(x, getfval(y));
1550 else
1551 funnyvar(y, "read value of");
1552 tempfree(y);
1553 return(x);
1554 }
1555 xf = getfval(x);
1556 yf = getfval(y);
1557 switch (n) {
1558 case ADDEQ:
1559 xf += yf;
1560 break;
1561 case SUBEQ:
1562 xf -= yf;
1563 break;
1564 case MULTEQ:
1565 xf *= yf;
1566 break;
1567 case DIVEQ:
1568 if ((x->tval & CON) != 0)
1569 FATAL("non-constant required for left side of /=");
1570 if (yf == 0)
1571 FATAL("division by zero in /=");
1572 xf /= yf;
1573 break;
1574 case MODEQ:
1575 if (yf == 0)
1576 FATAL("division by zero in %%=");
1577 modf(xf/yf, &v);
1578 xf = xf - yf * v;
1579 break;
1580 case POWEQ:
1581 if (yf >= 0 && modf(yf, &v) == 0.0) /* pos integer exponent */
1582 xf = ipow(xf, (int) yf);
1583 else
1584 xf = pow_errcheck(xf, yf);
1585 break;
1586 default:
1587 FATAL("illegal assignment operator %d", n);
1588 break;
1589 }
1590 tempfree(y);
1591 setfval(x, xf);
1592 return(x);
1593}
1594
1595Cell *cat(Node **a, int q) /* a[0] cat a[1] */
1596{
1597 Cell *x, *y, *z;
1598 int n1, n2;
1599 char *s = NULL;
1600 int ssz = 0;
1601
1602 x = execute(a[0]);
1603 n1 = strlen(getsval(x));
1604 adjbuf(&s, &ssz, n1 + 1, recsize, 0, "cat1");
1605 memcpy(s, x->sval, n1);
1606
1607 tempfree(x);
1608
1609 y = execute(a[1]);
1610 n2 = strlen(getsval(y));
1611 adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2");
1612 memcpy(s + n1, y->sval, n2);
1613 s[n1 + n2] = '\0';
1614
1615 tempfree(y);
1616
1617 z = gettemp();
1618 z->sval = s;
1619 z->tval = STR;
1620
1621 return(z);
1622}
1623
1624Cell *pastat(Node **a, int n) /* a[0] { a[1] } */
1625{
1626 Cell *x;
1627
1628 if (a[0] == NULL)
1629 x = execute(a[1]);
1630 else {
1631 x = execute(a[0]);
1632 if (istrue(x)) {
1633 tempfree(x);
1634 x = execute(a[1]);
1635 }
1636 }
1637 return x;
1638}
1639
1640Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */
1641{
1642 Cell *x;
1643 int pair;
1644
1645 pair = ptoi(a[3]);
1646 if (pairstack[pair] == 0) {
1647 x = execute(a[0]);
1648 if (istrue(x))
1649 pairstack[pair] = 1;
1650 tempfree(x);
1651 }
1652 if (pairstack[pair] == 1) {
1653 x = execute(a[1]);
1654 if (istrue(x))
1655 pairstack[pair] = 0;
1656 tempfree(x);
1657 x = execute(a[2]);
1658 return(x);
1659 }
1660 return(False);
1661}
1662
/*
 * split - implement split(string, array [, separator]).
 *
 * a[0] is the source string, a[1] the destination array, a[2] the optional
 * separator and a[3] encodes the separator's type (STRING or REGEXPR).
 * The source string is duplicated first, so the in-place '\0' patching
 * done below works on a private copy.  The array's old symbol table is
 * released and a new one created, then elements "1", "2", ... are stored.
 * An element that is_number() accepts is stored as STR|NUM, otherwise STR.
 *
 * One of five strategies is used, tried in this order:
 *   - regular expression (separator longer than one char, or /regexpr/)
 *   - CSV parsing (no explicit separator and CSV mode on)
 *   - runs of blanks, tabs and newlines (separator is a single space)
 *   - one element per character (empty separator)
 *   - a single literal separator character
 *
 * Returns a temporary NUM cell holding the number of elements stored.
 */
Cell *split(Node **a, int nnn)	/* split(a[0], a[1], a[2]); a[3] is type */
{
	Cell *x = NULL, *y, *ap;
	const char *s, *origs, *t;
	const char *fs = NULL;
	char *origfs = NULL;
	int sep;
	char temp, num[50];
	int n, tempstat, arg3type;
	int j;
	double result;

	y = execute(a[0]);	/* source string */
	origs = s = strdup(getsval(y));	/* private copy; freed through origs at the end */
	tempfree(y);
	arg3type = ptoi(a[3]);
	if (a[2] == NULL) {		/* BUG: CSV should override implicit fs but not explicit */
		fs = getsval(fsloc);
	} else if (arg3type == STRING) {	/* split(str,arr,"string") */
		x = execute(a[2]);
		fs = origfs = strdup(getsval(x));	/* copied because x is released next */
		tempfree(x);
	} else if (arg3type == REGEXPR) {
		fs = "(regexpr)";	/* split(str,arr,/regexpr/) */
	} else {
		FATAL("illegal type of split");
	}
	sep = *fs;	/* first separator character; 0 for an empty separator */
	ap = execute(a[1]);	/* array name */
/* BUG 7/26/22: this appears not to reset array: see C1/asplit */
	freesymtab(ap);
	DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs);
	/* turn the destination cell into an array with a fresh symbol table */
	ap->tval &= ~STR;
	ap->tval |= ARR;
	ap->sval = (char *) makesymtab(NSYMTAB);

	n = 0;
	if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) {
		/* split(s, a, //); have to arrange that it looks like empty sep */
		arg3type = 0;
		fs = "";
		sep = 0;
	}
	if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) {	/* reg expr */
		fa *pfa;
		if (arg3type == REGEXPR) {	/* it's ready already */
			pfa = (fa *) a[2];
		} else {
			pfa = makedfa(fs, 1);
		}
		if (nematch(pfa,s)) {
			/* initstat is forced to 2 while scanning and restored afterwards */
			tempstat = pfa->initstat;
			pfa->initstat = 2;
			do {
				n++;
				snprintf(num, sizeof(num), "%d", n);
				/* temporarily terminate the field where the separator match begins */
				temp = *patbeg;
				setptr(patbeg, '\0');
				if (is_number(s, & result))
					setsymtab(num, s, result, STR|NUM, (Array *) ap->sval);
				else
					setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
				setptr(patbeg, temp);
				s = patbeg + patlen;	/* resume right after the separator */
				if (*(patbeg+patlen-1) == '\0' || *s == '\0') {
					/* separator reached the end of the string: store one last empty element */
					n++;
					snprintf(num, sizeof(num), "%d", n);
					setsymtab(num, "", 0.0, STR, (Array *) ap->sval);
					pfa->initstat = tempstat;
					goto spdone;
				}
			} while (nematch(pfa,s));
			pfa->initstat = tempstat; 	/* bwk: has to be here to reset */
							/* cf gsub and refldbld */
		}
		/* whatever follows the last separator is the final element */
		n++;
		snprintf(num, sizeof(num), "%d", n);
		if (is_number(s, & result))
			setsymtab(num, s, result, STR|NUM, (Array *) ap->sval);
		else
			setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
  spdone:
		pfa = NULL;

	} else if (a[2] == NULL && CSV) {	/* CSV only if no explicit separator */
		char *newt = (char *) malloc(strlen(s) + 1); /* for building new string; reuse for each field */
		if (newt == NULL)
			FATAL("out of space in split");
		for (;;) {
			char *fr = newt;	/* write position inside newt */
			n++;
			if (*s == '"' ) { /* start of "..." */
				for (s++ ; *s != '\0'; ) {
					if (*s == '"' && s[1] != '\0' && s[1] == '"') {
						s += 2; /* doubled quote */
						*fr++ = '"';
					} else if (*s == '"' && (s[1] == '\0' || s[1] == ',')) {
						s++; /* skip over closing quote */
						break;
					} else {
						*fr++ = *s++;
					}
				}
				*fr++ = 0;
			} else {	/* unquoted field */
				while (*s != ',' && *s != '\0')
					*fr++ = *s++;
				*fr++ = 0;
			}
			snprintf(num, sizeof(num), "%d", n);
			if (is_number(newt, &result))
				setsymtab(num, newt, result, STR|NUM, (Array *) ap->sval);
			else
				setsymtab(num, newt, 0.0, STR, (Array *) ap->sval);
			if (*s++ == '\0')	/* otherwise this steps over the comma */
				break;
		}
		free(newt);

	} else if (!CSV && sep == ' ') {	/* usual case: split on white space */
		for (n = 0; ; ) {
#define ISWS(c)	((c) == ' ' || (c) == '\t' || (c) == '\n')
			while (ISWS(*s))	/* skip leading white space */
				s++;
			if (*s == '\0')
				break;
			n++;
			t = s;		/* start of this field */
			do
				s++;
			while (*s != '\0' && !ISWS(*s));
			/* terminate the field in place, store it, then restore the byte */
			temp = *s;
			setptr(s, '\0');
			snprintf(num, sizeof(num), "%d", n);
			if (is_number(t, & result))
				setsymtab(num, t, result, STR|NUM, (Array *) ap->sval);
			else
				setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
			setptr(s, temp);
			if (*s != '\0')
				s++;
		}

	} else if (sep == 0) {	/* new: split(s, a, "") => 1 char/elem */
		for (n = 0; *s != '\0'; s += u8_nextlen(s)) {
			char buf[10];	/* holds the u8_nextlen(s) bytes of one character plus '\0' */
			n++;
			snprintf(num, sizeof(num), "%d", n);

			for (j = 0; j < u8_nextlen(s); j++) {
				buf[j] = s[j];
			}
			buf[j] = '\0';

			/* a lone digit is stored as STR|NUM */
			if (isdigit((uschar)buf[0]))
				setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval);
			else
				setsymtab(num, buf, 0.0, STR, (Array *) ap->sval);
		}

	} else if (*s != '\0') {	/* some random single character */
		for (;;) {
			n++;
			t = s;		/* start of this field */
			while (*s != sep && *s != '\0')
				s++;
			/* terminate the field in place, store it, then restore the byte */
			temp = *s;
			setptr(s, '\0');
			snprintf(num, sizeof(num), "%d", n);
			if (is_number(t, & result))
				setsymtab(num, t, result, STR|NUM, (Array *) ap->sval);
			else
				setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
			setptr(s, temp);
			if (*s++ == '\0')	/* otherwise this steps over the separator */
				break;
		}
	}
	tempfree(ap);
	xfree(origs);
	xfree(origfs);
	/* the result is the element count */
	x = gettemp();
	x->tval = NUM;
	x->fval = n;
	return(x);
}
1849
1850Cell *condexpr(Node **a, int n) /* a[0] ? a[1] : a[2] */
1851{
1852 Cell *x;
1853
1854 x = execute(a[0]);
1855 if (istrue(x)) {
1856 tempfree(x);
1857 x = execute(a[1]);
1858 } else {
1859 tempfree(x);
1860 x = execute(a[2]);
1861 }
1862 return(x);
1863}
1864
1865Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */
1866{
1867 Cell *x;
1868
1869 x = execute(a[0]);
1870 if (istrue(x)) {
1871 tempfree(x);
1872 x = execute(a[1]);
1873 } else if (a[2] != NULL) {
1874 tempfree(x);
1875 x = execute(a[2]);
1876 }
1877 return(x);
1878}
1879
1880Cell *whilestat(Node **a, int n) /* while (a[0]) a[1] */
1881{
1882 Cell *x;
1883
1884 for (;;) {
1885 x = execute(a[0]);
1886 if (!istrue(x))
1887 return(x);
1888 tempfree(x);
1889 x = execute(a[1]);
1890 if (isbreak(x)) {
1891 x = True;
1892 return(x);
1893 }
1894 if (isnext(x) || isexit(x) || isret(x))
1895 return(x);
1896 tempfree(x);
1897 }
1898}
1899
1900Cell *dostat(Node **a, int n) /* do a[0]; while(a[1]) */
1901{
1902 Cell *x;
1903
1904 for (;;) {
1905 x = execute(a[0]);
1906 if (isbreak(x))
1907 return True;
1908 if (isnext(x) || isexit(x) || isret(x))
1909 return(x);
1910 tempfree(x);
1911 x = execute(a[1]);
1912 if (!istrue(x))
1913 return(x);
1914 tempfree(x);
1915 }
1916}
1917
1918Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */
1919{
1920 Cell *x;
1921
1922 x = execute(a[0]);
1923 tempfree(x);
1924 for (;;) {
1925 if (a[1]!=NULL) {
1926 x = execute(a[1]);
1927 if (!istrue(x)) return(x);
1928 else tempfree(x);
1929 }
1930 x = execute(a[3]);
1931 if (isbreak(x)) /* turn off break */
1932 return True;
1933 if (isnext(x) || isexit(x) || isret(x))
1934 return(x);
1935 tempfree(x);
1936 x = execute(a[2]);
1937 tempfree(x);
1938 }
1939}
1940
1941Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */
1942{
1943 Cell *x, *vp, *arrayp, *cp, *ncp;
1944 Array *tp;
1945 int i;
1946
1947 vp = execute(a[0]);
1948 arrayp = execute(a[1]);
1949 if (!isarr(arrayp)) {
1950 return True;
1951 }
1952 tp = (Array *) arrayp->sval;
1953 tempfree(arrayp);
1954 for (i = 0; i < tp->size; i++) { /* this routine knows too much */
1955 for (cp = tp->tab[i]; cp != NULL; cp = ncp) {
1956 setsval(vp, cp->nval);
1957 ncp = cp->cnext;
1958 x = execute(a[2]);
1959 if (isbreak(x)) {
1960 tempfree(vp);
1961 return True;
1962 }
1963 if (isnext(x) || isexit(x) || isret(x)) {
1964 tempfree(vp);
1965 return(x);
1966 }
1967 tempfree(x);
1968 }
1969 }
1970 return True;
1971}
1972
1973static char *nawk_convert(const char *s, int (*fun_c)(int),
1974 wint_t (*fun_wc)(wint_t))
1975{
1976 char *buf = NULL;
1977 char *pbuf = NULL;
1978 const char *ps = NULL;
1979 size_t n = 0;
1980 wchar_t wc;
1981 const size_t sz = awk_mb_cur_max;
1982 int unused;
1983
1984 if (sz == 1) {
1985 buf = tostring(s);
1986
1987 for (pbuf = buf; *pbuf; pbuf++)
1988 *pbuf = fun_c((uschar)*pbuf);
1989
1990 return buf;
1991 } else {
1992 /* upper/lower character may be shorter/longer */
1993 buf = tostringN(s, strlen(s) * sz + 1);
1994
1995 (void) mbtowc(NULL, NULL, 0); /* reset internal state */
1996 /*
1997 * Reset internal state here too.
1998 * Assign result to avoid a compiler warning. (Casting to void
1999 * doesn't work.)
2000 * Increment said variable to avoid a different warning.
2001 */
2002 unused = wctomb(NULL, L'\0');
2003 unused++;
2004
2005 ps = s;
2006 pbuf = buf;
2007 while (n = mbtowc(&wc, ps, sz),
2008 n > 0 && n != (size_t)-1 && n != (size_t)-2)
2009 {
2010 ps += n;
2011
2012 n = wctomb(pbuf, fun_wc(wc));
2013 if (n == (size_t)-1)
2014 FATAL("illegal wide character %s", s);
2015
2016 pbuf += n;
2017 }
2018
2019 *pbuf = '\0';
2020
2021 if (n)
2022 FATAL("illegal byte sequence %s", s);
2023
2024 return buf;
2025 }
2026}
2027
2028#ifdef __DJGPP__
/*
 * Fallback towupper() compiled only when __DJGPP__ is defined: values
 * below 256 go through toupper(); everything else is returned unchanged.
 */
static wint_t towupper(wint_t wc)
{
	if (!(wc >= 0 && wc < 256))
		return wc;

	return toupper(wc & 0xFF);
}
2036
/*
 * Fallback towlower() compiled only when __DJGPP__ is defined: values
 * below 256 go through tolower(); everything else is returned unchanged.
 */
static wint_t towlower(wint_t wc)
{
	if (!(wc >= 0 && wc < 256))
		return wc;

	return tolower(wc & 0xFF);
}
2044#endif
2045
/* Return a newly allocated upper-case copy of s (see nawk_convert). */
static char *nawk_toupper(const char *s)
{
	char *upper = nawk_convert(s, toupper, towupper);

	return upper;
}
2050
/* Return a newly allocated lower-case copy of s (see nawk_convert). */
static char *nawk_tolower(const char *s)
{
	char *lower = nawk_convert(s, tolower, towlower);

	return lower;
}
2055
2056
2057
2058Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */
2059{
2060 Cell *x, *y;
2061 Awkfloat u = 0;
2062 int t;
2063 Awkfloat tmp;
2064 char *buf;
2065 Node *nextarg;
2066 FILE *fp;
2067 int status = 0;
2068 int estatus = 0;
2069
2070 t = ptoi(a[0]);
2071 x = execute(a[1]);
2072 nextarg = a[1]->nnext;
2073 switch (t) {
2074 case FLENGTH:
2075 if (isarr(x))
2076 u = ((Array *) x->sval)->nelem; /* GROT. should be function*/
2077 else
2078 u = u8_strlen(getsval(x));
2079 break;
2080 case FLOG:
2081 u = log_errcheck(getfval(x));
2082 break;
2083 case FINT:
2084 modf(getfval(x), &u); break;
2085 case FEXP:
2086 u = exp_errcheck(getfval(x));
2087 break;
2088 case FSQRT:
2089 u = sqrt_errcheck(getfval(x));
2090 break;
2091 case FSIN:
2092 u = sin(getfval(x)); break;
2093 case FCOS:
2094 u = cos(getfval(x)); break;
2095 case FATAN:
2096 if (nextarg == NULL) {
2097 WARNING("atan2 requires two arguments; returning 1.0");
2098 u = 1.0;
2099 } else {
2100 y = execute(a[1]->nnext);
2101 u = atan2(getfval(x), getfval(y));
2102 tempfree(y);
2103 nextarg = nextarg->nnext;
2104 }
2105 break;
2106 case FSYSTEM:
2107 fflush(stdout); /* in case something is buffered already */
2108 estatus = status = system(getsval(x));
2109 if (status != -1) {
2110 if (WIFEXITED(status)) {
2111 estatus = WEXITSTATUS(status);
2112 } else if (WIFSIGNALED(status)) {
2113 estatus = WTERMSIG(status) + 256;
2114#ifdef WCOREDUMP
2115 if (WCOREDUMP(status))
2116 estatus += 256;
2117#endif
2118 } else /* something else?!? */
2119 estatus = 0;
2120 }
2121 /* else estatus was set to -1 */
2122 u = estatus;
2123 break;
2124 case FRAND:
2125 /* random() returns numbers in [0..2^31-1]
2126 * in order to get a number in [0, 1), divide it by 2^31
2127 */
2128 do {
2129 /* exact if Awkfloat wide enough */
2130 u = (Awkfloat) random();
2131 u /= 0x80000000; /* should be exact */
2132 } while (u >= 1.0); /* in case Awkfloat is narrow */
2133 break;
2134 case FSRAND:
2135 if (isrec(x)) /* no argument provided */
2136 u = time((time_t *)0);
2137 else
2138 u = getfval(x);
2139 tmp = u;
2140 srandom((unsigned long) u);
2141 u = srand_seed;
2142 srand_seed = tmp;
2143 break;
2144 case FTOUPPER:
2145 case FTOLOWER:
2146 if (t == FTOUPPER)
2147 buf = nawk_toupper(getsval(x));
2148 else
2149 buf = nawk_tolower(getsval(x));
2150 tempfree(x);
2151 x = gettemp();
2152 setsval(x, buf);
2153 free(buf);
2154 return x;
2155 case FFLUSH:
2156 if (isrec(x) || strlen(getsval(x)) == 0) {
2157 flush_all(); /* fflush() or fflush("") -> all */
2158 u = 0;
2159 } else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL)
2160 u = EOF;
2161 else
2162 u = fflush(fp);
2163 break;
2164 default: /* can't happen */
2165 FATAL("illegal function type %d", t);
2166 break;
2167 }
2168 tempfree(x);
2169 x = gettemp();
2170 setfval(x, u);
2171 if (nextarg != NULL) {
2172 WARNING("warning: function has too many arguments");
2173 for ( ; nextarg; nextarg = nextarg->nnext) {
2174 y = execute(nextarg);
2175 tempfree(y);
2176 }
2177 }
2178 return(x);
2179}
2180
2181Cell *printstat(Node **a, int n) /* print a[0] */
2182{
2183 Node *x;
2184 Cell *y;
2185 FILE *fp;
2186
2187 if (a[1] == NULL) /* a[1] is redirection operator, a[2] is file */
2188 fp = stdout;
2189 else
2190 fp = redirect(ptoi(a[1]), a[2]);
2191 for (x = a[0]; x != NULL; x = x->nnext) {
2192 y = execute(x);
2193 fputs(getpssval(y), fp);
2194 tempfree(y);
2195 if (x->nnext == NULL)
2196 fputs(getsval(orsloc), fp);
2197 else
2198 fputs(getsval(ofsloc), fp);
2199 }
2200 if (a[1] != NULL)
2201 fflush(fp);
2202 if (ferror(fp))
2203 FATAL("write error on %s", filename(fp));
2204 return(True);
2205}
2206
2207Cell *nullproc(Node **a, int n)
2208{
2209 return 0;
2210}
2211
2212
2213FILE *redirect(int a, Node *b) /* set up all i/o redirections */
2214{
2215 FILE *fp;
2216 Cell *x;
2217 char *fname;
2218
2219 x = execute(b);
2220 fname = getsval(x);
2221 fp = openfile(a, fname, NULL);
2222 if (fp == NULL)
2223 FATAL("can't open file %s", fname);
2224 tempfree(x);
2225 return fp;
2226}
2227
/*
 * Table of the streams awk has open.  stdinit() allocates it and fills
 * the first three entries with stdin, stdout and stderr; openfile() adds
 * entries and grows the table when no slot is free.
 */
struct files {
	FILE	*fp;		/* the stream; NULL marks a free slot */
	const char	*fname;	/* name it was opened under; NULL after close */
	int	mode;	/* '|', 'a', 'w' => LE/LT, GT */
} *files;

size_t nfiles;	/* number of slots allocated in files[] */
2235
2236static void stdinit(void) /* in case stdin, etc., are not constants */
2237{
2238 nfiles = FOPEN_MAX;
2239 files = (struct files *) calloc(nfiles, sizeof(*files));
2240 if (files == NULL)
2241 FATAL("can't allocate file memory for %zu files", nfiles);
2242 files[0].fp = stdin;
2243 files[0].fname = tostring("/dev/stdin");
2244 files[0].mode = LT;
2245 files[1].fp = stdout;
2246 files[1].fname = tostring("/dev/stdout");
2247 files[1].mode = GT;
2248 files[2].fp = stderr;
2249 files[2].fname = tostring("/dev/stderr");
2250 files[2].mode = GT;
2251}
2252
2253FILE *openfile(int a, const char *us, bool *pnewflag)
2254{
2255 const char *s = us;
2256 size_t i;
2257 int m;
2258 FILE *fp = NULL;
2259 struct stat sbuf;
2260
2261 if (*s == '\0')
2262 FATAL("null file name in print or getline");
2263
2264 for (i = 0; i < nfiles; i++)
2265 if (files[i].fname && strcmp(s, files[i].fname) == 0 &&
2266 (a == files[i].mode || (a==APPEND && files[i].mode==GT) ||
2267 a == FFLUSH)) {
2268 if (pnewflag)
2269 *pnewflag = false;
2270 return files[i].fp;
2271 }
2272 if (a == FFLUSH) /* didn't find it, so don't create it! */
2273 return NULL;
2274 for (i = 0; i < nfiles; i++)
2275 if (files[i].fp == NULL)
2276 break;
2277 if (i >= nfiles) {
2278 struct files *nf;
2279 size_t nnf = nfiles + FOPEN_MAX;
2280 nf = (struct files *) realloc(files, nnf * sizeof(*nf));
2281 if (nf == NULL)
2282 FATAL("cannot grow files for %s and %zu files", s, nnf);
2283 memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf));
2284 nfiles = nnf;
2285 files = nf;
2286 }
2287
2288 fflush(stdout); /* force a semblance of order */
2289
2290 /* don't try to read or write a directory */
2291 if (a == LT || a == GT || a == APPEND)
2292 if (stat(s, &sbuf) == 0 && S_ISDIR(sbuf.st_mode))
2293 return NULL;
2294
2295 m = a;
2296 if (a == GT) {
2297 fp = fopen(s, "w");
2298 } else if (a == APPEND) {
2299 fp = fopen(s, "a");
2300 m = GT; /* so can mix > and >> */
2301 } else if (a == '|') { /* output pipe */
2302 fp = popen(s, "w");
2303 } else if (a == LE) { /* input pipe */
2304 fp = popen(s, "r");
2305 } else if (a == LT) { /* getline <file */
2306 fp = strcmp(s, "-") == 0 ? stdin : fopen(s, "r"); /* "-" is stdin */
2307 } else /* can't happen */
2308 FATAL("illegal redirection %d", a);
2309 if (fp != NULL) {
2310 files[i].fname = tostring(s);
2311 files[i].fp = fp;
2312 files[i].mode = m;
2313 if (pnewflag)
2314 *pnewflag = true;
2315 if (fp != stdin && fp != stdout && fp != stderr)
2316 (void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC);
2317 }
2318 return fp;
2319}
2320
2321const char *filename(FILE *fp)
2322{
2323 size_t i;
2324
2325 for (i = 0; i < nfiles; i++)
2326 if (fp == files[i].fp)
2327 return files[i].fname;
2328 return "???";
2329}
2330
2331Cell *closefile(Node **a, int n)
2332{
2333 Cell *x;
2334 size_t i;
2335 bool stat;
2336
2337 x = execute(a[0]);
2338 getsval(x);
2339 stat = true;
2340 for (i = 0; i < nfiles; i++) {
2341 if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0)
2342 continue;
2343 if (files[i].mode == GT || files[i].mode == '|')
2344 fflush(files[i].fp);
2345 if (ferror(files[i].fp)) {
2346 if ((files[i].mode == GT && files[i].fp != stderr)
2347 || files[i].mode == '|')
2348 FATAL("write error on %s", files[i].fname);
2349 else
2350 WARNING("i/o error occurred on %s", files[i].fname);
2351 }
2352 if (files[i].fp == stdin || files[i].fp == stdout ||
2353 files[i].fp == stderr)
2354 stat = freopen("/dev/null", "r+", files[i].fp) == NULL;
2355 else if (files[i].mode == '|' || files[i].mode == LE)
2356 stat = pclose(files[i].fp) == -1;
2357 else
2358 stat = fclose(files[i].fp) == EOF;
2359 if (stat)
2360 WARNING("i/o error occurred closing %s", files[i].fname);
2361 xfree(files[i].fname);
2362 files[i].fname = NULL; /* watch out for ref thru this */
2363 files[i].fp = NULL;
2364 break;
2365 }
2366 tempfree(x);
2367 x = gettemp();
2368 setfval(x, (Awkfloat) (stat ? -1 : 0));
2369 return(x);
2370}
2371
2372void closeall(void)
2373{
2374 size_t i;
2375 bool stat = false;
2376
2377 for (i = 0; i < nfiles; i++) {
2378 if (! files[i].fp)
2379 continue;
2380 if (files[i].mode == GT || files[i].mode == '|')
2381 fflush(files[i].fp);
2382 if (ferror(files[i].fp)) {
2383 if ((files[i].mode == GT && files[i].fp != stderr)
2384 || files[i].mode == '|')
2385 FATAL("write error on %s", files[i].fname);
2386 else
2387 WARNING("i/o error occurred on %s", files[i].fname);
2388 }
2389 if (files[i].fp == stdin || files[i].fp == stdout ||
2390 files[i].fp == stderr)
2391 continue;
2392 if (files[i].mode == '|' || files[i].mode == LE)
2393 stat = pclose(files[i].fp) == -1;
2394 else
2395 stat = fclose(files[i].fp) == EOF;
2396 if (stat)
2397 WARNING("i/o error occurred while closing %s", files[i].fname);
2398 }
2399}
2400
2401static void flush_all(void)
2402{
2403 size_t i;
2404
2405 for (i = 0; i < nfiles; i++)
2406 if (files[i].fp)
2407 fflush(files[i].fp);
2408}
2409
2410void backsub(char **pb_ptr, const char **sptr_ptr);
2411
/*
 * dosub - implement sub() and gsub().
 *
 * a[0] == NULL means a[1] is an already-compiled regular expression;
 * otherwise a[1] is evaluated and compiled with makedfa().  a[2] is the
 * replacement string and a[3] the target.  subop is SUB (replace the
 * first match only) or GSUB (replace every match).
 *
 * The result is assembled in a growable buffer that is allocated only
 * once a match has been found; if at least one match was found the
 * target is assigned the new text.  Each pmatch() call reports its match
 * through the globals patbeg and patlen.
 *
 * Returns a temporary NUM cell holding the match count m.
 */
Cell *dosub(Node **a, int subop)        /* sub and gsub */
{
	fa *pfa;
	int tempstat = 0;	/* saved pfa->initstat, restored after the loop */
	char *repl;
	Cell *x;

	char *buf = NULL;	/* output buffer; stays NULL until the first match */
	char *pb = NULL;	/* write position inside buf */
	int bufsz = recsize;

	const char *r, *s;
	const char *start;	/* where the next search begins */
	const char *noempty = NULL;	/* empty match disallowed here */
	size_t m = 0;		/* match count */
	size_t whichm = 0;	/* which match to select, 0 = global */
	int mtype;		/* match type */

	if (a[0] == NULL) {	/* 0 => a[1] is already-compiled regexpr */
		pfa = (fa *) a[1];
	} else {
		x = execute(a[1]);
		pfa = makedfa(getsval(x), 1);
		tempfree(x);
	}

	x = execute(a[2]);	/* replacement string */
	repl = tostring(getsval(x));	/* private copy; freed after the loop */
	tempfree(x);

	switch (subop) {
	case SUB:
		whichm = 1;
		x = execute(a[3]);	/* source string */
		break;
	case GSUB:
		whichm = 0;
		x = execute(a[3]);	/* source string */
		break;
	default:
		FATAL("dosub: unrecognized subop: %d", subop);
	}

	start = getsval(x);
	while (pmatch(pfa, start)) {
		if (buf == NULL) {
			/* first match: allocate the output buffer and force initstat to 2 */
			if ((pb = buf = (char *) malloc(bufsz)) == NULL)
				FATAL("out of memory in dosub");
			tempstat = pfa->initstat;
			pfa->initstat = 2;
		}

		/* match types */
		#define	MT_IGNORE  0  /* unselected or invalid */
		#define MT_INSERT  1  /* selected, empty */
		#define MT_REPLACE 2  /* selected, not empty */

		/* an empty match just after replacement is invalid */

		if (patbeg == noempty && patlen == 0) {
			mtype = MT_IGNORE;	/* invalid, not counted */
		} else if (whichm == ++m || whichm == 0) {
			mtype = patlen ? MT_REPLACE : MT_INSERT;
		} else {
			mtype = MT_IGNORE;	/* unselected, but counted */
		}

		/* leading text: */
		if (patbeg > start) {
			adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - start),
				recsize, &pb, "dosub");
			s = start;
			while (s < patbeg)
				*pb++ = *s++;
		}

		if (mtype == MT_IGNORE)
			goto matching_text;	/* skip replacement text */

		/* copy the replacement, expanding & and backslash sequences */
		r = repl;
		while (*r != 0) {
			adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "dosub");
			if (*r == '\\') {
				backsub(&pb, &r);
			} else if (*r == '&') {
				/* & stands for the matched text */
				r++;
				adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize,
					&pb, "dosub");
				for (s = patbeg; s < patbeg+patlen; )
					*pb++ = *s++;
			} else {
				*pb++ = *r++;
			}
		}

matching_text:
		if (mtype == MT_REPLACE || *patbeg == '\0')
			goto next_search;	/* skip matching text */

		/* keep the original text; an empty match copies one character so the scan advances */
		if (patlen == 0)
			patlen = u8_nextlen(patbeg);
		adjbuf(&buf, &bufsz, (pb-buf) + patlen, recsize, &pb, "dosub");
		s = patbeg;
		while (s < patbeg + patlen)
			*pb++ = *s++;

next_search:
		start = patbeg + patlen;
		/* stop after the selected match, or at the end of the string */
		if (m == whichm || *patbeg == '\0')
			break;
		if (mtype == MT_REPLACE)
			noempty = start;

		#undef MT_IGNORE
		#undef MT_INSERT
		#undef MT_REPLACE
	}

	if (repl) {
		free(repl);
	}

	if (buf != NULL) {
		pfa->initstat = tempstat;

		/* trailing text */
		adjbuf(&buf, &bufsz, 1+strlen(start)+pb-buf, 0, &pb, "dosub");
		while ((*pb++ = *start++) != '\0')
			;

		setsval(x, buf);	/* store the rebuilt text in the target */
		free(buf);
	}

	tempfree(x);
	/* the result is the match count */
	x = gettemp();
	x->tval = NUM;
	x->fval = m;
	return x;
}
2552
/*
 * backsub: translate one backslash sequence of a sub/gsub replacement string.
 *
 * On entry (*sptr_ptr)[0] is a backslash.  The translated bytes (at most
 * two) are stored at *pb_ptr; the caller must have made room for them.
 * On return both pointers have been advanced past what was produced and
 * consumed:
 *
 *	\\\&	emits \&  and consumes all four characters
 *	\\&	emits \   and stops on the &, which is left for the caller
 *	\\x	emits \   (POSIXLY_CORRECT set) or \\ (default); the x is
 *		left for the caller in both cases
 *	\&	emits a literal &
 *	\x	emits \   and leaves x for the caller
 *
 * The POSIXLY_CORRECT environment variable is looked up only on the
 * first call; the answer is cached in a static for later calls.
 */
void backsub(char **pb_ptr, const char **sptr_ptr)
{
	static bool env_checked = false;
	static bool posix_mode = false;
	char *out = *pb_ptr;
	const char *in = *sptr_ptr;	/* in[0] == '\\' */

	if (!env_checked) {
		posix_mode = (getenv("POSIXLY_CORRECT") != NULL);
		env_checked = true;
	}

	switch (in[1]) {
	case '\\':
		if (in[2] == '\\' && in[3] == '&') {
			/* \\\& -> \& */
			out[0] = '\\';
			out[1] = '&';
			out += 2;
			in += 4;
		} else if (in[2] == '&') {
			/* \\& -> \ followed by the matched text */
			*out++ = '\\';
			in += 2;
		} else if (posix_mode) {
			/* \\x -> \x : drop the first backslash, copy the second */
			*out++ = in[1];
			in += 2;
		} else {
			/* \\x -> \\x : both backslashes are kept */
			*out++ = in[0];
			*out++ = in[1];
			in += 2;
		}
		break;
	case '&':
		/* \& -> literal & */
		*out++ = in[1];
		in += 2;
		break;
	default:
		/* lone backslash is copied through unchanged */
		*out++ = *in++;
		break;
	}

	*sptr_ptr = in;
	*pb_ptr = out;
}
2589
/*
 * wide_char_to_byte_str: encode a code point as a UTF-8 byte sequence.
 *
 * rune    code point to encode; values outside 0..0x10FFFF are rejected
 * outlen  receives the number of encoded bytes (1 to 4), not counting
 *         the terminating NUL; left untouched when NULL is returned
 *
 * Returns a pointer to a static, NUL-terminated buffer that is
 * overwritten by the next call, or NULL if rune is out of range.
 */
static char *wide_char_to_byte_str(int rune, size_t *outlen)
{
	/* lead-byte marker indexed by sequence length (2, 3 or 4 bytes) */
	static const unsigned char lead_mark[] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0 };
	static char utf8[5];
	unsigned int cp;
	size_t nbytes;
	size_t i;

	if (rune < 0 || rune > 0x10FFFF)
		return NULL;
	cp = (unsigned int) rune;

	memset(utf8, 0, sizeof(utf8));

	if (cp <= 0x7F) {
		/* plain ASCII: stored as is */
		utf8[0] = (char) cp;
		nbytes = 1;
	} else {
		if (cp <= 0x7FF)
			nbytes = 2;	/* 110xxxxx 10xxxxxx */
		else if (cp <= 0xFFFF)
			nbytes = 3;	/* 1110xxxx 10xxxxxx 10xxxxxx */
		else
			nbytes = 4;	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */

		/* continuation bytes, last one first, six payload bits each */
		for (i = nbytes - 1; i > 0; i--) {
			utf8[i] = (char) (0x80 | (cp & 0x3F));
			cp >>= 6;
		}
		/* whatever bits remain go into the lead byte */
		utf8[0] = (char) (lead_mark[nbytes] | cp);
	}

	*outlen = nbytes;
	utf8[nbytes] = '\0';

	return utf8;
}