master xplshn/aruu / cmd / posix / awk / tran.c
  1/****************************************************************
  2Copyright (C) Lucent Technologies 1997
  3All Rights Reserved
  4
  5Permission to use, copy, modify, and distribute this software and
  6its documentation for any purpose and without fee is hereby
  7granted, provided that the above copyright notice appear in all
  8copies and that both that the copyright notice and this
  9permission notice and warranty disclaimer appear in supporting
 10documentation, and that the name Lucent Technologies or any of
 11its entities not be used in advertising or publicity pertaining
 12to distribution of the software without specific, written prior
 13permission.
 14
 15LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
 16INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
 17IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
 18SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 19WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
 20IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
 21ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
 22THIS SOFTWARE.
 23****************************************************************/
 24
 25#define	DEBUG
 26#include <stdio.h>
 27#include <math.h>
 28#include <ctype.h>
 29#include <string.h>
 30#include <stdlib.h>
 31#include "awk.h"
 32
/*
 * Hash-table tuning.  setsymtab() calls rehash() once the element count
 * exceeds FULLTAB times the number of buckets; rehash() then multiplies
 * the number of buckets by GROWTAB.
 */
#define	FULLTAB	2	/* rehash when table gets this x full */
#define	GROWTAB 4	/* grow table by this factor */

Array	*symtab;	/* main symbol table */

/*
 * Pointers to the string (sval) or numeric (fval) slot of the cells that
 * hold the builtin variables.  All of them are assigned in syminit(),
 * except ARGC, which is assigned in arginit().
 */
char	**FS;		/* initial field sep */
char	**RS;		/* initial record sep */
char	**OFS;		/* output field sep */
char	**ORS;		/* output record sep */
char	**OFMT;		/* output format for numbers */
char	**CONVFMT;	/* format for conversions in getsval */
Awkfloat *NF;		/* number of fields in current record */
Awkfloat *NR;		/* number of current record */
Awkfloat *FNR;		/* number of current record in current file */
char	**FILENAME;	/* current filename argument */
Awkfloat *ARGC;		/* number of arguments from command line */
char	**SUBSEP;	/* subscript separator for a[i,j,k]; default \034 */
Awkfloat *RSTART;	/* start of re matched with ~; origin 1 (!) */
Awkfloat *RLENGTH;	/* length of same */

/*
 * The cells themselves, as returned by setsymtab() in syminit()
 * (ARGVcell is assigned in arginit()).
 */
Cell	*fsloc;		/* FS */
Cell	*nrloc;		/* NR */
Cell	*nfloc;		/* NF */
Cell	*fnrloc;	/* FNR */
Cell	*ofsloc;	/* OFS */
Cell	*orsloc;	/* ORS */
Cell	*rsloc;		/* RS */
Cell	*ARGVcell;	/* cell with symbol table containing ARGV[...] */
Cell	*rstartloc;	/* RSTART */
Cell	*rlengthloc;	/* RLENGTH */
Cell	*subseploc;	/* SUBSEP */
Cell	*symtabloc;	/* SYMTAB */

Cell	*nullloc;	/* a guaranteed empty cell */
Node	*nullnode;	/* zero&null, converted into a node for comparisons */
Cell	*literal0;	/* constant cell named "0" with string "0", value 0.0 */

extern Cell **fldtab;	/* not defined in the visible part of this file */
 71
 72void syminit(void)	/* initialize symbol table with builtin vars */
 73{
 74	literal0 = setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab);
 75	/* this is used for if(x)... tests: */
 76	nullloc = setsymtab("$zero&null", "", 0.0, NUM|STR|CON|DONTFREE, symtab);
 77	nullnode = celltonode(nullloc, CCON);
 78
 79	fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab);
 80	FS = &fsloc->sval;
 81	rsloc = setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab);
 82	RS = &rsloc->sval;
 83	ofsloc = setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab);
 84	OFS = &ofsloc->sval;
 85	orsloc = setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab);
 86	ORS = &orsloc->sval;
 87	OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
 88	CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
 89	FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval;
 90	nfloc = setsymtab("NF", "", 0.0, NUM, symtab);
 91	NF = &nfloc->fval;
 92	nrloc = setsymtab("NR", "", 0.0, NUM, symtab);
 93	NR = &nrloc->fval;
 94	fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab);
 95	FNR = &fnrloc->fval;
 96	subseploc = setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab);
 97	SUBSEP = &subseploc->sval;
 98	rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab);
 99	RSTART = &rstartloc->fval;
100	rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
101	RLENGTH = &rlengthloc->fval;
102	symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab);
103	free(symtabloc->sval);
104	symtabloc->sval = (char *) symtab;
105}
106
107void arginit(int ac, char **av)	/* set up ARGV and ARGC */
108{
109	Array *ap;
110	Cell *cp;
111	int i;
112	char temp[50];
113
114	ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
115	cp = setsymtab("ARGV", "", 0.0, ARR, symtab);
116	ap = makesymtab(NSYMTAB);	/* could be (int) ARGC as well */
117	free(cp->sval);
118	cp->sval = (char *) ap;
119	for (i = 0; i < ac; i++) {
120		double result;
121
122		sprintf(temp, "%d", i);
123		if (is_number(*av, & result))
124			setsymtab(temp, *av, result, STR|NUM, ap);
125		else
126			setsymtab(temp, *av, 0.0, STR, ap);
127		av++;
128	}
129	ARGVcell = cp;
130}
131
132void envinit(char **envp)	/* set up ENVIRON variable */
133{
134	Array *ap;
135	Cell *cp;
136	char *p;
137
138	cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab);
139	ap = makesymtab(NSYMTAB);
140	free(cp->sval);
141	cp->sval = (char *) ap;
142	for ( ; *envp; envp++) {
143		double result;
144
145		if ((p = strchr(*envp, '=')) == NULL)
146			continue;
147		if( p == *envp ) /* no left hand side name in env string */
148			continue;
149		*p++ = 0;	/* split into two strings at = */
150		if (is_number(p, & result))
151			setsymtab(*envp, p, result, STR|NUM, ap);
152		else
153			setsymtab(*envp, p, 0.0, STR, ap);
154		p[-1] = '=';	/* restore in case env is passed down to a shell */
155	}
156}
157
158Array *makesymtab(int n)	/* make a new symbol table */
159{
160	Array *ap;
161	Cell **tp;
162
163	ap = (Array *) malloc(sizeof(*ap));
164	tp = (Cell **) calloc(n, sizeof(*tp));
165	if (ap == NULL || tp == NULL)
166		FATAL("out of space in makesymtab");
167	ap->nelem = 0;
168	ap->size = n;
169	ap->tab = tp;
170	return(ap);
171}
172
173void freesymtab(Cell *ap)	/* free a symbol table */
174{
175	Cell *cp, *temp;
176	Array *tp;
177	int i;
178
179	if (!isarr(ap))
180		return;
181	tp = (Array *) ap->sval;
182	if (tp == NULL)
183		return;
184	for (i = 0; i < tp->size; i++) {
185		for (cp = tp->tab[i]; cp != NULL; cp = temp) {
186			xfree(cp->nval);
187			if (freeable(cp))
188				xfree(cp->sval);
189			temp = cp->cnext;	/* avoids freeing then using */
190			free(cp);
191			tp->nelem--;
192		}
193		tp->tab[i] = NULL;
194	}
195	if (tp->nelem != 0)
196		WARNING("can't happen: inconsistent element count freeing %s", ap->nval);
197	free(tp->tab);
198	free(tp);
199}
200
201void freeelem(Cell *ap, const char *s)	/* free elem s from ap (i.e., ap["s"] */
202{
203	Array *tp;
204	Cell *p, *prev = NULL;
205	int h;
206
207	tp = (Array *) ap->sval;
208	h = hash(s, tp->size);
209	for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
210		if (strcmp(s, p->nval) == 0) {
211			if (prev == NULL)	/* 1st one */
212				tp->tab[h] = p->cnext;
213			else			/* middle somewhere */
214				prev->cnext = p->cnext;
215			if (freeable(p))
216				xfree(p->sval);
217			free(p->nval);
218			free(p);
219			tp->nelem--;
220			return;
221		}
222}
223
224Cell *setsymtab(const char *n, const char *s, Awkfloat f, unsigned t, Array *tp)
225{
226	int h;
227	Cell *p;
228
229	if (n != NULL && (p = lookup(n, tp)) != NULL) {
230		DPRINTF("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n",
231			(void*)p, NN(p->nval), NN(p->sval), p->fval, p->tval);
232		return(p);
233	}
234	p = (Cell *) malloc(sizeof(*p));
235	if (p == NULL)
236		FATAL("out of space for symbol table at %s", n);
237	p->nval = tostring(n);
238	p->sval = s ? tostring(s) : tostring("");
239	p->fval = f;
240	p->tval = t;
241	p->csub = CUNK;
242	p->ctype = OCELL;
243	tp->nelem++;
244	if (tp->nelem > FULLTAB * tp->size)
245		rehash(tp);
246	h = hash(n, tp->size);
247	p->cnext = tp->tab[h];
248	tp->tab[h] = p;
249	DPRINTF("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n",
250		(void*)p, p->nval, p->sval, p->fval, p->tval);
251	return(p);
252}
253
/*
 * hash - map string s to a bucket index in the range [0, n).
 *
 * Folds the characters of s into an unsigned accumulator (multiply by
 * 31, add the character; wrap-around is intended) and reduces the
 * result modulo n.  The empty string hashes to 0.
 */
int hash(const char *s, int n)
{
	unsigned acc = 0;
	const char *cp = s;

	while (*cp != '\0') {
		acc = 31 * acc + *cp;
		cp++;
	}
	return acc % n;
}
262
263void rehash(Array *tp)	/* rehash items in small table into big one */
264{
265	int i, nh, nsz;
266	Cell *cp, *op, **np;
267
268	nsz = GROWTAB * tp->size;
269	np = (Cell **) calloc(nsz, sizeof(*np));
270	if (np == NULL)		/* can't do it, but can keep running. */
271		return;		/* someone else will run out later. */
272	for (i = 0; i < tp->size; i++) {
273		for (cp = tp->tab[i]; cp; cp = op) {
274			op = cp->cnext;
275			nh = hash(cp->nval, nsz);
276			cp->cnext = np[nh];
277			np[nh] = cp;
278		}
279	}
280	free(tp->tab);
281	tp->tab = np;
282	tp->size = nsz;
283}
284
285Cell *lookup(const char *s, Array *tp)	/* look for s in tp */
286{
287	Cell *p;
288	int h;
289
290	h = hash(s, tp->size);
291	for (p = tp->tab[h]; p != NULL; p = p->cnext)
292		if (strcmp(s, p->nval) == 0)
293			return(p);	/* found it */
294	return(NULL);			/* not found */
295}
296
297Awkfloat setfval(Cell *vp, Awkfloat f)	/* set float val of a Cell */
298{
299	int fldno;
300
301	f += 0.0;		/* normalise negative zero to positive zero */
302	if ((vp->tval & (NUM | STR)) == 0)
303		funnyvar(vp, "assign to");
304	if (isfld(vp)) {
305		donerec = false;	/* mark $0 invalid */
306		fldno = atoi(vp->nval);
307		if (fldno > *NF)
308			newfld(fldno);
309		DPRINTF("setting field %d to %g\n", fldno, f);
310	} else if (&vp->fval == NF) {
311		donerec = false;	/* mark $0 invalid */
312		setlastfld(f);
313		DPRINTF("setfval: setting NF to %g\n", f);
314	} else if (isrec(vp)) {
315		donefld = false;	/* mark $1... invalid */
316		donerec = true;
317		savefs();
318	} else if (vp == ofsloc) {
319		if (!donerec)
320			recbld();
321	}
322	if (freeable(vp))
323		xfree(vp->sval); /* free any previous string */
324	vp->tval &= ~(STR|CONVC|CONVO); /* mark string invalid */
325	vp->fmt = NULL;
326	vp->tval |= NUM;	/* mark number ok */
327	if (f == -0)  /* who would have thought this possible? */
328		f = 0;
329	DPRINTF("setfval %p: %s = %g, t=%o\n", (void*)vp, NN(vp->nval), f, vp->tval);
330	return vp->fval = f;
331}
332
333void funnyvar(Cell *vp, const char *rw)
334{
335	if (isarr(vp))
336		FATAL("can't %s %s; it's an array name.", rw, vp->nval);
337	if (vp->tval & FCN)
338		FATAL("can't %s %s; it's a function.", rw, vp->nval);
339	WARNING("funny variable %p: n=%s s=\"%s\" f=%g t=%o",
340		(void *)vp, vp->nval, vp->sval, vp->fval, vp->tval);
341}
342
343char *setsval(Cell *vp, const char *s)	/* set string val of a Cell */
344{
345	char *t;
346	int fldno;
347	Awkfloat f;
348
349	DPRINTF("starting setsval %p: %s = \"%s\", t=%o, r,f=%d,%d\n",
350		(void*)vp, NN(vp->nval), s, vp->tval, donerec, donefld);
351	if ((vp->tval & (NUM | STR)) == 0)
352		funnyvar(vp, "assign to");
353	if (CSV && (vp == rsloc))
354		WARNING("danger: don't set RS when --csv is in effect");
355	if (CSV && (vp == fsloc))
356		WARNING("danger: don't set FS when --csv is in effect");
357	if (isfld(vp)) {
358		donerec = false;	/* mark $0 invalid */
359		fldno = atoi(vp->nval);
360		if (fldno > *NF)
361			newfld(fldno);
362		DPRINTF("setting field %d to %s (%p)\n", fldno, s, (const void*)s);
363	} else if (isrec(vp)) {
364		donefld = false;	/* mark $1... invalid */
365		donerec = true;
366		savefs();
367	} else if (vp == ofsloc) {
368		if (!donerec)
369			recbld();
370	}
371	t = s ? tostring(s) : tostring("");	/* in case it's self-assign */
372	if (freeable(vp))
373		xfree(vp->sval);
374	vp->tval &= ~(NUM|DONTFREE|CONVC|CONVO);
375	vp->tval |= STR;
376	vp->fmt = NULL;
377	DPRINTF("setsval %p: %s = \"%s (%p) \", t=%o r,f=%d,%d\n",
378		(void*)vp, NN(vp->nval), t, (void*)t, vp->tval, donerec, donefld);
379	vp->sval = t;
380	if (&vp->fval == NF) {
381		donerec = false;	/* mark $0 invalid */
382		f = getfval(vp);
383		setlastfld(f);
384		DPRINTF("setsval: setting NF to %g\n", f);
385	}
386
387	return(vp->sval);
388}
389
390Awkfloat getfval(Cell *vp)	/* get float val of a Cell */
391{
392	if ((vp->tval & (NUM | STR)) == 0)
393		funnyvar(vp, "read value of");
394	if (isfld(vp) && !donefld)
395		fldbld();
396	else if (isrec(vp) && !donerec)
397		recbld();
398	if (!isnum(vp)) {	/* not a number */
399		double fval;
400		bool no_trailing;
401
402		if (is_valid_number(vp->sval, true, & no_trailing, & fval)) {
403			vp->fval = fval;
404			if (no_trailing && !(vp->tval&CON))
405				vp->tval |= NUM;	/* make NUM only sparingly */
406		} else
407			vp->fval = 0.0;
408	}
409	DPRINTF("getfval %p: %s = %g, t=%o\n",
410		(void*)vp, NN(vp->nval), vp->fval, vp->tval);
411	return(vp->fval);
412}
413
/*
 * get_inf_nan - spell out a non-finite double.
 *
 * Returns "+inf" or "-inf" for an infinity, "+nan" or "-nan" for a NaN
 * (sign taken from the sign bit), and NULL for every finite value.
 */
static const char *get_inf_nan(double d)
{
	if (isnan(d))
		return signbit(d) != 0 ? "-nan" : "+nan";
	if (isinf(d))
		return d < 0 ? "-inf" : "+inf";
	return NULL;
}
423
424static char *get_str_val(Cell *vp, char **fmt)        /* get string val of a Cell */
425{
426	char s[256];
427	double dtemp;
428	const char *p;
429
430	if ((vp->tval & (NUM | STR)) == 0)
431		funnyvar(vp, "read value of");
432	if (isfld(vp) && ! donefld)
433		fldbld();
434	else if (isrec(vp) && ! donerec)
435		recbld();
436
437	/*
438	 * ADR: This is complicated and more fragile than is desirable.
439	 * Retrieving a string value for a number associates the string
440	 * value with the scalar.  Previously, the string value was
441	 * sticky, meaning if converted via OFMT that became the value
442	 * (even though POSIX wants it to be via CONVFMT). Or if CONVFMT
443	 * changed after a string value was retrieved, the original value
444	 * was maintained and used.  Also not per POSIX.
445	 *
446	 * We work around this design by adding two additional flags,
447	 * CONVC and CONVO, indicating how the string value was
448	 * obtained (via CONVFMT or OFMT) and _also_ maintaining a copy
449	 * of the pointer to the xFMT format string used for the
450	 * conversion.  This pointer is only read, **never** dereferenced.
451	 * The next time we do a conversion, if it's coming from the same
452	 * xFMT as last time, and the pointer value is different, we
453	 * know that the xFMT format string changed, and we need to
454	 * redo the conversion. If it's the same, we don't have to.
455	 *
456	 * There are also several cases where we don't do a conversion,
457	 * such as for a field (see the checks below).
458	 */
459
460	/* Don't duplicate the code for actually updating the value */
461#define update_str_val(vp) \
462	{ \
463		if (freeable(vp)) \
464			xfree(vp->sval); \
465		if ((p = get_inf_nan(vp->fval)) != NULL) \
466			strcpy(s, p); \
467		else if (modf(vp->fval, &dtemp) == 0)	/* it's integral */ \
468			snprintf(s, sizeof (s), "%.30g", vp->fval); \
469		else \
470			snprintf(s, sizeof (s), *fmt, vp->fval); \
471		vp->sval = tostring(s); \
472		vp->tval &= ~DONTFREE; \
473		vp->tval |= STR; \
474	}
475
476	if (isstr(vp) == 0) {
477		update_str_val(vp);
478		if (fmt == OFMT) {
479			vp->tval &= ~CONVC;
480			vp->tval |= CONVO;
481		} else {
482			/* CONVFMT */
483			vp->tval &= ~CONVO;
484			vp->tval |= CONVC;
485		}
486		vp->fmt = *fmt;
487	} else if ((vp->tval & DONTFREE) != 0 || ! isnum(vp) || isfld(vp)) {
488		goto done;
489	} else if (isstr(vp)) {
490		if (fmt == OFMT) {
491			if ((vp->tval & CONVC) != 0
492			    || ((vp->tval & CONVO) != 0 && vp->fmt != *fmt)) {
493				update_str_val(vp);
494				vp->tval &= ~CONVC;
495				vp->tval |= CONVO;
496				vp->fmt = *fmt;
497			}
498		} else {
499			/* CONVFMT */
500			if ((vp->tval & CONVO) != 0
501			    || ((vp->tval & CONVC) != 0 && vp->fmt != *fmt)) {
502				update_str_val(vp);
503				vp->tval &= ~CONVO;
504				vp->tval |= CONVC;
505				vp->fmt = *fmt;
506			}
507		}
508	}
509done:
510	DPRINTF("getsval %p: %s = \"%s (%p)\", t=%o\n",
511		(void*)vp, NN(vp->nval), vp->sval, (void*)vp->sval, vp->tval);
512	return(vp->sval);
513}
514
515char *getsval(Cell *vp)       /* get string val of a Cell */
516{
517      return get_str_val(vp, CONVFMT);
518}
519
520char *getpssval(Cell *vp)     /* get string val of a Cell for print */
521{
522      return get_str_val(vp, OFMT);
523}
524
525
/*
 * tostring - return a freshly allocated copy of string s.
 *
 * Calls FATAL() when the copy cannot be allocated.
 */
char *tostring(const char *s)
{
	char *copy = strdup(s);

	if (copy != NULL)
		return copy;

	FATAL("out of space in tostring on %s", s);
	return copy;
}
533
/*
 * tostringN - copy string s into a freshly allocated buffer of at least
 * n bytes.
 *
 * The buffer is never smaller than strlen(s) + 1, so a caller that passes
 * an n too small for s gets a buffer that fits instead of a heap
 * overflow.  For any n that already leaves room for s and its NUL the
 * result is unchanged: an n-byte buffer holding a copy of s.
 * Calls FATAL() when the buffer cannot be allocated.
 */
char *tostringN(const char *s, size_t n)	/* make a copy of string s */
{
	size_t need = strlen(s) + 1;	/* bytes in s, including the NUL */
	char *p;

	if (n < need)
		n = need;
	p = (char *) malloc(n);
	if (p == NULL)
		FATAL("out of space in tostring on %s", s);
	memcpy(p, s, need);
	return(p);
}
544
545Cell *catstr(Cell *a, Cell *b) /* concatenate a and b */
546{
547	Cell *c;
548	char *p;
549	char *sa = getsval(a);
550	char *sb = getsval(b);
551	size_t l = strlen(sa) + strlen(sb) + 1;
552	p = (char *) malloc(l);
553	if (p == NULL)
554		FATAL("out of space concatenating %s and %s", sa, sb);
555	snprintf(p, l, "%s%s", sa, sb);
556
557	l++;	// add room for ' '
558	char *newbuf = (char *) malloc(l);
559	if (newbuf == NULL)
560		FATAL("out of space concatenating %s and %s", sa, sb);
561	// See string() in lex.c; a string "xx" is stored in the symbol
562	// table as "xx ".
563	snprintf(newbuf, l, "%s ", p);
564	c = setsymtab(newbuf, p, 0.0, CON|STR|DONTFREE, symtab);
565	free(p);
566	free(newbuf);
567	return c;
568}
569
570char *qstring(const char *is, int delim)	/* collect string up to next delim */
571{
572	int c, n;
573	const uschar *s = (const uschar *) is;
574	uschar *buf, *bp;
575
576	if ((buf = (uschar *) malloc(strlen(is)+3)) == NULL)
577		FATAL( "out of space in qstring(%s)", s);
578	for (bp = buf; (c = *s) != delim; s++) {
579		if (c == '\n')
580			SYNTAX( "newline in string %.20s...", is );
581		else if (c != '\\')
582			*bp++ = c;
583		else {	/* \something */
584			c = *++s;
585			if (c == 0) {	/* \ at end */
586				*bp++ = '\\';
587				break;	/* for loop */
588			}
589			switch (c) {
590			case '\\':	*bp++ = '\\'; break;
591			case 'n':	*bp++ = '\n'; break;
592			case 't':	*bp++ = '\t'; break;
593			case 'b':	*bp++ = '\b'; break;
594			case 'f':	*bp++ = '\f'; break;
595			case 'r':	*bp++ = '\r'; break;
596			case 'v':	*bp++ = '\v'; break;
597			case 'a':	*bp++ = '\a'; break;
598			default:
599				if (!isdigit(c)) {
600					*bp++ = c;
601					break;
602				}
603				n = c - '0';
604				if (isdigit(s[1])) {
605					n = 8 * n + *++s - '0';
606					if (isdigit(s[1]))
607						n = 8 * n + *++s - '0';
608				}
609				*bp++ = n;
610				break;
611			}
612		}
613	}
614	*bp++ = 0;
615	return (char *) buf;
616}
617
618const char *flags2str(int flags)
619{
620	static const struct ftab {
621		const char *name;
622		int value;
623	} flagtab[] = {
624		{ "NUM", NUM },
625		{ "STR", STR },
626		{ "DONTFREE", DONTFREE },
627		{ "CON", CON },
628		{ "ARR", ARR },
629		{ "FCN", FCN },
630		{ "FLD", FLD },
631		{ "REC", REC },
632		{ "CONVC", CONVC },
633		{ "CONVO", CONVO },
634		{ NULL, 0 }
635	};
636	static char buf[100];
637	int i;
638	char *cp = buf;
639
640	for (i = 0; flagtab[i].name != NULL; i++) {
641		if ((flags & flagtab[i].value) != 0) {
642			if (cp > buf)
643				*cp++ = '|';
644			strcpy(cp, flagtab[i].name);
645			cp += strlen(cp);
646		}
647	}
648
649	return buf;
650}