master xplshn/aruu / cmd / posix / tr.c
  1/* See LICENSE file for copyright and license details. */
  2
  3
  4#include <stdlib.h>
  5
  6#include "utf.h"
  7#include "util.h"
  8
  9static int cflag = 0;
 10static int dflag = 0;
 11static int sflag = 0;
 12
/*
 * One contiguous run of runes inside a parsed set.
 * A literal is stored as a range whose start and end are equal.
 */
struct range {
	Rune   start; /* first rune of the run (inclusive) */
	Rune   end;   /* last rune of the run (inclusive) */
	size_t quant; /* how many set positions each rune occupies; 1 unless a [c*n] repeat set it */
};
 18
 19static struct {
 20	char    *name;
 21	int    (*check)(Rune);
 22} classes[] = {
 23	{ "alnum",  isalnumrune  },
 24	{ "alpha",  isalpharune  },
 25	{ "blank",  isblankrune  },
 26	{ "cntrl",  iscntrlrune  },
 27	{ "digit",  isdigitrune  },
 28	{ "graph",  isgraphrune  },
 29	{ "lower",  islowerrune  },
 30	{ "print",  isprintrune  },
 31	{ "punct",  ispunctrune  },
 32	{ "space",  isspacerune  },
 33	{ "upper",  isupperrune  },
 34	{ "xdigit", isxdigitrune },
 35};
 36
 37#define ISLOWERBIT 		   1U << 6
 38#define ISUPPERBIT 		   1U << 10
 39
 40static struct   range *set1 = NULL;
 41static size_t   set1ranges  = 0;
 42static unsigned set1checks  = 0;
 43static struct   range *set2 = NULL;
 44static size_t   set2ranges  = 0;
 45static unsigned set2checks  = 0;
 46
 47static int
 48check(Rune rune, unsigned checks)
 49{
 50	size_t i;
 51
 52	for (i = 0; checks && i < LEN(classes); i++, checks >>= 1)
 53		if (checks & 1 && classes[i].check(rune))
 54			return 1;
 55
 56	return 0;
 57}
 58
 59static size_t
 60rangelen(struct range r)
 61{
 62	return (r.end - r.start + 1) * r.quant;
 63}
 64
 65static size_t
 66setlen(struct range *set, size_t setranges)
 67{
 68	size_t len = 0, i;
 69
 70	for (i = 0; i < setranges; i++)
 71		len += rangelen(set[i]);
 72
 73	return len;
 74}
 75
 76static int
 77rstrmatch(Rune *r, char *s, size_t n)
 78{
 79	size_t i;
 80
 81	for (i = 0; i < n; i++)
 82		if (r[i] != s[i])
 83			return 0;
 84	return 1;
 85}
 86
 87static size_t
 88makeset(char *str, struct range **set, unsigned *checks)
 89{
 90	Rune  *rstr;
 91	size_t len, i, j, m, n;
 92	size_t q, setranges = 0;
 93	int    factor, base;
 94
 95	/* rstr defines at most len ranges */
 96	unescape(str);
 97	rstr = ereallocarray(NULL, utflen(str) + 1, sizeof(*rstr));
 98	len = utftorunestr(str, rstr);
 99	*set = ereallocarray(NULL, len, sizeof(**set));
100
101	for (i = 0; i < len; i++) {
102		if (rstr[i] == '[') {
103			j = i;
104nextbrack:
105			if (j >= len)
106				goto literal;
107			for (m = j; m < len; m++)
108				if (rstr[m] == ']') {
109					j = m;
110					break;
111				}
112			if (j == i)
113				goto literal;
114
115			/* CLASSES [=EQUIV=] (skip) */
116			if (j - i > 3 && rstr[i + 1] == '=' && rstr[m - 1] == '=') {
117				if (j - i != 4)
118					goto literal;
119				(*set)[setranges].start = rstr[i + 2];
120				(*set)[setranges].end   = rstr[i + 2];
121				(*set)[setranges].quant = 1;
122				setranges++;
123				i = j;
124				continue;
125			}
126
127			/* CLASSES [:CLASS:] */
128			if (j - i > 3 && rstr[i + 1] == ':' && rstr[m - 1] == ':') {
129				for (n = 0; n < LEN(classes); n++) {
130					if (rstrmatch(rstr + i + 2, classes[n].name, j - i - 3)) {
131						*checks |= 1 << n;
132						i = j;
133						break;
134					}
135				}
136				if (n < LEN(classes))
137					continue;
138				eprintf("Invalid character class.\n");
139			}
140
141			/* REPEAT  [_*n] (only allowed in set2) */
142			if (j - i > 2 && rstr[i + 2] == '*') {
143				/* check if right side of '*' is a number */
144				q = 0;
145				factor = 1;
146				base = (rstr[i + 3] == '0') ? 8 : 10;
147				for (n = j - 1; n > i + 2; n--) {
148					if (rstr[n] < '0' || rstr[n] > '9') {
149						n = 0;
150						break;
151					}
152					q += (rstr[n] - '0') * factor;
153					factor *= base;
154				}
155				if (n == 0) {
156					j = m + 1;
157					goto nextbrack;
158				}
159				(*set)[setranges].start = rstr[i + 1];
160				(*set)[setranges].end   = rstr[i + 1];
161				(*set)[setranges].quant = q ? q : setlen(set1, MAX(set1ranges, 1));
162				setranges++;
163				i = j;
164				continue;
165			}
166
167			j = m + 1;
168			goto nextbrack;
169		}
170literal:
171		/* RANGES [_-__-_], _-__-_ */
172		/* LITERALS _______ */
173		(*set)[setranges].start = rstr[i];
174
175		if (i < len - 2 && rstr[i + 1] == '-' && rstr[i + 2] >= rstr[i])
176			i += 2;
177		(*set)[setranges].end = rstr[i];
178		(*set)[setranges].quant = 1;
179		setranges++;
180	}
181
182	free(rstr);
183	return setranges;
184}
185
/*
 * Report the command synopsis through eprintf().
 * NOTE(review): main() calls this as if it does not return; that relies
 * on eprintf() terminating the program - confirm against util.h.
 */
static void
usage(void)
{
	eprintf("usage: %s [-cCds] set1 [set2]\n", argv0);
}
191
192// ?man tr: translate characters
// ?man arguments: set1 [set2]
194// ?man translate, squeeze, or delete characters from standard input
195int
196main(int argc, char *argv[])
197{
198	Rune r, lastrune = 0;
199	size_t off1, off2, i, m;
200	int ret = 0;
201
202	ARGBEGIN {
203	// ?man -c: print count or perform stdout action
204	case 'c':
205	// ?man -C: specify option flag
206	case 'C':
207		cflag = 1;
208		break;
209	// ?man -d: specify directory
210	case 'd':
211		dflag = 1;
212		break;
213	// ?man -s: silent mode or print summary
214	case 's':
215		sflag = 1;
216		break;
217	default:
218		usage();
219	} ARGEND
220
221	if (!argc || argc > 2 || (dflag == sflag && argc != 2) ||
222	    (dflag && argc != 1))
223		usage();
224
225	set1ranges = makeset(argv[0], &set1, &set1checks);
226	if (argc == 2) {
227		set2ranges = makeset(argv[1], &set2, &set2checks);
228		/* sanity checks as we are translating */
229		if (!set2ranges && !set2checks)
230			eprintf("cannot map to an empty set.\n");
231		if (set2checks && set2checks != ISLOWERBIT &&
232		    set2checks != ISUPPERBIT) {
233			eprintf("can only map to 'lower' and 'upper' class.\n");
234		}
235	}
236read:
237	if (!efgetrune(&r, stdin, "<stdin>")) {
238		ret |= fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>");
239		return ret;
240	}
241	if (argc == 1 && sflag)
242		goto write;
243	for (i = 0, off1 = 0; i < set1ranges; off1 += rangelen(set1[i]), i++) {
244		if (set1[i].start <= r && r <= set1[i].end) {
245			if (dflag) {
246				if (cflag)
247					goto write;
248				else
249					goto read;
250			}
251			if (cflag)
252				goto write;
253
254			/* map r to set2 */
255			if (set2checks) {
256				if (set2checks == ISLOWERBIT)
257					r = tolowerrune(r);
258				else
259					r = toupperrune(r);
260			} else {
261				off1 += r - set1[i].start;
262				if (off1 > setlen(set2, set2ranges) - 1) {
263					r = set2[set2ranges - 1].end;
264					goto write;
265				}
266				for (m = 0, off2 = 0; m < set2ranges; m++) {
267					if (off2 + rangelen(set2[m]) > off1) {
268						m++;
269						break;
270					}
271					off2 += rangelen(set2[m]);
272				}
273				m--;
274				r = set2[m].start + (off1 - off2) / set2[m].quant;
275			}
276			goto write;
277		}
278	}
279	if (check(r, set1checks)) {
280		if (cflag)
281			goto write;
282		if (dflag)
283			goto read;
284		if (set2checks) {
285			if (set2checks == ISLOWERBIT)
286				r = tolowerrune(r);
287			else
288				r = toupperrune(r);
289		} else {
290			r = set2[set2ranges - 1].end;
291		}
292		goto write;
293	}
294	if (!dflag && cflag) {
295		if (set2checks) {
296			if (set2checks == ISLOWERBIT)
297				r = tolowerrune(r);
298			else
299				r = toupperrune(r);
300		} else {
301			r = set2[set2ranges - 1].end;
302		}
303		goto write;
304	}
305	if (dflag && cflag)
306		goto read;
307write:
308	if (argc == 1 && sflag && r == lastrune) {
309		if (check(r, set1checks))
310			goto read;
311		for (i = 0; i < set1ranges; i++) {
312			if (set1[i].start <= r && r <= set1[i].end)
313				goto read;
314		}
315	}
316	if (argc == 2 && sflag && r == lastrune) {
317		if (set2checks && check(r, set2checks))
318			goto read;
319		for (i = 0; i < set2ranges; i++) {
320			if (set2[i].start <= r && r <= set2[i].end)
321				goto read;
322		}
323	}
324	efputrune(&r, stdout, "<stdout>");
325	lastrune = r;
326	goto read;
327}