1/* See LICENSE file for copyright and license details. */
2
3
4#include <stdlib.h>
5
6#include "utf.h"
7#include "util.h"
8
/* Command-line switches; each becomes 1 once its option has been seen. */
static int cflag; /* -c / -C */
static int dflag; /* -d */
static int sflag; /* -s */
12
13struct range {
14 Rune start;
15 Rune end;
16 size_t quant;
17};
18
19static struct {
20 char *name;
21 int (*check)(Rune);
22} classes[] = {
23 { "alnum", isalnumrune },
24 { "alpha", isalpharune },
25 { "blank", isblankrune },
26 { "cntrl", iscntrlrune },
27 { "digit", isdigitrune },
28 { "graph", isgraphrune },
29 { "lower", islowerrune },
30 { "print", isprintrune },
31 { "punct", ispunctrune },
32 { "space", isspacerune },
33 { "upper", isupperrune },
34 { "xdigit", isxdigitrune },
35};
36
/*
 * Bits of a checks mask that stand for the "lower" and "upper" classes.
 * The shift counts are the positions of those names in classes[], so they
 * have to be kept in step with that table.  The expansions are fully
 * parenthesised so the macros behave as single values inside any
 * surrounding expression.
 */
#define ISLOWERBIT (1U << 6)
#define ISUPPERBIT (1U << 10)
39
/*
 * Parsed form of the two operands.  For each set: the range array filled in
 * by makeset(), the number of ranges in it, and the bit mask of character
 * classes named in it.
 */
static struct range *set1 = NULL, *set2 = NULL;
static size_t set1ranges = 0, set2ranges = 0;
static unsigned set1checks = 0, set2checks = 0;
46
47static int
48check(Rune rune, unsigned checks)
49{
50 size_t i;
51
52 for (i = 0; checks && i < LEN(classes); i++, checks >>= 1)
53 if (checks & 1 && classes[i].check(rune))
54 return 1;
55
56 return 0;
57}
58
59static size_t
60rangelen(struct range r)
61{
62 return (r.end - r.start + 1) * r.quant;
63}
64
65static size_t
66setlen(struct range *set, size_t setranges)
67{
68 size_t len = 0, i;
69
70 for (i = 0; i < setranges; i++)
71 len += rangelen(set[i]);
72
73 return len;
74}
75
76static int
77rstrmatch(Rune *r, char *s, size_t n)
78{
79 size_t i;
80
81 for (i = 0; i < n; i++)
82 if (r[i] != s[i])
83 return 0;
84 return 1;
85}
86
87static size_t
88makeset(char *str, struct range **set, unsigned *checks)
89{
90 Rune *rstr;
91 size_t len, i, j, m, n;
92 size_t q, setranges = 0;
93 int factor, base;
94
95 /* rstr defines at most len ranges */
96 unescape(str);
97 rstr = ereallocarray(NULL, utflen(str) + 1, sizeof(*rstr));
98 len = utftorunestr(str, rstr);
99 *set = ereallocarray(NULL, len, sizeof(**set));
100
101 for (i = 0; i < len; i++) {
102 if (rstr[i] == '[') {
103 j = i;
104nextbrack:
105 if (j >= len)
106 goto literal;
107 for (m = j; m < len; m++)
108 if (rstr[m] == ']') {
109 j = m;
110 break;
111 }
112 if (j == i)
113 goto literal;
114
115 /* CLASSES [=EQUIV=] (skip) */
116 if (j - i > 3 && rstr[i + 1] == '=' && rstr[m - 1] == '=') {
117 if (j - i != 4)
118 goto literal;
119 (*set)[setranges].start = rstr[i + 2];
120 (*set)[setranges].end = rstr[i + 2];
121 (*set)[setranges].quant = 1;
122 setranges++;
123 i = j;
124 continue;
125 }
126
127 /* CLASSES [:CLASS:] */
128 if (j - i > 3 && rstr[i + 1] == ':' && rstr[m - 1] == ':') {
129 for (n = 0; n < LEN(classes); n++) {
130 if (rstrmatch(rstr + i + 2, classes[n].name, j - i - 3)) {
131 *checks |= 1 << n;
132 i = j;
133 break;
134 }
135 }
136 if (n < LEN(classes))
137 continue;
138 eprintf("Invalid character class.\n");
139 }
140
141 /* REPEAT [_*n] (only allowed in set2) */
142 if (j - i > 2 && rstr[i + 2] == '*') {
143 /* check if right side of '*' is a number */
144 q = 0;
145 factor = 1;
146 base = (rstr[i + 3] == '0') ? 8 : 10;
147 for (n = j - 1; n > i + 2; n--) {
148 if (rstr[n] < '0' || rstr[n] > '9') {
149 n = 0;
150 break;
151 }
152 q += (rstr[n] - '0') * factor;
153 factor *= base;
154 }
155 if (n == 0) {
156 j = m + 1;
157 goto nextbrack;
158 }
159 (*set)[setranges].start = rstr[i + 1];
160 (*set)[setranges].end = rstr[i + 1];
161 (*set)[setranges].quant = q ? q : setlen(set1, MAX(set1ranges, 1));
162 setranges++;
163 i = j;
164 continue;
165 }
166
167 j = m + 1;
168 goto nextbrack;
169 }
170literal:
171 /* RANGES [_-__-_], _-__-_ */
172 /* LITERALS _______ */
173 (*set)[setranges].start = rstr[i];
174
175 if (i < len - 2 && rstr[i + 1] == '-' && rstr[i + 2] >= rstr[i])
176 i += 2;
177 (*set)[setranges].end = rstr[i];
178 (*set)[setranges].quant = 1;
179 setranges++;
180 }
181
182 free(rstr);
183 return setranges;
184}
185
186static void
187usage(void)
188{
189 eprintf("usage: %s [-cCds] set1 [set2]\n", argv0);
190}
191
192// ?man tr: translate characters
// ?man arguments: set1 [set2]
194// ?man translate, squeeze, or delete characters from standard input
195int
196main(int argc, char *argv[])
197{
198 Rune r, lastrune = 0;
199 size_t off1, off2, i, m;
200 int ret = 0;
201
202 ARGBEGIN {
203 // ?man -c: print count or perform stdout action
204 case 'c':
205 // ?man -C: specify option flag
206 case 'C':
207 cflag = 1;
208 break;
209 // ?man -d: specify directory
210 case 'd':
211 dflag = 1;
212 break;
213 // ?man -s: silent mode or print summary
214 case 's':
215 sflag = 1;
216 break;
217 default:
218 usage();
219 } ARGEND
220
221 if (!argc || argc > 2 || (dflag == sflag && argc != 2) ||
222 (dflag && argc != 1))
223 usage();
224
225 set1ranges = makeset(argv[0], &set1, &set1checks);
226 if (argc == 2) {
227 set2ranges = makeset(argv[1], &set2, &set2checks);
228 /* sanity checks as we are translating */
229 if (!set2ranges && !set2checks)
230 eprintf("cannot map to an empty set.\n");
231 if (set2checks && set2checks != ISLOWERBIT &&
232 set2checks != ISUPPERBIT) {
233 eprintf("can only map to 'lower' and 'upper' class.\n");
234 }
235 }
236read:
237 if (!efgetrune(&r, stdin, "<stdin>")) {
238 ret |= fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>");
239 return ret;
240 }
241 if (argc == 1 && sflag)
242 goto write;
243 for (i = 0, off1 = 0; i < set1ranges; off1 += rangelen(set1[i]), i++) {
244 if (set1[i].start <= r && r <= set1[i].end) {
245 if (dflag) {
246 if (cflag)
247 goto write;
248 else
249 goto read;
250 }
251 if (cflag)
252 goto write;
253
254 /* map r to set2 */
255 if (set2checks) {
256 if (set2checks == ISLOWERBIT)
257 r = tolowerrune(r);
258 else
259 r = toupperrune(r);
260 } else {
261 off1 += r - set1[i].start;
262 if (off1 > setlen(set2, set2ranges) - 1) {
263 r = set2[set2ranges - 1].end;
264 goto write;
265 }
266 for (m = 0, off2 = 0; m < set2ranges; m++) {
267 if (off2 + rangelen(set2[m]) > off1) {
268 m++;
269 break;
270 }
271 off2 += rangelen(set2[m]);
272 }
273 m--;
274 r = set2[m].start + (off1 - off2) / set2[m].quant;
275 }
276 goto write;
277 }
278 }
279 if (check(r, set1checks)) {
280 if (cflag)
281 goto write;
282 if (dflag)
283 goto read;
284 if (set2checks) {
285 if (set2checks == ISLOWERBIT)
286 r = tolowerrune(r);
287 else
288 r = toupperrune(r);
289 } else {
290 r = set2[set2ranges - 1].end;
291 }
292 goto write;
293 }
294 if (!dflag && cflag) {
295 if (set2checks) {
296 if (set2checks == ISLOWERBIT)
297 r = tolowerrune(r);
298 else
299 r = toupperrune(r);
300 } else {
301 r = set2[set2ranges - 1].end;
302 }
303 goto write;
304 }
305 if (dflag && cflag)
306 goto read;
307write:
308 if (argc == 1 && sflag && r == lastrune) {
309 if (check(r, set1checks))
310 goto read;
311 for (i = 0; i < set1ranges; i++) {
312 if (set1[i].start <= r && r <= set1[i].end)
313 goto read;
314 }
315 }
316 if (argc == 2 && sflag && r == lastrune) {
317 if (set2checks && check(r, set2checks))
318 goto read;
319 for (i = 0; i < set2ranges; i++) {
320 if (set2[i].start <= r && r <= set2[i].end)
321 goto read;
322 }
323 }
324 efputrune(&r, stdout, "<stdout>");
325 lastrune = r;
326 goto read;
327}