1/* See LICENSE file for copyright and license details. */
2
3
4#include "queue.h"
5#include "text.h"
6#include "utf.h"
7#include "util.h"
8
9#include <ctype.h>
10#include <stdio.h>
11#include <stdlib.h>
12#include <string.h>
13
14struct keydef {
15 int start_column;
16 int end_column;
17 int start_char;
18 int end_char;
19 int flags;
20 TAILQ_ENTRY(keydef) entry;
21};
22
23struct column {
24 struct line line;
25 size_t cap;
26};
27
28enum {
29 MOD_N = 1 << 0,
30 MOD_STARTB = 1 << 1,
31 MOD_ENDB = 1 << 2,
32 MOD_R = 1 << 3,
33 MOD_D = 1 << 4,
34 MOD_F = 1 << 5,
35 MOD_I = 1 << 6,
36};
37
38static TAILQ_HEAD(kdhead, keydef) kdhead = TAILQ_HEAD_INITIALIZER(kdhead);
39
40static int Cflag = 0, cflag = 0, uflag = 0;
41#if FEATURE_SORT_BIG
42static int zflag = 0;
43#endif
44#if FEATURE_SORT_STABLE
45static int sflag = 0;
46#endif
47static char *fieldsep = NULL;
48static size_t fieldseplen = 0;
49static struct column col1, col2;
50
51static void
52skipblank(struct line *a)
53{
54 while (a->len && (*(a->data) == ' ' || *(a->data) == '\t')) {
55 a->data++;
56 a->len--;
57 }
58}
59
60static void
61skipnonblank(struct line *a)
62{
63 while (a->len && (*(a->data) != '\n' && *(a->data) != ' ' &&
64 *(a->data) != '\t')) {
65 a->data++;
66 a->len--;
67 }
68}
69
70static void
71skipcolumn(struct line *a, int skip_to_next_col)
72{
73 char *s;
74
75 if (fieldsep) {
76 if ((s = memmem(a->data, a->len, fieldsep, fieldseplen))) {
77 if (skip_to_next_col)
78 s += fieldseplen;
79 a->len -= s - a->data;
80 a->data = s;
81 } else {
82 a->data += a->len - 1;
83 a->len = 1;
84 }
85 } else {
86 skipblank(a);
87 skipnonblank(a);
88 }
89}
90
91static void
92columns(struct line *line, const struct keydef *kd, struct column *col)
93{
94 Rune r;
95 struct line start, end;
96 size_t utflen, rlen;
97 int i;
98
99 start.data = line->data;
100 start.len = line->len;
101 for (i = 1; i < kd->start_column; i++)
102 skipcolumn(&start, 1);
103 if (kd->flags & MOD_STARTB)
104 skipblank(&start);
105 for (utflen = 0; start.len > 1 && utflen < (size_t)(kd->start_char - 1);) {
106 rlen = chartorune(&r, start.data);
107 start.data += rlen;
108 start.len -= rlen;
109 utflen++;
110 }
111
112 end.data = line->data;
113 end.len = line->len;
114 if (kd->end_column) {
115 for (i = 1; i < kd->end_column; i++)
116 skipcolumn(&end, 1);
117 if (kd->flags & MOD_ENDB)
118 skipblank(&end);
119 if (kd->end_char) {
120 for (utflen = 0; end.len > 1 && utflen < (size_t)kd->end_char;) {
121 rlen = chartorune(&r, end.data);
122 end.data += rlen;
123 end.len -= rlen;
124 utflen++;
125 }
126 } else {
127 skipcolumn(&end, 0);
128 }
129 } else {
130 end.data += end.len - 1;
131 end.len = 1;
132 }
133 col->line.len = MAX(0, end.data - start.data);
134 if (!(col->line.data) || col->cap < col->line.len + 1) {
135 free(col->line.data);
136 col->line.data = emalloc(col->line.len + 1);
137 col->cap = col->line.len + 1;
138 }
139 memcpy(col->line.data, start.data, col->line.len);
140 col->line.data[col->line.len] = '\0';
141}
142
143static int
144skipmodcmp(struct line *a, struct line *b, int flags)
145{
146 Rune r1, r2;
147 size_t offa = 0, offb = 0;
148
149 do {
150 offa += chartorune(&r1, a->data + offa);
151 offb += chartorune(&r2, b->data + offb);
152
153 if (flags & MOD_D && flags & MOD_I) {
154 while (offa < a->len && ((!isblankrune(r1) &&
155 !isalnumrune(r1)) || (!isprintrune(r1))))
156 offa += chartorune(&r1, a->data + offa);
157 while (offb < b->len && ((!isblankrune(r2) &&
158 !isalnumrune(r2)) || (!isprintrune(r2))))
159 offb += chartorune(&r2, b->data + offb);
160 }
161 else if (flags & MOD_D) {
162 while (offa < a->len && !isblankrune(r1) &&
163 !isalnumrune(r1))
164 offa += chartorune(&r1, a->data + offa);
165 while (offb < b->len && !isblankrune(r2) &&
166 !isalnumrune(r2))
167 offb += chartorune(&r2, b->data + offb);
168 }
169 else if (flags & MOD_I) {
170 while (offa < a->len && !isprintrune(r1))
171 offa += chartorune(&r1, a->data + offa);
172 while (offb < b->len && !isprintrune(r2))
173 offb += chartorune(&r2, b->data + offb);
174 }
175 if (flags & MOD_F) {
176 r1 = toupperrune(r1);
177 r2 = toupperrune(r2);
178 }
179 } while (r1 && r1 == r2);
180
181 return r1 - r2;
182}
183
184#if FEATURE_SORT_BIG
185static void
186getlines_z(FILE *fp, struct linebuf *b)
187{
188 char *line = NULL;
189 size_t size = 0, linelen = 0;
190 ssize_t len;
191
192 while ((len = getdelim(&line, &size, '\0', fp)) > 0) {
193 if (++b->nlines > b->capacity) {
194 b->capacity += 512;
195 b->lines = ereallocarray(b->lines, b->capacity, sizeof(*b->lines));
196 }
197 linelen = len;
198 b->lines[b->nlines - 1].data = memcpy(emalloc(linelen + 1), line, linelen + 1);
199 b->lines[b->nlines - 1].len = linelen;
200 }
201 free(line);
202 if (b->lines && b->nlines && linelen && b->lines[b->nlines - 1].data[linelen - 1] != '\0') {
203 b->lines[b->nlines - 1].data = erealloc(b->lines[b->nlines - 1].data, linelen + 2);
204 b->lines[b->nlines - 1].data[linelen] = '\0';
205 b->lines[b->nlines - 1].data[linelen + 1] = '\0';
206 b->lines[b->nlines - 1].len++;
207 }
208}
209#endif
210
211static int
212slinecmp(struct line *a, struct line *b)
213{
214 int res = 0;
215 double x, y;
216 struct keydef *kd;
217
218 TAILQ_FOREACH(kd, &kdhead, entry) {
219 columns(a, kd, &col1);
220 columns(b, kd, &col2);
221
222 /* if -u is given, don't use default key definition
223 * unless it is the only one */
224 if (uflag && kd == TAILQ_LAST(&kdhead, kdhead) &&
225 TAILQ_LAST(&kdhead, kdhead) != TAILQ_FIRST(&kdhead)) {
226 res = 0;
227 } else if (kd->flags & MOD_N) {
228 x = strtod(col1.line.data, NULL);
229 y = strtod(col2.line.data, NULL);
230 res = (x < y) ? -1 : (x > y);
231 } else if (kd->flags & (MOD_D | MOD_F | MOD_I)) {
232 res = skipmodcmp(&col1.line, &col2.line, kd->flags);
233 } else {
234 res = linecmp(&col1.line, &col2.line);
235 }
236
237 if (kd->flags & MOD_R)
238 res = -res;
239 if (res)
240 break;
241 }
242
243 return res;
244}
245
246#if FEATURE_SORT_STABLE
247struct sort_item {
248 struct line line;
249 size_t index;
250};
251
252static int
253slinecmp_stable(const struct sort_item *a, const struct sort_item *b)
254{
255 int res = slinecmp((struct line *)&a->line, (struct line *)&b->line);
256 if (res == 0)
257 return (a->index < b->index) ? -1 : 1;
258 return res;
259}
260#endif
261
262static int
263check(FILE *fp, const char *fname)
264{
265 static struct line prev, cur, tmp;
266 static size_t prevsize, cursize, tmpsize;
267 ssize_t len;
268 int delim = '\n';
269
270#if FEATURE_SORT_BIG
271 if (zflag)
272 delim = '\0';
273#endif
274
275 if (!prev.data) {
276 if ((len = getdelim(&prev.data, &prevsize, delim, fp)) < 0) {
277 if (feof(fp))
278 return 0;
279 eprintf("getdelim:");
280 }
281 prev.len = len;
282 }
283 while ((len = getdelim(&cur.data, &cursize, delim, fp)) > 0) {
284 cur.len = len;
285 if (uflag > slinecmp(&cur, &prev)) {
286 if (!Cflag) {
287 weprintf("disorder %s: ", fname);
288 fwrite(cur.data, 1, cur.len, stderr);
289 }
290 return 1;
291 }
292 tmp = cur;
293 tmpsize = cursize;
294 cur = prev;
295 cursize = prevsize;
296 prev = tmp;
297 prevsize = tmpsize;
298 }
299
300 return 0;
301}
302
303static int
304parse_flags(char **s, int *flags, int bflag)
305{
306 while (isalpha((int)**s)) {
307 switch (*((*s)++)) {
308 // ?man -b: specify block size or base directory
309 case 'b':
310 *flags |= bflag;
311 break;
312 // ?man -d: specify directory
313 case 'd':
314 *flags |= MOD_D;
315 break;
316 // ?man -f: force the operation
317 case 'f':
318 *flags |= MOD_F;
319 break;
320 // ?man -i: interactive mode or prompt for confirmation
321 case 'i':
322 *flags |= MOD_I;
323 break;
324 // ?man -n: print line numbers or counts
325 case 'n':
326 *flags |= MOD_N;
327 break;
328 // ?man -r: operate recursively
329 case 'r':
330 *flags |= MOD_R;
331 break;
332 default:
333 return -1;
334 }
335 }
336
337 return 0;
338}
339
340static void
341addkeydef(char *kdstr, int flags)
342{
343 struct keydef *kd;
344
345 kd = enmalloc(2, sizeof(*kd));
346
347 /* parse key definition kdstr with format
348 * start_column[.start_char][flags][,end_column[.end_char][flags]]
349 */
350 kd->start_column = 1;
351 kd->start_char = 1;
352 kd->end_column = 0; /* 0 means end of line */
353 kd->end_char = 0; /* 0 means end of column */
354 kd->flags = flags;
355
356 if ((kd->start_column = strtol(kdstr, &kdstr, 10)) < 1)
357 enprintf(2, "invalid start column in key definition\n");
358
359 if (*kdstr == '.') {
360 if ((kd->start_char = strtol(kdstr + 1, &kdstr, 10)) < 1)
361 enprintf(2, "invalid start character in key "
362 "definition\n");
363 }
364 if (parse_flags(&kdstr, &kd->flags, MOD_STARTB) < 0)
365 enprintf(2, "invalid start flags in key definition\n");
366
367 if (*kdstr == ',') {
368 if ((kd->end_column = strtol(kdstr + 1, &kdstr, 10)) < 0)
369 enprintf(2, "invalid end column in key definition\n");
370 if (*kdstr == '.') {
371 if ((kd->end_char = strtol(kdstr + 1, &kdstr, 10)) < 0)
372 enprintf(2, "invalid end character in key "
373 "definition\n");
374 }
375 if (parse_flags(&kdstr, &kd->flags, MOD_ENDB) < 0)
376 enprintf(2, "invalid end flags in key definition\n");
377 }
378
379 if (*kdstr != '\0')
380 enprintf(2, "invalid key definition\n");
381
382 TAILQ_INSERT_TAIL(&kdhead, kd, entry);
383}
384
385static void
386usage(void)
387{
388 enprintf(2, "usage: %s [-Cbcdfimnru"
389#if FEATURE_SORT_STABLE
390 "s"
391#endif
392#if FEATURE_SORT_BIG
393 "z"
394#endif
395 "] [-o outfile] [-t delim] [-k def]... [file ...]\n", argv0);
396}
397
398// ?man sort: sort lines
399// ?man arguments: -Cbcdfimnru
400// ?man sort or merge lines of text files
401int
402main(int argc, char *argv[])
403{
404 FILE *fp, *ofp = stdout;
405 struct linebuf linebuf = EMPTY_LINEBUF;
406 size_t i;
407 int global_flags = 0, ret = 0;
408 char *outfile = NULL;
409
410 ARGBEGIN {
411 // ?man -C: specify option flag
412 case 'C':
413 Cflag = 1;
414 break;
415 // ?man -b: specify block size or base directory
416 case 'b':
417 global_flags |= MOD_STARTB | MOD_ENDB;
418 break;
419 // ?man -c: print count or perform stdout action
420 case 'c':
421 cflag = 1;
422 break;
423 // ?man -d: specify directory
424 case 'd':
425 global_flags |= MOD_D;
426 break;
427 // ?man -f: force the operation
428 case 'f':
429 global_flags |= MOD_F;
430 break;
431 // ?man -i: interactive mode or prompt for confirmation
432 case 'i':
433 global_flags |= MOD_I;
434 break;
435 // ?man -k:str: specify option flag
436 case 'k':
437 addkeydef(EARGF(usage()), global_flags);
438 break;
439 // ?man -m: specify mode or limit
440 case 'm':
441 /* more or less for free, but for performance-reasons,
442 * we should keep this flag in mind and maybe some later
443 * day implement it properly so we don't run out of memory
444 * while merging large sorted files.
445 */
446 break;
447 // ?man -n: print line numbers or counts
448 case 'n':
449 global_flags |= MOD_N;
450 break;
451 // ?man -o:file: specify output file
452 case 'o':
453 outfile = EARGF(usage());
454 break;
455 // ?man -r: operate recursively
456 case 'r':
457 global_flags |= MOD_R;
458 break;
459#if FEATURE_SORT_STABLE
460 // ?man -s: silent mode or print summary
461 case 's':
462 sflag = 1;
463 break;
464#endif
465 // ?man -t:str: sort or specify timestamp
466 case 't':
467 fieldsep = EARGF(usage());
468 if (!*fieldsep)
469 eprintf("empty delimiter\n");
470 fieldseplen = unescape(fieldsep);
471 break;
472 // ?man -u: unbuffered output
473 case 'u':
474 uflag = 1;
475 break;
476#if FEATURE_SORT_BIG
477 // ?man -z: specify option flag
478 case 'z':
479 zflag = 1;
480 break;
481#endif
482 default:
483 usage();
484 } ARGEND
485
486 /* -b shall only apply to custom key definitions */
487 if (TAILQ_EMPTY(&kdhead) && global_flags)
488 addkeydef("1", global_flags & ~(MOD_STARTB | MOD_ENDB));
489 if (TAILQ_EMPTY(&kdhead) || (!Cflag && !cflag))
490 addkeydef("1", global_flags & MOD_R);
491
492 if (!argc) {
493 if (Cflag || cflag) {
494 if (check(stdin, "<stdin>") && !ret)
495 ret = 1;
496 } else {
497#if FEATURE_SORT_BIG
498 if (zflag)
499 getlines_z(stdin, &linebuf);
500 else
501#endif
502 getlines(stdin, &linebuf);
503 }
504 } else for (; *argv; argc--, argv++) {
505 if (!strcmp(*argv, "-")) {
506 *argv = "<stdin>";
507 fp = stdin;
508 } else if (!(fp = fopen(*argv, "r"))) {
509 enprintf(2, "fopen %s:", *argv);
510 continue;
511 }
512 if (Cflag || cflag) {
513 if (check(fp, *argv) && !ret)
514 ret = 1;
515 } else {
516#if FEATURE_SORT_BIG
517 if (zflag)
518 getlines_z(fp, &linebuf);
519 else
520#endif
521 getlines(fp, &linebuf);
522 }
523 if (fp != stdin && fshut(fp, *argv))
524 ret = 2;
525 }
526
527 if (!Cflag && !cflag) {
528 if (outfile && !(ofp = fopen(outfile, "w")))
529 eprintf("fopen %s:", outfile);
530
531#if FEATURE_SORT_STABLE
532 if (sflag) {
533 struct sort_item *items = ecalloc(linebuf.nlines, sizeof(*items));
534 for (i = 0; i < linebuf.nlines; i++) {
535 items[i].line = linebuf.lines[i];
536 items[i].index = i;
537 }
538 qsort(items, linebuf.nlines, sizeof(*items),
539 (int (*)(const void *, const void *))slinecmp_stable);
540 for (i = 0; i < linebuf.nlines; i++) {
541 linebuf.lines[i] = items[i].line;
542 }
543 free(items);
544 } else {
545#endif
546 qsort(linebuf.lines, linebuf.nlines, sizeof(*linebuf.lines),
547 (int (*)(const void *, const void *))slinecmp);
548#if FEATURE_SORT_STABLE
549 }
550#endif
551
552 for (i = 0; i < linebuf.nlines; i++) {
553 if (!uflag || i == 0 ||
554 slinecmp(&linebuf.lines[i], &linebuf.lines[i - 1])) {
555 fwrite(linebuf.lines[i].data, 1,
556 linebuf.lines[i].len, ofp);
557 }
558 }
559 }
560
561 if (fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>") |
562 fshut(stderr, "<stderr>"))
563 ret = 2;
564
565 return ret;
566}