1/* See LICENSE file for copyright and license details. */
2
3
4#include <ctype.h>
5#include <stdint.h>
6#include <stdio.h>
7#include <stdlib.h>
8#include <string.h>
9
10#include "text.h"
11#include "utf.h"
12#include "util.h"
13
/*
 * Sizing constants for the growable arrays in this file: INIT is the
 * element count passed to the first allocation, GROW the factor the
 * capacity is multiplied by whenever an array is full.
 */
enum {
	INIT = 1,
	GROW = 2,
};

/* Values for the `reset` argument of addtospan(). */
enum {
	EXPAND = 0, /* append the new line after the lines already held */
	RESET = 1,  /* set the span's line count to zero before storing */
};

/* Returned by jlinecmp() when neither line has its join field. */
enum { FIELD_ERROR = -2, };
25
/*
 * One field of an input line.  makeline() points `s` into the line's
 * own text buffer, so the field is not NUL-terminated on its own and
 * must always be used together with `len`.
 */
struct field {
	char *s;    /* first byte of the field */
	size_t len; /* number of bytes in the field */
};
30
/* An input line together with the fields it was split into. */
struct jline {
	struct line text;     /* line buffer and its length (newline removed by makeline()) */
	size_t nf;            /* number of entries in use in `fields` */
	size_t maxf;          /* allocated capacity of `fields` */
	struct field *fields; /* growable array, filled by addfield() */
};
37
/* One element of the -o output list, as parsed by makespec(). */
struct spec {
	size_t fileno; /* 0 = the join field, 1 or 2 = a field of that file */
	size_t fldno;  /* zero-based field index (0 when fileno is 0) */
};
42
/* Growable list of output specs built from the -o argument. */
struct outlist {
	size_t ns;           /* number of specs stored */
	size_t maxs;         /* allocated capacity of `specs` */
	struct spec **specs; /* individually allocated specs */
};
48
/* Growable list of lines read from one input file (see addtospan()). */
struct span {
	size_t nl;            /* number of lines currently counted in the span */
	size_t maxl;          /* allocated capacity of `lines` */
	struct jline **lines; /* individually allocated lines */
};
54
static char *sep = NULL;           /* -t separator; NULL means split on blanks */
static char *replace = NULL;       /* -e string printed for missing -o fields */
static const char defaultofs = ' '; /* output separator used when sep is NULL */
static const int jfield = 1;       /* POSIX default join field */
static int unpairsa = 0, unpairsb = 0; /* print unpairable lines of file 1 / file 2 */
static int oflag = 0;              /* set when -o was given */
static int pairs = 1;              /* print joined lines; cleared by -v */
static size_t seplen;              /* length of sep as returned by unescape() */
static struct outlist output;      /* parsed -o list, valid when oflag is set */
64
65static void
66usage(void)
67{
68 eprintf("usage: %s [-1 field] [-2 field] [-o list] [-e string] "
69 "[-a | -v fileno] [-t delim] file1 file2\n", argv0);
70}
71
72static void
73prfield(struct field *fp)
74{
75 if (fwrite(fp->s, 1, fp->len, stdout) != fp->len)
76 eprintf("fwrite:");
77}
78
79static void
80prsep(void)
81{
82 if (sep)
83 fwrite(sep, 1, seplen, stdout);
84 else
85 putchar(defaultofs);
86}
87
88static void
89swaplines(struct jline *la, struct jline *lb)
90{
91 struct jline tmp;
92
93 tmp = *la;
94 *la = *lb;
95 *lb = tmp;
96}
97
98static void
99prjoin(struct jline *la, struct jline *lb, size_t jfa, size_t jfb)
100{
101 struct spec *sp;
102 struct field *joinfield;
103 size_t i;
104
105 if (jfa >= la->nf || jfb >= lb->nf)
106 return;
107
108 joinfield = &la->fields[jfa];
109
110 if (oflag) {
111 for (i = 0; i < output.ns; i++) {
112 sp = output.specs[i];
113
114 if (sp->fileno == 1) {
115 if (sp->fldno < la->nf)
116 prfield(&la->fields[sp->fldno]);
117 else if (replace)
118 fputs(replace, stdout);
119 } else if (sp->fileno == 2) {
120 if (sp->fldno < lb->nf)
121 prfield(&lb->fields[sp->fldno]);
122 else if (replace)
123 fputs(replace, stdout);
124 } else if (sp->fileno == 0) {
125 prfield(joinfield);
126 }
127
128 if (i < output.ns - 1)
129 prsep();
130 }
131 } else {
132 prfield(joinfield);
133 prsep();
134
135 for (i = 0; i < la->nf; i++) {
136 if (i != jfa) {
137 prfield(&la->fields[i]);
138 prsep();
139 }
140 }
141 for (i = 0; i < lb->nf; i++) {
142 if (i != jfb) {
143 prfield(&lb->fields[i]);
144 if (i < lb->nf - 1)
145 prsep();
146 }
147 }
148 }
149 putchar('\n');
150}
151
152static void
153prline(struct jline *lp)
154{
155 if (fwrite(lp->text.data, 1, lp->text.len, stdout) != lp->text.len)
156 eprintf("fwrite:");
157 putchar('\n');
158}
159
160static int
161jlinecmp(struct jline *la, struct jline *lb, size_t jfa, size_t jfb)
162{
163 int status;
164
165 /* return FIELD_ERROR if both lines are short */
166 if (jfa >= la->nf) {
167 status = (jfb >= lb->nf) ? FIELD_ERROR : -1;
168 } else if (jfb >= lb->nf) {
169 status = 1;
170 } else {
171 status = memcmp(la->fields[jfa].s, lb->fields[jfb].s,
172 MAX(la->fields[jfa].len, lb->fields[jfb].len));
173 LIMIT(status, -1, 1);
174 }
175
176 return status;
177}
178
179static void
180addfield(struct jline *lp, char *sp, size_t len)
181{
182 if (lp->nf >= lp->maxf) {
183 lp->fields = ereallocarray(lp->fields, (GROW * lp->maxf),
184 sizeof(struct field));
185 lp->maxf *= GROW;
186 }
187 lp->fields[lp->nf].s = sp;
188 lp->fields[lp->nf].len = len;
189 lp->nf++;
190}
191
192static void
193prspanjoin(struct span *spa, struct span *spb, size_t jfa, size_t jfb)
194{
195 size_t i, j;
196
197 for (i = 0; i < (spa->nl - 1); i++)
198 for (j = 0; j < (spb->nl - 1); j++)
199 prjoin(spa->lines[i], spb->lines[j], jfa, jfb);
200}
201
202static struct jline *
203makeline(char *s, size_t len)
204{
205 struct jline *lp;
206 char *tmp;
207 size_t i, end;
208
209 if (s[len - 1] == '\n')
210 s[--len] = '\0';
211
212 lp = ereallocarray(NULL, INIT, sizeof(struct jline));
213 lp->text.data = s;
214 lp->text.len = len;
215 lp->fields = ereallocarray(NULL, INIT, sizeof(struct field));
216 lp->nf = 0;
217 lp->maxf = INIT;
218
219 for (i = 0; i < lp->text.len && isblank(lp->text.data[i]); i++)
220 ;
221 while (i < lp->text.len) {
222 if (sep) {
223 if ((lp->text.len - i) < seplen ||
224 !(tmp = memmem(lp->text.data + i,
225 lp->text.len - i, sep, seplen))) {
226 goto eol;
227 }
228 end = tmp - lp->text.data;
229 addfield(lp, lp->text.data + i, end - i);
230 i = end + seplen;
231 } else {
232 for (end = i; !(isblank(lp->text.data[end])); end++) {
233 if (end + 1 == lp->text.len)
234 goto eol;
235 }
236 addfield(lp, lp->text.data + i, end - i);
237 for (i = end; isblank(lp->text.data[i]); i++)
238 ;
239 }
240 }
241eol:
242 addfield(lp, lp->text.data + i, lp->text.len - i);
243
244 return lp;
245}
246
247static int
248addtospan(struct span *sp, FILE *fp, int reset)
249{
250 char *newl = NULL;
251 ssize_t len;
252 size_t size = 0;
253
254 if ((len = getline(&newl, &size, fp)) < 0) {
255 if (ferror(fp))
256 eprintf("getline:");
257 else
258 return 0;
259 }
260
261 if (reset)
262 sp->nl = 0;
263
264 if (sp->nl >= sp->maxl) {
265 sp->lines = ereallocarray(sp->lines, (GROW * sp->maxl),
266 sizeof(struct jline *));
267 sp->maxl *= GROW;
268 }
269
270 sp->lines[sp->nl] = makeline(newl, len);
271 sp->nl++;
272 return 1;
273}
274
275static void
276initspan(struct span *sp)
277{
278 sp->nl = 0;
279 sp->maxl = INIT;
280 sp->lines = ereallocarray(NULL, INIT, sizeof(struct jline *));
281}
282
283static void
284freespan(struct span *sp)
285{
286 size_t i;
287
288 for (i = 0; i < sp->nl; i++) {
289 free(sp->lines[i]->fields);
290 free(sp->lines[i]->text.data);
291 }
292 free(sp->lines);
293}
294
295static void
296initolist(struct outlist *olp)
297{
298 olp->ns = 0;
299 olp->maxs = 1;
300 olp->specs = ereallocarray(NULL, INIT, sizeof(struct spec *));
301}
302
303static void
304addspec(struct outlist *olp, struct spec *sp)
305{
306 if (olp->ns >= olp->maxs) {
307 olp->specs = ereallocarray(olp->specs, (GROW * olp->maxs),
308 sizeof(struct spec *));
309 olp->maxs *= GROW;
310 }
311 olp->specs[olp->ns] = sp;
312 olp->ns++;
313}
314
315static struct spec *
316makespec(char *s)
317{
318 struct spec *sp;
319 int fileno;
320 size_t fldno;
321
322 if (!strcmp(s, "0")) { /* join field must be 0 and nothing else */
323 fileno = 0;
324 fldno = 0;
325 } else if ((s[0] == '1' || s[0] == '2') && s[1] == '.') {
326 fileno = s[0] - '0';
327 fldno = estrtonum(&s[2], 1, MIN((unsigned long long)LLONG_MAX, (unsigned long long)SIZE_MAX)) - 1;
328 } else {
329 eprintf("%s: invalid format\n", s);
330 }
331
332 sp = ereallocarray(NULL, INIT, sizeof(struct spec));
333 sp->fileno = fileno;
334 sp->fldno = fldno;
335 return sp;
336}
337
/*
 * Split the -o argument on commas, spaces and tabs and append one spec
 * per item to the list.  The argument string is modified in place: each
 * delimiter found is overwritten with a NUL.
 */
static void
makeolist(struct outlist *olp, char *s)
{
	char *item, *next;

	for (item = s; item; item = next) {
		next = strpbrk(item, ", \t");
		if (next)
			*next++ = '\0';
		addspec(olp, makespec(item));
	}
}
352
353static void
354freespecs(struct outlist *olp)
355{
356 size_t i;
357
358 for (i = 0; i < olp->ns; i++)
359 free(olp->specs[i]);
360}
361
362static void
363join(FILE *fa, FILE *fb, size_t jfa, size_t jfb)
364{
365 struct span spa, spb;
366 int cmp, eofa, eofb;
367
368 initspan(&spa);
369 initspan(&spb);
370 cmp = eofa = eofb = 0;
371
372 addtospan(&spa, fa, RESET);
373 addtospan(&spb, fb, RESET);
374
375 while (spa.nl && spb.nl) {
376 if ((cmp = jlinecmp(spa.lines[0], spb.lines[0], jfa, jfb)) < 0) {
377 if (unpairsa)
378 prline(spa.lines[0]);
379 if (!addtospan(&spa, fa, RESET)) {
380 if (unpairsb) { /* a is EOF'd; print the rest of b */
381 do
382 prline(spb.lines[0]);
383 while (addtospan(&spb, fb, RESET));
384 }
385 eofa = eofb = 1;
386 } else {
387 continue;
388 }
389 } else if (cmp > 0) {
390 if (unpairsb)
391 prline(spb.lines[0]);
392 if (!addtospan(&spb, fb, RESET)) {
393 if (unpairsa) { /* b is EOF'd; print the rest of a */
394 do
395 prline(spa.lines[0]);
396 while (addtospan(&spa, fa, RESET));
397 }
398 eofa = eofb = 1;
399 } else {
400 continue;
401 }
402 } else if (cmp == 0) {
403 /* read all consecutive matching lines from a */
404 do {
405 if (!addtospan(&spa, fa, EXPAND)) {
406 eofa = 1;
407 spa.nl++;
408 break;
409 }
410 } while (jlinecmp(spa.lines[spa.nl-1], spb.lines[0], jfa, jfb) == 0);
411
412 /* read all consecutive matching lines from b */
413 do {
414 if (!addtospan(&spb, fb, EXPAND)) {
415 eofb = 1;
416 spb.nl++;
417 break;
418 }
419 } while (jlinecmp(spa.lines[0], spb.lines[spb.nl-1], jfa, jfb) == 0);
420
421 if (pairs)
422 prspanjoin(&spa, &spb, jfa, jfb);
423
424 } else { /* FIELD_ERROR: both lines lacked join fields */
425 if (unpairsa)
426 prline(spa.lines[0]);
427 if (unpairsb)
428 prline(spb.lines[0]);
429 eofa = addtospan(&spa, fa, RESET) ? 0 : 1;
430 eofb = addtospan(&spb, fb, RESET) ? 0 : 1;
431 if (!eofa && !eofb)
432 continue;
433 }
434
435 if (eofa) {
436 spa.nl = 0;
437 } else {
438 swaplines(spa.lines[0], spa.lines[spa.nl - 1]); /* ugly */
439 spa.nl = 1;
440 }
441
442 if (eofb) {
443 spb.nl = 0;
444 } else {
445 swaplines(spb.lines[0], spb.lines[spb.nl - 1]); /* ugly */
446 spb.nl = 1;
447 }
448 }
449 freespan(&spa);
450 freespan(&spb);
451}
452
453
454// ?man join: join lines on common field
455// ?man join lines of two sorted files on a common field
456int
457main(int argc, char *argv[])
458{
459 size_t jf[2] = { jfield, jfield, };
460 FILE *fp[2];
461 int ret = 0, n;
462 char *fno;
463
464 ARGBEGIN {
465 // ?man -1:num: specify option flag
466 case '1':
467 jf[0] = estrtonum(EARGF(usage()), 1, MIN((unsigned long long)LLONG_MAX, (unsigned long long)SIZE_MAX));
468 break;
469 // ?man -2:num: specify option flag
470 case '2':
471 jf[1] = estrtonum(EARGF(usage()), 1, MIN((unsigned long long)LLONG_MAX, (unsigned long long)SIZE_MAX));
472 break;
473 // ?man -a:str: print or show all entries
474 case 'a':
475 fno = EARGF(usage());
476 if (strcmp(fno, "1") == 0)
477 unpairsa = 1;
478 else if (strcmp(fno, "2") == 0)
479 unpairsb = 1;
480 else
481 usage();
482 break;
483 // ?man -e:str: specify expression or pattern
484 case 'e':
485 replace = EARGF(usage());
486 break;
487 // ?man -o:str: specify output file
488 case 'o':
489 oflag = 1;
490 initolist(&output);
491 makeolist(&output, EARGF(usage()));
492 break;
493 // ?man -t:str: sort or specify timestamp
494 case 't':
495 sep = EARGF(usage());
496 break;
497 // ?man -v:str: verbose mode; show progress
498 case 'v':
499 pairs = 0;
500 fno = EARGF(usage());
501 if (strcmp(fno, "1") == 0)
502 unpairsa = 1;
503 else if (strcmp(fno, "2") == 0)
504 unpairsb = 1;
505 else
506 usage();
507 break;
508 default:
509 usage();
510 } ARGEND
511
512 if (sep)
513 seplen = unescape(sep);
514
515 if (argc != 2)
516 usage();
517
518 for (n = 0; n < 2; n++) {
519 if (!strcmp(argv[n], "-")) {
520 argv[n] = "<stdin>";
521 fp[n] = stdin;
522 } else if (!(fp[n] = fopen(argv[n], "r"))) {
523 eprintf("fopen %s:", argv[n]);
524 }
525 }
526
527 jf[0]--;
528 jf[1]--;
529
530 join(fp[0], fp[1], jf[0], jf[1]);
531
532 if (oflag)
533 freespecs(&output);
534
535 if (fshut(fp[0], argv[0]) | (fp[0] != fp[1] && fshut(fp[1], argv[1])) |
536 fshut(stdout, "<stdout>"))
537 ret = 2;
538
539 return ret;
540}