1/* See LICENSE file for copyright and license details. */
2
3
4#include <ctype.h>
5#include <stdio.h>
6#include <stdlib.h>
7#include <string.h>
8
9#include "text.h"
10#include "util.h"
11
12static const char *countfmt = "";
13static int dflag = 0;
14static int uflag = 0;
15static int fskip = 0;
16static int sskip = 0;
17
18static struct line prevl;
19static ssize_t prevoff = -1;
20static long prevlinecount = 0;
21
22static size_t
23uniqskip(struct line *l)
24{
25 size_t i;
26 int f = fskip, s = sskip;
27
28 for (i = 0; i < l->len && f; --f) {
29 while (isblank(l->data[i]))
30 i++;
31 while (i < l->len && !isblank(l->data[i]))
32 i++;
33 }
34 for (; s && i < l->len && l->data[i] != '\n'; --s, i++)
35 ;
36
37 return i;
38}
39
40static void
41uniqline(FILE *ofp, struct line *l)
42{
43 size_t loff;
44
45 if (l) {
46 loff = uniqskip(l);
47
48 if (prevoff >= 0 && (l->len - loff) == (prevl.len - prevoff) &&
49 !memcmp(l->data + loff, prevl.data + prevoff, l->len - loff)) {
50 ++prevlinecount;
51 return;
52 }
53 }
54
55 if (prevoff >= 0) {
56 if ((prevlinecount == 1 && !dflag) ||
57 (prevlinecount != 1 && !uflag)) {
58 if (*countfmt)
59 fprintf(ofp, countfmt, prevlinecount);
60 fwrite(prevl.data, 1, prevl.len, ofp);
61 }
62 prevoff = -1;
63 }
64
65 if (l) {
66 if (!prevl.data || l->len >= prevl.len) {
67 prevl.data = erealloc(prevl.data, l->len);
68 }
69 prevl.len = l->len;
70 memcpy(prevl.data, l->data, prevl.len);
71 prevoff = loff;
72 }
73 prevlinecount = 1;
74}
75
76static void
77uniq(FILE *fp, FILE *ofp)
78{
79 static struct line line;
80 static size_t size;
81 ssize_t len;
82
83 while ((len = getline(&line.data, &size, fp)) > 0) {
84 line.len = len;
85 uniqline(ofp, &line);
86 }
87}
88
/*
 * Flush the run that is still pending after the last input line by
 * calling uniqline() with a NULL line.
 */
static void
uniqfinish(FILE *ofp)
{
	struct line *end_of_input = NULL;

	uniqline(ofp, end_of_input);
}
94
95static void
96usage(void)
97{
98 eprintf("usage: %s [-c] [-d | -u] [-f fields] [-s chars]"
99 " [input [output]]\n", argv0);
100}
101
// ?man uniq: report or filter out repeated lines
// ?man collapse each run of adjacent identical lines into one line; sort the input first to catch all duplicates
104int
105main(int argc, char *argv[])
106{
107 FILE *fp[2] = { stdin, stdout };
108 int ret = 0, i;
109 char *fname[2] = { "<stdin>", "<stdout>" };
110
111 ARGBEGIN {
112 // ?man -c: print count or perform stdout action
113 case 'c':
114 countfmt = "%7ld ";
115 break;
116 // ?man -d: specify directory
117 case 'd':
118 dflag = 1;
119 break;
120 // ?man -u: unbuffered output
121 case 'u':
122 uflag = 1;
123 break;
124 // ?man -f:num: force the operation
125 case 'f':
126 fskip = estrtonum(EARGF(usage()), 0, INT_MAX);
127 break;
128 // ?man -s:num: silent mode or print summary
129 case 's':
130 sskip = estrtonum(EARGF(usage()), 0, INT_MAX);
131 break;
132 default:
133 usage();
134 } ARGEND
135
136 if (argc > 2)
137 usage();
138
139 for (i = 0; i < argc; i++) {
140 if (strcmp(argv[i], "-")) {
141 fname[i] = argv[i];
142 if (!(fp[i] = fopen(argv[i], (i == 0) ? "r" : "w")))
143 eprintf("fopen %s:", argv[i]);
144 }
145 }
146
147 uniq(fp[0], fp[1]);
148 uniqfinish(fp[1]);
149
150 ret |= fshut(fp[0], fname[0]) | fshut(fp[1], fname[1]);
151
152 return ret;
153}