master xplshn/aruu / cmd / posix / uniq.c
  1/* See LICENSE file for copyright and license details. */
  2
  3
  4#include <ctype.h>
  5#include <stdio.h>
  6#include <stdlib.h>
  7#include <string.h>
  8
  9#include "text.h"
 10#include "util.h"
 11
/* printf format used to prefix each output line with its count; empty means no prefix */
static const char *countfmt = "";
/* when set, a line whose run count is exactly 1 is not printed */
static int dflag = 0;
/* when set, a line whose run count is not 1 is not printed */
static int uflag = 0;
/* number of blank-delimited fields uniqskip() steps over before comparing */
static int fskip = 0;
/* number of characters uniqskip() steps over after the skipped fields */
static int sskip = 0;

/* private copy of the pending line, i.e. the first line of the current run */
static struct line prevl;
/* offset in prevl where the compared portion starts; -1 means no line is pending */
static ssize_t prevoff    = -1;
/* number of consecutive matching lines seen for the pending line */
static long prevlinecount = 0;
 21
 22static size_t
 23uniqskip(struct line *l)
 24{
 25	size_t i;
 26	int f = fskip, s = sskip;
 27
 28	for (i = 0; i < l->len && f; --f) {
 29		while (isblank(l->data[i]))
 30			i++;
 31		while (i < l->len && !isblank(l->data[i]))
 32			i++;
 33	}
 34	for (; s && i < l->len && l->data[i] != '\n'; --s, i++)
 35		;
 36
 37	return i;
 38}
 39
 40static void
 41uniqline(FILE *ofp, struct line *l)
 42{
 43	size_t loff;
 44
 45	if (l) {
 46		loff = uniqskip(l);
 47
 48		if (prevoff >= 0 && (l->len - loff) == (prevl.len - prevoff) &&
 49		    !memcmp(l->data + loff, prevl.data + prevoff, l->len - loff)) {
 50			++prevlinecount;
 51			return;
 52		}
 53	}
 54
 55	if (prevoff >= 0) {
 56		if ((prevlinecount == 1 && !dflag) ||
 57		    (prevlinecount != 1 && !uflag)) {
 58			if (*countfmt)
 59				fprintf(ofp, countfmt, prevlinecount);
 60			fwrite(prevl.data, 1, prevl.len, ofp);
 61		}
 62		prevoff = -1;
 63	}
 64
 65	if (l) {
 66		if (!prevl.data || l->len >= prevl.len) {
 67			prevl.data = erealloc(prevl.data, l->len);
 68		}
 69		prevl.len = l->len;
 70		memcpy(prevl.data, l->data, prevl.len);
 71		prevoff = loff;
 72	}
 73	prevlinecount = 1;
 74}
 75
 76static void
 77uniq(FILE *fp, FILE *ofp)
 78{
 79	static struct line line;
 80	static size_t size;
 81	ssize_t len;
 82
 83	while ((len = getline(&line.data, &size, fp)) > 0) {
 84		line.len = len;
 85		uniqline(ofp, &line);
 86	}
 87}
 88
 89static void
 90uniqfinish(FILE *ofp)
 91{
 92	uniqline(ofp, NULL);
 93}
 94
 95static void
 96usage(void)
 97{
 98	eprintf("usage: %s [-c] [-d | -u] [-f fields] [-s chars]"
 99	        " [input [output]]\n", argv0);
100}
101
102// ?man uniq: report duplicate lines
103// ?man filter out repeated lines from sorted files
int
main(int argc, char *argv[])
{
	/* fp[0]/fname[0] describe the input, fp[1]/fname[1] the output */
	FILE *fp[2] = { stdin, stdout };
	int ret = 0, i;
	char *fname[2] = { "<stdin>", "<stdout>" };

	ARGBEGIN {
	// ?man -c: prefix each output line with the number of times it occurred
	case 'c':
		countfmt = "%7ld ";
		break;
	// ?man -d: only print lines that are repeated
	case 'd':
		dflag = 1;
		break;
	// ?man -u: only print lines that are not repeated
	case 'u':
		uflag = 1;
		break;
	// ?man -f:num: skip the first num blank-separated fields when comparing lines
	case 'f':
		fskip = estrtonum(EARGF(usage()), 0, INT_MAX);
		break;
	// ?man -s:num: skip the first num characters (after any skipped fields) when comparing lines
	case 's':
		sskip = estrtonum(EARGF(usage()), 0, INT_MAX);
		break;
	default:
		usage();
	} ARGEND

	/* at most two operands are accepted: input, then output */
	if (argc > 2)
		usage();

	/* an operand of "-" keeps the default stream for that position */
	for (i = 0; i < argc; i++) {
		if (strcmp(argv[i], "-")) {
			fname[i] = argv[i];
			if (!(fp[i] = fopen(argv[i], (i == 0) ? "r" : "w")))
				eprintf("fopen %s:", argv[i]);
		}
	}

	uniq(fp[0], fp[1]);
	/* emit the line still pending after the last input line */
	uniqfinish(fp[1]);

	/* bitwise | so both streams are always passed to fshut() */
	ret |= fshut(fp[0], fname[0]) | fshut(fp[1], fname[1]);

	return ret;
}
153}