master xplshn/aruu / cmd / posix / cut.c
  1/* See LICENSE file for copyright and license details. */
  2
  3
  4#include <stdio.h>
  5#include <stdlib.h>
  6#include <string.h>
  7
  8#include "text.h"
  9#include "utf.h"
 10#include "util.h"
 11
/*
 * One selected interval from the list given to -b, -c or -f.
 * Positions are 1-based (parselist() rejects a min of 0). A max of 0 marks
 * an open-ended range ("N-"): cut() then writes the rest of the line.
 * Ranges are chained through `next` into the file-scope `list`.
 */
typedef struct Range {
	size_t min, max;
	struct Range *next;
} Range;
 16
/* Selected ranges; insert() keeps them in ascending order and merged. */
static Range *list     = NULL;
/* Selection unit: 'b', 'c' or 'f' as set by main(); 0 until an option sets it. */
static char   mode     = 0;
/* Field delimiter used in 'f' mode; replaced by the -d argument. */
static char  *delim    = "\t";
/* Length of delim in bytes (main() sets it from unescape() for -d). */
static size_t delimlen = 1;
/* -n: in 'b' mode seek() backs up until UTF8_POINT() accepts the byte. */
static int    nflag    = 0;
/* -s: in 'f' mode cut() drops lines that contain no delimiter. */
static int    sflag    = 0;
 23
 24static void
 25insert(Range *r)
 26{
 27	Range *l, *p, *t;
 28
 29	for (p = NULL, l = list; l; p = l, l = l->next) {
 30		if (r->max && r->max + 1 < l->min) {
 31			r->next = l;
 32			break;
 33		} else if (!l->max || r->min < l->max + 2) {
 34			l->min = MIN(r->min, l->min);
 35			for (p = l, t = l->next; t; p = t, t = t->next)
 36				if (r->max && r->max + 1 < t->min)
 37					break;
 38			l->max = (p->max && r->max) ? MAX(p->max, r->max) : 0;
 39			l->next = t;
 40			return;
 41		}
 42	}
 43	if (p)
 44		p->next = r;
 45	else
 46		list = r;
 47}
 48
 49static void
 50parselist(char *str)
 51{
 52	char *s;
 53	size_t n = 1;
 54	Range *r;
 55
 56	if (!*str)
 57		eprintf("empty list\n");
 58	for (s = str; *s; s++) {
 59		if (*s == ' ')
 60			*s = ',';
 61		if (*s == ',')
 62			n++;
 63	}
 64	r = ereallocarray(NULL, n, sizeof(*r));
 65	for (s = str; n; n--, s++) {
 66		r->min = (*s == '-') ? 1 : strtoul(s, &s, 10);
 67		r->max = (*s == '-') ? strtoul(s + 1, &s, 10) : r->min;
 68		r->next = NULL;
 69		if (!r->min || (r->max && r->max < r->min) || (*s && *s != ','))
 70			eprintf("bad list value\n");
 71		insert(r++);
 72	}
 73}
 74
 75static size_t
 76seek(struct line *s, size_t pos, size_t *prev, size_t count)
 77{
 78	size_t n = pos - *prev, i, j;
 79
 80	if (mode == 'b') {
 81		if (n >= s->len)
 82			return s->len;
 83		if (nflag)
 84			while (n && !UTF8_POINT(s->data[n]))
 85				n--;
 86		*prev += n;
 87		return n;
 88	} else if (mode == 'c') {
 89		for (n++, i = 0; i < s->len; i++)
 90			if (UTF8_POINT(s->data[i]) && !--n)
 91				break;
 92	} else {
 93		for (i = (count < delimlen + 1) ? 0 : delimlen; n && i < s->len; ) {
 94			if ((s->len - i) >= delimlen &&
 95			    !memcmp(s->data + i, delim, delimlen)) {
 96				if (!--n && count)
 97					break;
 98				i += delimlen;
 99				continue;
100			}
101			for (j = 1; j + i <= s->len && !fullrune(s->data + i, j); j++);
102			i += j;
103		}
104	}
105	*prev = pos;
106
107	return i;
108}
109
110static void
111cut(FILE *fp, const char *fname)
112{
113	Range *r;
114	struct line s;
115	static struct line line;
116	static size_t size;
117	size_t i, n, p;
118	ssize_t len;
119
120	while ((len = getline(&line.data, &size, fp)) > 0) {
121		line.len = len;
122		if (line.data[line.len - 1] == '\n')
123			line.data[--line.len] = '\0';
124		if (mode == 'f' && !memmem(line.data, line.len, delim, delimlen)) {
125			if (!sflag) {
126				fwrite(line.data, 1, line.len, stdout);
127				fputc('\n', stdout);
128			}
129			continue;
130		}
131		for (i = 0, p = 1, s = line, r = list; r; r = r->next) {
132			n = seek(&s, r->min, &p, i);
133			s.data += n;
134			s.len -= n;
135			i += (mode == 'f') ? delimlen : 1;
136			if (!s.len)
137				break;
138			if (!r->max) {
139				fwrite(s.data, 1, s.len, stdout);
140				break;
141			}
142			n = seek(&s, r->max + 1, &p, i);
143			i += (mode == 'f') ? delimlen : 1;
144			if (fwrite(s.data, 1, n, stdout) != n)
145				eprintf("fwrite <stdout>:");
146			s.data += n;
147			s.len -= n;
148		}
149		putchar('\n');
150	}
151	if (ferror(fp))
152		eprintf("getline %s:", fname);
153}
154
155static void
156usage(void)
157{
158	eprintf("usage: %s -b list [-n] [file ...]\n"
159	        "       %s -c list [file ...]\n"
160	        "       %s -f list [-d delim] [-s] [file ...]\n",
161		argv0, argv0, argv0);
162}
163
// ?man cut: cut out selected portions of each line
// ?man arguments: -b list [-n] [file ...] | -c list [file ...] | -f list [-d delim] [-s] [file ...]
// ?man print selected bytes, characters or fields of each line of the given files
int
main(int argc, char *argv[])
{
	FILE *fp;
	int ret = 0;

	ARGBEGIN {
	// ?man -b:list: select the bytes at the positions given in list
	case 'b':
	// ?man -c:list: select the characters at the positions given in list
	case 'c':
	// ?man -f:list: select the delimiter-separated fields given in list
	case 'f':
		/* the option letter itself becomes the selection mode */
		mode = ARGC();
		parselist(EARGF(usage()));
		break;
	// ?man -d:delim: use delim instead of tab as the field delimiter (with -f)
	case 'd':
		delim = EARGF(usage());
		if (!*delim)
			eprintf("empty delimiter\n");
		/* unescape() rewrites delim in place and yields its new length */
		delimlen = unescape(delim);
		break;
	// ?man -n: do not split multibyte characters (with -b)
	case 'n':
		nflag = 1;
		break;
	// ?man -s: suppress lines that contain no delimiter (with -f)
	case 's':
		sflag = 1;
		break;
	default:
		usage();
	} ARGEND

	/* one of -b, -c or -f is mandatory */
	if (!mode)
		usage();

	/* no file operands: process standard input */
	if (!argc)
		cut(stdin, "<stdin>");
	else {
		for (; *argv; argc--, argv++) {
			/* the operand "-" stands for standard input */
			if (!strcmp(*argv, "-")) {
				*argv = "<stdin>";
				fp = stdin;
			} else if (!(fp = fopen(*argv, "r"))) {
				/* report and carry on with the remaining operands */
				weprintf("fopen %s:", *argv);
				ret = 1;
				continue;
			}
			cut(fp, *argv);
			if (fp != stdin && fshut(fp, *argv))
				ret = 1;
		}
	}

	/* NOTE(review): fshut() (util.h) presumably flushes/closes and reports stream errors - confirm */
	ret |= fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>");

	return ret;
}
226}