master xplshn/aruu / cmd / posix / grep.c
  1
  2#include "config.h"
  3#include "queue.h"
  4#include "util.h"
  5
  6#include <regex.h>
  7#include <stdio.h>
  8#include <stdlib.h>
  9#include <string.h>
 10#include <strings.h>
 11
 12enum { Match = 0, NoMatch = 1, Error = 2 };
 13
 14static void addpattern(const char *);
 15static void addpatternfile(FILE *);
 16static int grep(FILE *, const char *);
 17
 18static int Eflag;
 19static int Fflag;
 20static int Hflag;
 21static int eflag;
 22static int fflag;
 23static int hflag;
 24static int iflag;
 25static int sflag;
 26static int vflag;
 27static int wflag;
 28static int xflag;
 29static int many;
 30static int mode;
 31#if FEATURE_GREP_CONTEXT
 32static long Aflag = 0;
 33static long Bflag = 0;
 34#endif
 35#if FEATURE_GREP_MAX_COUNT
 36static long mval = -1;
 37#endif
 38
 39struct pattern {
 40	regex_t preg;
 41	SLIST_ENTRY(pattern) entry;
 42	char pattern[];
 43};
 44
 45static SLIST_HEAD(phead, pattern) phead;
 46
 47static void
 48addpattern(const char *pattern)
 49{
 50	struct pattern *pnode;
 51	size_t patlen;
 52
 53	patlen = strlen(pattern);
 54
 55	pnode = enmalloc(Error, sizeof(*pnode) + patlen + 9);
 56	SLIST_INSERT_HEAD(&phead, pnode, entry);
 57
 58	if (Fflag || (!xflag && !wflag)) {
 59		memcpy(pnode->pattern, pattern, patlen + 1);
 60	} else {
 61		sprintf(pnode->pattern, "%s%s%s%s%s",
 62			xflag ? "^" : "\\<",
 63			Eflag ? "(" : "\\(",
 64			pattern,
 65			Eflag ? ")" : "\\)",
 66			xflag ? "$" : "\\>");
 67	}
 68}
 69
 70static void
 71addpatternfile(FILE *fp)
 72{
 73	static char *buf = NULL;
 74	static size_t size = 0;
 75	ssize_t len = 0;
 76
 77	while ((len = getline(&buf, &size, fp)) > 0) {
 78		if (buf[len - 1] == '\n')
 79			buf[len - 1] = '\0';
 80		addpattern(buf);
 81	}
 82	if (ferror(fp))
 83		enprintf(Error, "read error:");
 84}
 85
 86#if FEATURE_GREP_CONTEXT
 87static void
 88print_line(const char *str, const char *line, long line_no, char sep)
 89{
 90	if (!hflag && (many || Hflag))
 91		printf("%s%c", str, sep);
 92	if (mode == 'n')
 93		printf("%ld%c", line_no, sep);
 94	puts(line);
 95}
 96#endif
 97
 98static int
 99grep(FILE *fp, const char *str)
100{
101	static char *buf = NULL;
102	static size_t size = 0;
103	ssize_t len = 0;
104	long c = 0, n;
105	struct pattern *pnode;
106	int match, result = NoMatch;
107#if FEATURE_GREP_MAX_COUNT
108	long matches = 0;
109#endif
110#if FEATURE_GREP_CONTEXT
111	struct context_line {
112		char *str;
113		long line_no;
114	} *before_buf = NULL;
115	size_t before_head = 0, before_count = 0, i = 0, idx = 0;
116	long after_left = 0;
117	long last_printed_line = 0;
118
119	if (Bflag > 0 && !(mode == 'c' || mode == 'l' || mode == 'q'))
120		before_buf = ecalloc(Bflag, sizeof(*before_buf));
121#endif
122
123	for (n = 1; (len = getline(&buf, &size, fp)) > 0; n++) {
124		/* remove the trailing newline if one is present */
125		if (buf[len - 1] == '\n')
126			buf[len - 1] = '\0';
127		match = 0;
128		SLIST_FOREACH(pnode, &phead, entry) {
129			if (Fflag) {
130				if (xflag) {
131					if (!(iflag ? strcasecmp : strcmp)(buf, pnode->pattern)) {
132						match = 1;
133						break;
134					}
135				} else {
136					if ((iflag ? strcasestr : strstr)(buf, pnode->pattern)) {
137						match = 1;
138						break;
139					}
140				}
141			} else {
142				if (regexec(&pnode->preg, buf, 0, NULL, 0) == 0) {
143					match = 1;
144					break;
145				}
146			}
147		}
148		if (match != vflag) {
149			result = Match;
150#if FEATURE_GREP_MAX_COUNT
151			matches++;
152#endif
153			switch (mode) {
154			case 'c':
155				c++;
156				break;
157			case 'l':
158				puts(str);
159				goto end;
160			case 'q':
161				exit(Match);
162			default:
163#if FEATURE_GREP_CONTEXT
164				if (Aflag > 0 || Bflag > 0) {
165					if (last_printed_line > 0 && n > last_printed_line + 1)
166						puts("--");
167					for (i = 0; i < before_count; i++) {
168						idx = (before_head - before_count + i + Bflag) % Bflag;
169						print_line(str, before_buf[idx].str, before_buf[idx].line_no, '-');
170						free(before_buf[idx].str);
171						before_buf[idx].str = NULL;
172					}
173					before_count = 0;
174					before_head = 0;
175					print_line(str, buf, n, ':');
176					after_left = Aflag;
177					last_printed_line = n;
178				} else {
179#endif
180					if (!hflag && (many || Hflag))
181						printf("%s:", str);
182					if (mode == 'n')
183						printf("%ld:", n);
184					puts(buf);
185#if FEATURE_GREP_CONTEXT
186				}
187#endif
188				break;
189			}
190#if FEATURE_GREP_MAX_COUNT
191			if (mval >= 0 && matches >= mval)
192				goto end;
193#endif
194		}
195#if FEATURE_GREP_CONTEXT
196		else if (Aflag > 0 || Bflag > 0) {
197			if (mode != 'c' && mode != 'l' && mode != 'q') {
198				if (after_left > 0) {
199					print_line(str, buf, n, '-');
200					after_left--;
201					last_printed_line = n;
202				}
203				if (Bflag > 0) {
204					if (before_count == (size_t)Bflag)
205						free(before_buf[before_head].str);
206					before_buf[before_head].str = estrdup(buf);
207					before_buf[before_head].line_no = n;
208					before_head = (before_head + 1) % Bflag;
209					if (before_count < (size_t)Bflag)
210						before_count++;
211				}
212			}
213		}
214#endif
215	}
216	if (mode == 'c')
217		printf("%ld\n", c);
218end:
219#if FEATURE_GREP_CONTEXT
220	if (before_buf) {
221		for (i = 0; i < (size_t)Bflag; i++)
222			free(before_buf[i].str);
223		free(before_buf);
224	}
225#endif
226	if (ferror(fp)) {
227		weprintf("%s: read error:", str);
228		result = Error;
229	}
230	return result;
231}
232
233static void
234usage(void)
235{
236	enprintf(Error, "usage: %s [-EFHchilnqsvwx]"
237#if FEATURE_GREP_CONTEXT
238	         " [-A num] [-B num] [-C num]"
239#endif
240#if FEATURE_GREP_MAX_COUNT
241	         " [-m num]"
242#endif
243	         " [-e pattern] [-f file] [pattern] [file ...]\n", argv0);
244}
245
246// ?man grep: search files for a pattern
247// ?man arguments: pattern] [file ...
248// ?man grep searches the named input files for lines matching the given pattern
249// ?man if no files are named, or a file is -, standard input is searched
250// ?man by default, matching lines are written to standard output
251int
252main(int argc, char *argv[])
253{
254	struct pattern *pnode;
255	int m, flags = REG_NOSUB, match = NoMatch;
256	FILE *fp;
257	char *arg;
258
259	SLIST_INIT(&phead);
260
261	ARGBEGIN {
262#if FEATURE_GREP_CONTEXT
263	// ?man -A:num: specify A option
264	case 'A':
265		// ?man -A num: print num lines of trailing context after each match
266		Aflag = estrtonum(EARGF(usage()), 0, LONG_MAX);
267		break;
268	// ?man -B:num: specify B option
269	case 'B':
270		// ?man -B num: print num lines of leading context before each match
271		Bflag = estrtonum(EARGF(usage()), 0, LONG_MAX);
272		break;
273	// ?man -C:num: specify C option
274	case 'C':
275		// ?man -C num: print num lines of context before and after each match; equivalent to -A num -B num
276		Aflag = Bflag = estrtonum(EARGF(usage()), 0, LONG_MAX);
277		break;
278	// ?man ARGNUM: specify RGNUM option
279	ARGNUM:
280		Aflag = Bflag = ARGNUMF();
281		break;
282#endif
283#if FEATURE_GREP_MAX_COUNT
284	// ?man -m:num: specify m option
285	case 'm':
286		// ?man -m num: stop reading a file after num matching lines
287		mval = estrtonum(EARGF(usage()), 0, LONG_MAX);
288		break;
289#endif
290	// ?man -E: specify E option
291	case 'E':
292		// ?man -E: interpret pattern as an extended regular expression
293		Eflag = 1;
294		Fflag = 0;
295		flags |= REG_EXTENDED;
296		break;
297	// ?man -F: specify F option
298	case 'F':
299		// ?man -F: interpret pattern as a list of fixed strings separated by newlines
300		Fflag = 1;
301		Eflag = 0;
302		flags &= ~REG_EXTENDED;
303		break;
304	// ?man -H: specify H option
305	case 'H':
306		// ?man -H: always print the file name with matching lines
307		Hflag = 1;
308		hflag = 0;
309		break;
310	// ?man -e:file: specify e option
311	case 'e':
312		// ?man -e pattern: specify a pattern to match; may be given multiple times
313		arg = EARGF(usage());
314		if (!(fp = fmemopen(arg, strlen(arg) + 1, "r")))
315			eprintf("fmemopen:");
316		addpatternfile(fp);
317		efshut(fp, arg);
318		eflag = 1;
319		break;
320	// ?man -f:file: specify f option
321	case 'f':
322		// ?man -f file: read patterns from file, one per line
323		arg = EARGF(usage());
324		fp = fopen(arg, "r");
325		if (!fp)
326			enprintf(Error, "fopen %s:", arg);
327		addpatternfile(fp);
328		efshut(fp, arg);
329		fflag = 1;
330		break;
331	// ?man -h: specify h option
332	case 'h':
333		// ?man -h: never print file names with matching lines
334		hflag = 1;
335		Hflag = 0;
336		break;
337	// ?man -c: specify c option
338	case 'c':
339		// ?man -c: print only a count of matching lines per file
340		/* FALLTHROUGH */
341	// ?man -l: specify l option
342	case 'l':
343		// ?man -l: print only the names of files with at least one matching line
344		/* FALLTHROUGH */
345	// ?man -n: specify n option
346	case 'n':
347		// ?man -n: prefix each matching line with its line number within its file
348		/* FALLTHROUGH */
349	// ?man -q: specify q option
350	case 'q':
351		// ?man -q: quiet mode; exit immediately with status 0 on first match and write nothing
352		mode = ARGC();
353		break;
354	// ?man -i: specify i option
355	case 'i':
356		// ?man -i: perform case-insensitive matching
357		flags |= REG_ICASE;
358		iflag = 1;
359		break;
360	// ?man -s: specify s option
361	case 's':
362		// ?man -s: suppress error messages about nonexistent or unreadable files
363		sflag = 1;
364		break;
365	// ?man -v: specify v option
366	case 'v':
367		// ?man -v: invert the sense of matching to select non-matching lines
368		vflag = 1;
369		break;
370	// ?man -w: specify w option
371	case 'w':
372		// ?man -w: match only whole words
373		wflag = 1;
374		break;
375	// ?man -x: specify x option
376	case 'x':
377		// ?man -x: match only whole lines
378		xflag = 1;
379		break;
380	default:
381		usage();
382	} ARGEND
383
384	if (argc == 0 && !eflag && !fflag)
385		usage(); /* no pattern */
386
387	/* just add literal pattern to list */
388	if (!eflag && !fflag) {
389		if (!(fp = fmemopen(argv[0], strlen(argv[0]) + 1, "r")))
390			eprintf("fmemopen:");
391		addpatternfile(fp);
392		efshut(fp, argv[0]);
393		argc--;
394		argv++;
395	}
396
397	if (!Fflag)
398		/* compile regex for all search patterns */
399		SLIST_FOREACH(pnode, &phead, entry)
400			enregcomp(Error, &pnode->preg, pnode->pattern, flags);
401	many = (argc > 1);
402	if (argc == 0) {
403		match = grep(stdin, "<stdin>");
404	} else {
405		for (; *argv; argc--, argv++) {
406			if (!strcmp(*argv, "-")) {
407				*argv = "<stdin>";
408				fp = stdin;
409			} else if (!(fp = fopen(*argv, "r"))) {
410				if (!sflag)
411					weprintf("fopen %s:", *argv);
412				match = Error;
413				continue;
414			}
415			m = grep(fp, *argv);
416			if (m == Error || (match != Error && m == Match))
417				match = m;
418			if (fp != stdin && fshut(fp, *argv))
419				match = Error;
420		}
421	}
422
423	if (fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>"))
424		match = Error;
425
426	// ?man
427	// ?man ## Exit status
428	// ?man
429	// ?man 0
430	// ?man : one or more lines matched in at least one file
431	// ?man
432	// ?man 1
433	// ?man : no lines matched in any file
434	// ?man
435	// ?man 2
436	// ?man : an error occurred
437	// ?man
438	// ?man ## See also
439	// ?man
440	// ?man sed(1), awk(1)
441	// ?man
442
443	return match;
444}