1
2
3/* FIXME: summary
4 * decide whether we enforce valid UTF-8, right now it's enforced in certain
5 * parts of the script, but not the input...
6 * nul bytes cause explosions due to use of libc string functions. thoughts?
7 * lack of newline at end of file, currently we add one. what should we do?
8 * allow "\\t" for "\t" etc. in regex? in replacement text?
9 * POSIX says don't flush on N when out of input, but GNU and busybox do.
10 */
11
12#include "config.h"
13#include "utf.h"
14#include "util.h"
15
16#include <ctype.h>
17#include <errno.h>
18#include <libgen.h>
19#include <regex.h>
20#include <stdlib.h>
21#include <string.h>
22#include <sys/stat.h>
23#include <unistd.h>
24
25/* Types */
26
/* Growable array of untyped pointers (see push() and pop()).
 * used as queue for writes and stack for {,:,b,t
 */
typedef struct {
 void **data; /* element storage, grown by push() */
 size_t size; /* number of elements currently stored */
 size_t cap; /* number of elements data has room for */
} Vec;
33
/* used for arbitrary growth, str is a C string
 * FIXME: does it make sense to keep track of length? or just rely on libc
 * string functions? If we want to support nul bytes everything changes
 */
typedef struct {
 char *str; /* NUL-terminated buffer; the stra*() helpers treat cap == 0 as "no buffer yet" */
 size_t cap; /* number of bytes allocated for str */
} String;
42
typedef struct Cmd Cmd;
/* Per-function dispatch record, one entry of the fns[] table */
typedef struct {
 void (*fn)(Cmd *); /* executes the command; NULL marks an unknown function */
 char *(*getarg)(Cmd *, char *); /* parses the argument, returns pointer past it; NULL if no argument */
 void (*freearg)(Cmd *); /* releases what getarg allocated; NULL if nothing to free */
 unsigned char naddr; /* maximum number of addresses the function accepts */
} Fninfo;
50
/* A single address; type selects which member of u (if any) is meaningful */
typedef struct {
 union {
 size_t lineno; /* used when type == LINE */
 regex_t *re; /* used when type == REGEX */
 } u;
 enum {
 IGNORE, /* empty address, ignore */
 EVERY , /* every line */
 LINE , /* line number */
 LAST , /* last line ($) */
 REGEX , /* use included regex */
 LASTRE, /* use most recently used regex */
 } type;
} Addr;
65
/* Address range of a command, filled in by make_range().
 * DISCUSS: naddr is not strictly necessary, but very helpful
 * naddr == 0 iff beg.type == EVERY && end.type == IGNORE
 * naddr == 1 iff beg.type != IGNORE && end.type == IGNORE
 * naddr == 2 iff beg.type != IGNORE && end.type != IGNORE
 */
typedef struct {
 Addr beg; /* first address */
 Addr end; /* second address, IGNORE when absent */
 unsigned char naddr; /* number of addresses given (0, 1 or 2) */
} Range;
76
/* Argument of the s command, filled in by get_s_arg() */
typedef struct {
 regex_t *re; /* if NULL use last regex */
 String repl; /* replacement text */
 FILE *file; /* file of the w flag, NULL when the flag is absent */
 size_t occurrence; /* 0 for all (g flag) */
 Rune delim; /* delimiter rune, kept so parsing can resume on a continuation line */
 unsigned int p:1; /* p flag */
} Sarg;
85
/* Argument of the y command; get_y_arg() builds both sets as zero-terminated
 * Rune arrays and rejects sets of different lengths
 */
typedef struct {
 Rune *set1; /* runes to be replaced */
 Rune *set2; /* replacement runes */
} Yarg;
90
/* Argument shared by the a, c, i and r commands */
typedef struct {
 String str; /* a,c,i text. r file path */
 void (*print)(char *, FILE *); /* check_puts for a, write_file for r, unused for c,i */
} ACIRarg;
95
/* One compiled command of the program; which member of u is live depends on
 * the function the command runs
 */
struct Cmd {
 Range range; /* addresses selecting the lines the command applies to */
 Fninfo *fninfo; /* entry of fns[] for the command's function */
 union {
 Cmd *jump; /* used for b,t when running */
 char *label; /* used for :,b,t when building */
 ptrdiff_t offset; /* used for { (pointers break during realloc) */
 FILE *file; /* used for w */

 /* FIXME: Should the following be in the union? or pointers and malloc? */
 Sarg s;
 Yarg y;
 ACIRarg acir;
 } u; /* I find your lack of anonymous unions disturbing */
 unsigned int in_match:1; /* maintained by in_range(): inside a two address range */
 unsigned int negate :1; /* command was preceded by ! */
};
113
/* Files for w command (and s' w flag)
 * get_w_arg() keeps one entry per distinct path so a path is opened only once
 */
typedef struct {
 char *path; /* file name as written in the script */
 FILE *file; /* stream opened for writing on path */
} Wfile;
119
/*
 * Function Declarations
 *
 * Forward declarations of the static functions defined in this file.
 */

/* Dynamically allocated arrays and strings */
static void resize(void **ptr, size_t *nmemb, size_t size, size_t new_nmemb, void **next);
static void *pop(Vec *v);
static void push(Vec *v, void *p);
static void stracat(String *dst, char *src);
static void strnacat(String *dst, char *src, size_t n);
static void stracpy(String *dst, char *src);

/* Cleanup and errors */
static void usage(void);

/* Parsing functions and related utilities */
static void compile(char *s, int isfile);
static int read_line(FILE *f, String *s);
static char *make_range(Range *range, char *s);
static char *make_addr(Addr *addr, char *s);
static char *find_delim(char *s, Rune delim, int do_brackets);
static char *chompr(char *s, Rune rune);
static char *chomp(char *s);
static Rune *strtorunes(char *s, size_t nrunes);
static long stol(char *s, char **endp);
static size_t escapes(char *beg, char *end, Rune delim, int n_newline);
static size_t echarntorune(Rune *r, char *s, size_t n);
static void insert_labels(void);

/* Get and Free arg and related utilities
 * the get functions return a pointer to one past the argument they parsed
 */
static char *get_aci_arg(Cmd *c, char *s);
static void aci_append(Cmd *c, char *s);
static void free_acir_arg(Cmd *c);
static char *get_bt_arg(Cmd *c, char *s);
static char *get_r_arg(Cmd *c, char *s);
static char *get_s_arg(Cmd *c, char *s);
static void free_s_arg(Cmd *c);
static char *get_w_arg(Cmd *c, char *s);
static char *get_y_arg(Cmd *c, char *s);
static void free_y_arg(Cmd *c);
static char *get_colon_arg(Cmd *c, char *s);
static char *get_lbrace_arg(Cmd *c, char *s);
static char *get_rbrace_arg(Cmd *c, char *s);
static char *semicolon_arg(char *s);

/* Running */
static void run(void);
static int in_range(Cmd *c);
static int match_addr(Addr *a);
static int next_file(void);
static int is_eof(FILE *f);
static void do_writes(void);
static void write_file(char *path, FILE *out);
static void check_puts(char *s, FILE *f);
static void write_patt(char *s, FILE *f);
static void update_ranges(Cmd *beg, Cmd *end);

/* Sed functions, one per entry of the fns[] table */
static void cmd_y(Cmd *c);
static void cmd_x(Cmd *c);
static void cmd_w(Cmd *c);
static void cmd_t(Cmd *c);
static void cmd_s(Cmd *c);
static void cmd_r(Cmd *c);
static void cmd_q(Cmd *c);
static void cmd_P(Cmd *c);
static void cmd_p(Cmd *c);
static void cmd_N(Cmd *c);
static void cmd_n(Cmd *c);
static void cmd_l(Cmd *c);
static void cmd_i(Cmd *c);
static void cmd_H(Cmd *c);
static void cmd_h(Cmd *c);
static void cmd_G(Cmd *c);
static void cmd_g(Cmd *c);
static void cmd_D(Cmd *c);
static void cmd_d(Cmd *c);
static void cmd_c(Cmd *c);
static void cmd_b(Cmd *c);
static void cmd_a(Cmd *c);
static void cmd_colon(Cmd *c);
static void cmd_equal(Cmd *c);
static void cmd_lbrace(Cmd *c);
static void cmd_rbrace(Cmd *c);
static void cmd_last(Cmd *c);

/* Actions */
static void new_line(void);
static void app_line(void);
static void new_next(void);
static void old_next(void);
211
/*
 * Globals
 */
static Vec braces, labels, branches; /* holds ptrdiff_t. addrs of {, :, bt */
static Vec writes; /* holds cmd*. writes scheduled by a and r commands */
static Vec wfiles; /* holds Wfile*. files for w and s///w commands */

static Cmd *prog, *pc; /* Program, program counter */
static size_t pcap; /* number of Cmd slots allocated for prog */
static size_t lineno; /* script line while compiling, reset to 0 by run() */
#if FEATURE_SED_PRESERVE_NEWLINE
static int hadnl = 1; /* set by read_line(): did the last line read end in a newline */
#endif

static regex_t *lastre; /* last used regex for empty regex search */
static char **files; /* list of file names from argv */
static FILE *file; /* current file we are reading */
static int ret; /* exit status */

/* patt is matched against by address regexes, hold is set up empty by run(),
 * genbuf is the scratch line buffer used by compile() and write_file() */
static String patt, hold, genbuf;
232
/* Global option and parser/runtime state flags */
static struct {
 unsigned int n :1; /* -n (no print) */
 unsigned int E :1; /* -E (extended re) */
 unsigned int s :1; /* s/// replacement happened */
 unsigned int aci_cont:1; /* a,c,i text continuation */
 unsigned int s_cont :1; /* s/// replacement text continuation */
 unsigned int halt :1; /* halt execution */
} gflags;
241
/* Dispatch table indexed by the function character; compile() looks an entry
 * up after checking the character with isascii().
 * Columns: fn (run the command), getarg (parse its argument), freearg
 * (release the argument), naddr (maximum number of addresses accepted).
 * Entries not listed are zero, so their fn is NULL ("bad sed function").
 * FIXME: move character inside Fninfo and only use 26*sizeof(Fninfo) instead of 127*sizeof(Fninfo) bytes */
static Fninfo fns[] = {
 ['a'] = { cmd_a , get_aci_arg , free_acir_arg , 1 }, /* schedule write of text for later */
 ['b'] = { cmd_b , get_bt_arg , NULL , 2 }, /* branch to label char *label when building, Cmd *jump when running */
 ['c'] = { cmd_c , get_aci_arg , free_acir_arg , 2 }, /* delete pattern space. at 0 or 1 addr or end of 2 addr, write text */
 ['d'] = { cmd_d , NULL , NULL , 2 }, /* delete pattern space */
 ['D'] = { cmd_D , NULL , NULL , 2 }, /* delete to first newline and start new cycle without reading (if no newline, d) */
 ['g'] = { cmd_g , NULL , NULL , 2 }, /* replace pattern space with hold space */
 ['G'] = { cmd_G , NULL , NULL , 2 }, /* append newline and hold space to pattern space */
 ['h'] = { cmd_h , NULL , NULL , 2 }, /* replace hold space with pattern space */
 ['H'] = { cmd_H , NULL , NULL , 2 }, /* append newline and pattern space to hold space */
 ['i'] = { cmd_i , get_aci_arg , free_acir_arg , 1 }, /* write text */
 ['l'] = { cmd_l , NULL , NULL , 2 }, /* write pattern space in 'visually unambiguous form' */
 ['n'] = { cmd_n , NULL , NULL , 2 }, /* write pattern space (unless -n) read to replace pattern space (if no input, quit) */
 ['N'] = { cmd_N , NULL , NULL , 2 }, /* append to pattern space separated by newline, line number changes (if no input, quit) */
 ['p'] = { cmd_p , NULL , NULL , 2 }, /* write pattern space */
 ['P'] = { cmd_P , NULL , NULL , 2 }, /* write pattern space up to first newline */
 ['q'] = { cmd_q , NULL , NULL , 1 }, /* quit */
 ['r'] = { cmd_r , get_r_arg , free_acir_arg , 1 }, /* write contents of file (unable to open/read treated as empty file) */
 ['s'] = { cmd_s , get_s_arg , free_s_arg , 2 }, /* find/replace/all that crazy s stuff */
 ['t'] = { cmd_t , get_bt_arg , NULL , 2 }, /* if s/// succeeded (since input or last t) branch to label (branch to end if no label) */
 ['w'] = { cmd_w , get_w_arg , NULL , 2 }, /* append pattern space to file */
 ['x'] = { cmd_x , NULL , NULL , 2 }, /* exchange pattern and hold spaces */
 ['y'] = { cmd_y , get_y_arg , free_y_arg , 2 }, /* replace runes in set1 with runes in set2 */
 [':'] = { cmd_colon , get_colon_arg , NULL , 0 }, /* defines label for later b and t commands */
 ['='] = { cmd_equal , NULL , NULL , 1 }, /* printf("%d\n", line_number); */
 ['{'] = { cmd_lbrace, get_lbrace_arg, NULL , 2 }, /* if we match, run commands, otherwise jump to close */
 ['}'] = { cmd_rbrace, get_rbrace_arg, NULL , 0 }, /* noop, hold onto open for ease of building scripts */

 [0x7f] = { NULL, NULL, NULL, 0 }, /* index is checked with isascii(3p). fill out rest of array */
};
273
274/*
275 * Function Definitions
276 */
277
/* Reallocate the array at *ptr, which currently holds *nmemb members of size
 * bytes each, so that it holds new_nmemb members. On return *ptr is the new
 * array and *nmemb is new_nmemb. When next is not NULL, *next is set to one
 * past the old end, i.e. the old *nmemb members into the new array.
 * A new_nmemb of 0 frees the array and leaves *ptr NULL.
 * Allocation goes through ereallocarray().
 */
static void
resize(void **ptr, size_t *nmemb, size_t size, size_t new_nmemb, void **next)
{
	size_t old_bytes = *nmemb * size;
	void *fresh = NULL;

	if (new_nmemb)
		fresh = ereallocarray(*ptr, new_nmemb, size);
	else /* turns out realloc(*ptr, 0) != free(*ptr) */
		free(*ptr);

	*ptr = fresh;
	*nmemb = new_nmemb;
	if (next)
		*next = (char *)fresh + old_bytes;
}
299
300static void *
301pop(Vec *v)
302{
303 if (!v->size)
304 return NULL;
305 return v->data[--v->size];
306}
307
308static void
309push(Vec *v, void *p)
310{
311 if (v->size == v->cap)
312 resize((void **)&v->data, &v->cap, sizeof(*v->data), v->cap * 2 + 1, NULL);
313 v->data[v->size++] = p;
314}
315
316static void
317stracat(String *dst, char *src)
318{
319 int new = !dst->cap;
320 size_t len;
321
322 len = (new ? 0 : strlen(dst->str)) + strlen(src) + 1;
323 if (dst->cap < len)
324 resize((void **)&dst->str, &dst->cap, 1, len * 2, NULL);
325 if (new)
326 *dst->str = '\0';
327 strcat(dst->str, src);
328}
329
330static void
331strnacat(String *dst, char *src, size_t n)
332{
333 int new = !dst->cap;
334 size_t len;
335
336 len = strlen(src);
337 len = (new ? 0 : strlen(dst->str)) + MIN(n, len) + 1;
338 if (dst->cap < len)
339 resize((void **)&dst->str, &dst->cap, 1, len * 2, NULL);
340 if (new)
341 *dst->str = '\0';
342 strlcat(dst->str, src, len);
343}
344
345static void
346stracpy(String *dst, char *src)
347{
348 size_t len;
349
350 len = strlen(src) + 1;
351 if (dst->cap < len)
352 resize((void **)&dst->str, &dst->cap, 1, len * 2, NULL);
353 strcpy(dst->str, src);
354}
355
356static void
357leprintf(char *s)
358{
359 if (errno)
360 eprintf("%zu: %s: %s\n", lineno, s, strerror(errno));
361 else
362 eprintf("%zu: %s\n", lineno, s);
363}
364
365/* FIXME: write usage message */
366#if FEATURE_SED_INPLACE
/* NOTE(review): presumably state for an in-place editing option; neither
 * variable is referenced in the visible part of the file - confirm */
static int iflag = 0;
static char *backup_suffix = NULL;
369
/* Create a unique temporary file "sedtmpXXXXXX" in the directory of
 * orig_path using mkstemp().
 * On success return the open file descriptor and store the allocated path in
 * *temp_path (the caller owns it). On failure return -1 and leave *temp_path
 * untouched.
 */
static int
create_temp_file(const char *orig_path, char **temp_path)
{
	char *pathcopy = estrdup(orig_path); /* dirname() may modify its argument */
	char *parent = dirname(pathcopy);
	char *name = emalloc(strlen(parent) + 16); /* room for "/sedtmpXXXXXX" and NUL */
	int fd;

	sprintf(name, "%s/sedtmpXXXXXX", parent);
	free(pathcopy);

	fd = mkstemp(name);
	if (fd >= 0) {
		*temp_path = name;
		return fd;
	}
	free(name);
	return -1;
}
390#endif
391
/* Print the command line synopsis through eprintf(). */
static void
usage(void)
{
	eprintf(
	    "usage: sed [-nrE] script [file ...]\n"
	    "       sed [-nrE] -e script [-e script] ... [-f scriptfile] ... [file ...]\n"
	    "       sed [-nrE] [-e script] ... -f scriptfile [-f scriptfile] ... [file ...]\n"
	);
}
399
/* Parse a script and append its commands to the program at pc.
 * s is the path of a script file when isfile is nonzero, otherwise it is the
 * script text itself (an empty script text is ignored).
 * The script is read line by line into genbuf; lineno counts the lines read.
 *
 * Differences from POSIX
 * we allow semicolons and trailing blanks inside {}
 * we allow spaces after ! (and in between !s)
 * we allow extended regular expressions (-E)
 */
static void
compile(char *s, int isfile)
{
 FILE *f;

 if (isfile) {
 f = fopen(s, "r");
 if (!f)
 eprintf("fopen %s:", s);
 } else {
 if (!*s) /* empty string script */
 return;
 f = fmemopen(s, strlen(s), "r");
 if (!f)
 eprintf("fmemopen:");
 }

 /* NOTE: get arg functions can't use genbuf */
 while (read_line(f, &genbuf) != EOF) {
 s = genbuf.str;

 /* if the first two characters of the script are "#n" default output shall be suppressed */
 if (++lineno == 1 && *s == '#' && s[1] == 'n') {
 gflags.n = 1;
 continue;
 }

 /* the whole line is more text for the previous a, c or i command */
 if (gflags.aci_cont) {
 aci_append(pc - 1, s);
 continue;
 }
 /* the line starts with the rest of the previous s command's replacement */
 if (gflags.s_cont)
 s = (pc - 1)->fninfo->getarg(pc - 1, s);

 /* one iteration per command on the line */
 while (*s) {
 s = chompr(s, ';');
 if (!*s || *s == '#')
 break;

 /* program is full: grow it, resize() repoints pc into the new array */
 if ((size_t)(pc - prog) == pcap)
 resize((void **)&prog, &pcap, sizeof(*prog), pcap * 2 + 1, (void **)&pc);

 pc->range.beg.type = pc->range.end.type = IGNORE;
 pc->fninfo = NULL;
 pc->in_match = 0;

 s = make_range(&pc->range, s);
 s = chomp(s);
 pc->negate = *s == '!';
 s = chompr(s, '!');

 /* an entry with a NULL fn is not a sed function */
 if (!isascii(*s) || !(pc->fninfo = &fns[(unsigned)*s])->fn)
 leprintf("bad sed function");
 if (pc->range.naddr > pc->fninfo->naddr)
 leprintf("wrong number of addresses");
 s++;

 if (pc->fninfo->getarg)
 s = pc->fninfo->getarg(pc, s);

 pc++;
 }
 }

 fshut(f, s);
}
471
472/* FIXME: if we decide to honor lack of trailing newline, set/clear a global
473 * flag when reading a line
474 */
475static int
476read_line(FILE *f, String *s)
477{
478 ssize_t len;
479
480 if (!f)
481 return EOF;
482
483 if ((len = getline(&s->str, &s->cap, f)) < 0) {
484 if (ferror(f))
485 eprintf("getline:");
486 return EOF;
487 }
488#if FEATURE_SED_PRESERVE_NEWLINE
489 if (len > 0)
490 hadnl = (s->str[len - 1] == '\n');
491#endif
492 if (s->str[--len] == '\n')
493 s->str[len] = '\0';
494 return 0;
495}
496
497/* read first range from s, return pointer to one past end of range */
498static char *
499make_range(Range *range, char *s)
500{
501 s = make_addr(&range->beg, s);
502
503 if (*s == ',')
504 s = make_addr(&range->end, s + 1);
505 else
506 range->end.type = IGNORE;
507
508 if (range->beg.type == EVERY && range->end.type == IGNORE) range->naddr = 0;
509 else if (range->beg.type != IGNORE && range->end.type == IGNORE) range->naddr = 1;
510 else if (range->beg.type != IGNORE && range->end.type != IGNORE) range->naddr = 2;
511 else leprintf("this is impossible...");
512
513 return s;
514}
515
516/* read first addr from s, return pointer to one past end of addr */
517static char *
518make_addr(Addr *addr, char *s)
519{
520 Rune r;
521 char *p = s + strlen(s);
522 size_t rlen = echarntorune(&r, s, p - s);
523
524 if (r == '$') {
525 addr->type = LAST;
526 s += rlen;
527 } else if (isdigitrune(r)) {
528 addr->type = LINE;
529 addr->u.lineno = stol(s, &s);
530 } else if (r == '/' || r == '\\') {
531 Rune delim;
532 if (r == '\\') {
533 s += rlen;
534 rlen = echarntorune(&r, s, p - s);
535 }
536 if (r == '\\')
537 leprintf("bad delimiter '\\'");
538 delim = r;
539 s += rlen;
540 rlen = echarntorune(&r, s, p - s);
541 if (r == delim) {
542 addr->type = LASTRE;
543 s += rlen;
544 } else {
545 addr->type = REGEX;
546 p = find_delim(s, delim, 1);
547 if (!*p)
548 leprintf("unclosed regex");
549 p -= escapes(s, p, delim, 0);
550 *p++ = '\0';
551 addr->u.re = emalloc(sizeof(*addr->u.re));
552 eregcomp(addr->u.re, s, gflags.E ? REG_EXTENDED : 0);
553 s = p;
554 }
555 } else {
556 addr->type = EVERY;
557 }
558
559 return s;
560}
561
/* return pointer to first delim in s that is not escaped
 * and if do_brackets is set, not in [] (note possible [::], [..], [==], inside [])
 * return pointer to trailing nul byte if no delim found
 *
 * s is scanned one rune at a time by a small state machine; escape tracks
 * whether the previous rune outside brackets was an unescaped backslash
 *
 * any escaped character that is not special is just itself (POSIX undefined)
 * FIXME: pull out into some util thing, will be useful for ed as well
 */
static char *
find_delim(char *s, Rune delim, int do_brackets)
{
 enum {
 OUTSIDE , /* not in brackets */
 BRACKETS_OPENING, /* last char was first [ or last two were first [^ */
 BRACKETS_INSIDE , /* inside [] */
 INSIDE_OPENING , /* inside [] and last char was [ */
 CLASS_INSIDE , /* inside class [::], or collating element [..] or [==], inside [] */
 CLASS_CLOSING , /* inside class [::], or collating element [..] or [==], and last character was the respective : . or = */
 } state = OUTSIDE;

 Rune r, c = 0; /* no c won't be used uninitialized, shutup -Wall */
 size_t rlen;
 int escape = 0;
 char *end = s + strlen(s);

 for (; *s; s += rlen) {
 rlen = echarntorune(&r, s, end - s);

 /* right after the opening [ (or [^): a ] here is a literal member of the set */
 if (state == BRACKETS_OPENING && r == '^' ) { continue; }
 else if (state == BRACKETS_OPENING && r == ']' ) { state = BRACKETS_INSIDE ; continue; }
 else if (state == BRACKETS_OPENING ) { state = BRACKETS_INSIDE ; }

 /* c remembers which of : . = opened the class so only the same rune can close it */
 if (state == CLASS_CLOSING && r == ']' ) { state = BRACKETS_INSIDE ; }
 else if (state == CLASS_CLOSING ) { state = CLASS_INSIDE ; }
 else if (state == CLASS_INSIDE && r == c ) { state = CLASS_CLOSING ; }
 else if (state == INSIDE_OPENING && (r == ':' ||
 r == '.' ||
 r == '=') ) { state = CLASS_INSIDE ; c = r; }
 else if (state == INSIDE_OPENING && r == ']' ) { state = OUTSIDE ; }
 else if (state == INSIDE_OPENING ) { state = BRACKETS_INSIDE ; }
 else if (state == BRACKETS_INSIDE && r == '[' ) { state = INSIDE_OPENING ; }
 else if (state == BRACKETS_INSIDE && r == ']' ) { state = OUTSIDE ; }
 else if (state == OUTSIDE && escape ) { escape = 0 ; }
 else if (state == OUTSIDE && r == '\\' ) { escape = 1 ; }
 else if (state == OUTSIDE && r == delim) return s;
 else if (state == OUTSIDE && do_brackets && r == '[' ) { state = BRACKETS_OPENING; }
 }
 return s;
}
610
/* eat all leading whitespace, return pointer to the first rune kept */
static char *
chomp(char *s)
{
	char *rest = chompr(s, 0);

	return rest;
}
616
617/* eat all leading whitespace and occurrences of rune */
618static char *
619chompr(char *s, Rune rune)
620{
621 Rune r;
622 size_t rlen;
623 char *end = s + strlen(s);
624
625 while (*s && (rlen = echarntorune(&r, s, end - s)) && (isspacerune(r) || r == rune))
626 s += rlen;
627 return s;
628}
629
630/* convert first nrunes Runes from UTF-8 string s in allocated Rune*
631 * NOTE: sequence must be valid UTF-8, check first */
632static Rune *
633strtorunes(char *s, size_t nrunes)
634{
635 Rune *rs, *rp;
636
637 rp = rs = ereallocarray(NULL, nrunes + 1, sizeof(*rs));
638
639 while (nrunes--)
640 s += chartorune(rp++, s);
641
642 *rp = '\0';
643 return rs;
644}
645
/* Convert the decimal number at the start of s with strtol() and store a
 * pointer to the first unconverted byte in *endp.
 * A conversion error or a string that holds no number is reported through
 * leprintf().
 */
static long
stol(char *s, char **endp)
{
	long value;

	errno = 0;
	value = strtol(s, endp, 10);

	if (errno != 0)
		leprintf("strtol:");
	if (s == *endp)
		leprintf("strtol: invalid number");

	return value;
}
660
/* from beg to end replace "\\d" with "d" and "\\n" with "\n" (where d is delim)
 * if delim is 'n' and n_newline is 0 then "\\n" is replaced with "n" (normal)
 * if delim is 'n' and n_newline is 1 then "\\n" is replaced with "\n" (y command)
 * if delim is 0 all escaped characters represent themselves (aci text)
 * memmove rest of string (beyond end) into place
 * return the number of converted escapes (backslashes removed)
 * the rewrite happens in place: src reads ahead of dst, which writes the result
 * FIXME: this has had too many corner cases slapped on and is ugly. rewrite better
 */
static size_t
escapes(char *beg, char *end, Rune delim, int n_newline)
{
 size_t num = 0;
 char *src = beg, *dst = beg;

 while (src < end) {
 /* handle escaped backslash specially so we don't think the second
 * backslash is escaping something */
 if (*src == '\\' && src[1] == '\\') {
 *dst++ = *src++;
 if (delim)
 *dst++ = *src++;
 else
 src++;
 } else if (*src == '\\' && !delim) {
 /* aci text: drop the backslash, the next byte is copied as is */
 src++;
 } else if (*src == '\\' && src[1]) {
 Rune r;
 size_t rlen;
 num++;
 src++;
 rlen = echarntorune(&r, src, end - src);

 if (r == 'n' && delim == 'n') {
 *src = n_newline ? '\n' : 'n'; /* src so we can still memmove() */
 } else if (r == 'n') {
 *src = '\n';
 } else if (r != delim) {
 /* not an escape we convert: keep the backslash, undo the count */
 *dst++ = '\\';
 num--;
 }

 memmove(dst, src, rlen);
 dst += rlen;
 src += rlen;
 } else {
 *dst++ = *src++;
 }
 }
 /* close the gap left by removed backslashes, nul byte included */
 memmove(dst, src, strlen(src) + 1);
 return num;
}
712
713static size_t
714echarntorune(Rune *r, char *s, size_t n)
715{
716 size_t rlen = charntorune(r, s, n);
717 if (!rlen || *r == Runeerror)
718 leprintf("invalid UTF-8");
719 return rlen;
720}
721
722static void
723insert_labels(void)
724{
725 size_t i;
726 Cmd *from, *to;
727
728 while (branches.size) {
729 from = prog + (ptrdiff_t)pop(&branches);
730
731 if (!from->u.label) {/* no label branch to end of script */
732 from->u.jump = pc - 1;
733 } else {
734 for (i = 0; i < labels.size; i++) {
735 to = prog + (ptrdiff_t)labels.data[i];
736 if (!strcmp(from->u.label, to->u.label)) {
737 from->u.jump = to;
738 break;
739 }
740 }
741 if (i == labels.size)
742 leprintf("bad label");
743 }
744 }
745}
746
747/*
748 * Getargs / Freeargs
749 * Read argument from s, return pointer to one past last character of argument
750 */
751
752/* POSIX compliant
753 * i\
754 * foobar
755 *
756 * also allow the following non POSIX compliant
757 * i # empty line
758 * ifoobar
759 * ifoobar\
760 * baz
761 *
762 * FIXME: GNU and busybox discard leading spaces
763 * i foobar
764 * i foobar
765 * ifoobar
766 * are equivalent in GNU and busybox. We don't. Should we?
767 */
768static char *
769get_aci_arg(Cmd *c, char *s)
770{
771 c->u.acir.print = check_puts;
772 c->u.acir.str = (String){ NULL, 0 };
773
774 gflags.aci_cont = !!*s; /* no continue flag if empty string */
775
776 /* neither empty string nor POSIX compliant */
777 if (*s && !(*s == '\\' && !s[1]))
778 aci_append(c, s);
779
780 return s + strlen(s);
781}
782
783static void
784aci_append(Cmd *c, char *s)
785{
786 char *end = s + strlen(s), *p = end;
787
788 gflags.aci_cont = 0;
789 while (--p >= s && *p == '\\')
790 gflags.aci_cont = !gflags.aci_cont;
791
792 if (gflags.aci_cont)
793 *--end = '\n';
794
795 escapes(s, end, 0, 0);
796 stracat(&c->u.acir.str, s);
797}
798
799static void
800free_acir_arg(Cmd *c)
801{
802 free(c->u.acir.str.str);
803}
804
805/* POSIX dictates that label is rest of line, including semicolons, trailing
806 * whitespace, closing braces, etc. and can be limited to 8 bytes
807 *
808 * I allow a semicolon or closing brace to terminate a label name, it's not
809 * POSIX compliant, but it's useful and every sed version I've tried to date
810 * does the same.
811 *
812 * FIXME: POSIX dictates that leading whitespace is ignored but trailing
813 * whitespace is not. This is annoying and we should probably get rid of it.
814 */
815static char *
816get_bt_arg(Cmd *c, char *s)
817{
818 char *p = semicolon_arg(s = chomp(s));
819
820 if (p != s) {
821 c->u.label = estrndup(s, p - s);
822 } else {
823 c->u.label = NULL;
824 }
825
826 push(&branches, (void *)(c - prog));
827
828 return p;
829}
830
831/* POSIX dictates file name is rest of line including semicolons, trailing
832 * whitespace, closing braces, etc. and file name must be preceded by a space
833 *
834 * I allow a semicolon or closing brace to terminate a file name and don't
835 * enforce leading space.
836 *
837 * FIXME: decide whether trailing whitespace should be included and fix
838 * accordingly
839 */
840static char *
841get_r_arg(Cmd *c, char *s)
842{
843 char *p = semicolon_arg(s = chomp(s));
844
845 if (p == s)
846 leprintf("no file name");
847
848 c->u.acir.str.str = estrndup(s, p - s);
849 c->u.acir.print = write_file;
850
851 return p;
852}
853
854/* we allow "\\n" in replacement text to mean "\n" (undefined in POSIX)
855 *
856 * FIXME: allow other escapes in regex and replacement? if so change escapes()
857 */
858static char *
859get_s_arg(Cmd *c, char *s)
860{
861 Rune delim, r;
862 Cmd buf;
863 char *p;
864 int esc, lastre;
865
866 /* s/Find/Replace/Flags */
867
868 /* Find */
869 if (!gflags.s_cont) { /* NOT continuing from literal newline in replacement text */
870 lastre = 0;
871 c->u.s.repl = (String){ NULL, 0 };
872 c->u.s.occurrence = 1;
873 c->u.s.file = NULL;
874 c->u.s.p = 0;
875
876 if (!*s || *s == '\\')
877 leprintf("bad delimiter");
878
879 p = s + strlen(s);
880 s += echarntorune(&delim, s, p - s);
881 c->u.s.delim = delim;
882
883 echarntorune(&r, s, p - s);
884 if (r == delim) /* empty regex */
885 lastre = 1;
886
887 p = find_delim(s, delim, 1);
888 if (!*p)
889 leprintf("missing second delimiter");
890 p -= escapes(s, p, delim, 0);
891 *p = '\0';
892
893 if (lastre) {
894 c->u.s.re = NULL;
895 } else {
896 c->u.s.re = emalloc(sizeof(*c->u.s.re));
897 /* FIXME: different eregcomp that calls fatal */
898 eregcomp(c->u.s.re, s, gflags.E ? REG_EXTENDED : 0);
899 }
900 s = p + runelen(delim);
901 }
902
903 /* Replace */
904 delim = c->u.s.delim;
905
906 p = find_delim(s, delim, 0);
907 p -= escapes(s, p, delim, 0);
908 if (!*p) { /* no third delimiter */
909 /* FIXME: same backslash counting as aci_append() */
910 if (p[-1] != '\\')
911 leprintf("missing third delimiter or <backslash><newline>");
912 p[-1] = '\n';
913 gflags.s_cont = 1;
914 } else {
915 gflags.s_cont = 0;
916 }
917
918 /* check for bad references in replacement text */
919 *p = '\0';
920 for (esc = 0, p = s; *p; p++) {
921 if (esc) {
922 esc = 0;
923 if (isdigit(*p) && c->u.s.re && (size_t)(*p - '0') > c->u.s.re->re_nsub)
924 leprintf("back reference number greater than number of groups");
925 } else if (*p == '\\') {
926 esc = 1;
927 }
928 }
929 stracat(&c->u.s.repl, s);
930
931 if (gflags.s_cont)
932 return p;
933
934 s = p + runelen(delim);
935
936 /* Flags */
937 p = semicolon_arg(s = chomp(s));
938
939 /* FIXME: currently for simplicity take last of g or occurrence flags and
940 * ignore multiple p flags. need to fix that */
941 for (; s < p; s++) {
942 if (isdigit(*s)) {
943 c->u.s.occurrence = stol(s, &s);
944 s--; /* for loop will advance pointer */
945 } else {
946 switch (*s) {
947 case 'g': c->u.s.occurrence = 0; break;
948 case 'p': c->u.s.p = 1; break;
949 case 'w':
950 /* must be last flag, take everything up to newline/semicolon
951 * s == p after this */
952 s = get_w_arg(&buf, chomp(s+1));
953 c->u.s.file = buf.u.file;
954 break;
955 }
956 }
957 }
958 return p;
959}
960
961static void
962free_s_arg(Cmd *c)
963{
964 if (c->u.s.re)
965 regfree(c->u.s.re);
966 free(c->u.s.re);
967 free(c->u.s.repl.str);
968}
969
970/* see get_r_arg notes */
971static char *
972get_w_arg(Cmd *c, char *s)
973{
974 char *p = semicolon_arg(s = chomp(s));
975 Wfile *w, **wp;
976
977 if (p == s)
978 leprintf("no file name");
979
980 for (wp = (Wfile **)wfiles.data; (size_t)(wp - (Wfile **)wfiles.data) < wfiles.size; wp++) {
981 if (strlen((*wp)->path) == (size_t)(p - s) && !strncmp(s, (*wp)->path, p - s)) {
982 c->u.file = (*wp)->file;
983 return p;
984 }
985 }
986
987 w = emalloc(sizeof(*w));
988 w->path = estrndup(s, p - s);
989
990 if (!(w->file = fopen(w->path, "w")))
991 leprintf("fopen failed");
992
993 c->u.file = w->file;
994
995 push(&wfiles, w);
996 return p;
997}
998
999static char *
1000get_y_arg(Cmd *c, char *s)
1001{
1002 Rune delim;
1003 char *p = s + strlen(s);
1004 size_t rlen = echarntorune(&delim, s, p - s);
1005 size_t nrunes1, nrunes2;
1006
1007 c->u.y.set1 = c->u.y.set2 = NULL;
1008
1009 s += rlen;
1010 p = find_delim(s, delim, 0);
1011 p -= escapes(s, p, delim, 1);
1012 nrunes1 = utfnlen(s, p - s);
1013 c->u.y.set1 = strtorunes(s, nrunes1);
1014
1015 s = p + rlen;
1016 p = find_delim(s, delim, 0);
1017 p -= escapes(s, p, delim, 1);
1018 nrunes2 = utfnlen(s, p - s);
1019
1020 if (nrunes1 != nrunes2)
1021 leprintf("different set lengths");
1022
1023 c->u.y.set2 = strtorunes(s, utfnlen(s, p - s));
1024
1025 return p + rlen;
1026}
1027
1028static void
1029free_y_arg(Cmd *c)
1030{
1031 free(c->u.y.set1);
1032 free(c->u.y.set2);
1033}
1034
1035/* see get_bt_arg notes */
1036static char *
1037get_colon_arg(Cmd *c, char *s)
1038{
1039 char *p = semicolon_arg(s = chomp(s));
1040
1041 if (p == s)
1042 leprintf("no label name");
1043
1044 c->u.label = estrndup(s, p - s);
1045 push(&labels, (void *)(c - prog));
1046 return p;
1047}
1048
1049static char *
1050get_lbrace_arg(Cmd *c, char *s)
1051{
1052 push(&braces, (void *)(c - prog));
1053 return s;
1054}
1055
1056static char *
1057get_rbrace_arg(Cmd *c, char *s)
1058{
1059 Cmd *lbrace;
1060
1061 if (!braces.size)
1062 leprintf("extra }");
1063
1064 lbrace = prog + (ptrdiff_t)pop(&braces);
1065 lbrace->u.offset = c - prog;
1066 return s;
1067}
1068
/* s points to beginning of an argument that may be semicolon terminated
 * return pointer to semicolon or nul byte after string
 * or closing brace as to not force ; before }
 * FIXME: decide whether or not to eat trailing whitespace for arguments that
 * we allow semicolon/brace termination that POSIX doesn't
 * b, r, t, w, :
 * POSIX says trailing whitespace is part of label name, file name, etc.
 * we should probably eat it
 */
static char *
semicolon_arg(char *s)
{
	/* length of the leading part of s holding neither ';' nor '}' */
	return s + strcspn(s, ";}");
}
1086
1087static void
1088run(void)
1089{
1090 lineno = 0;
1091 if (braces.size)
1092 leprintf("extra {");
1093
1094 /* genbuf has already been initialized, patt will be in new_line
1095 * (or we'll halt) */
1096 stracpy(&hold, "");
1097
1098 insert_labels();
1099 next_file();
1100 new_line();
1101
1102 for (pc = prog; !gflags.halt; pc++)
1103 pc->fninfo->fn(pc);
1104}
1105
1106/* return true if we are in range for c, set c->in_match appropriately */
1107static int
1108in_range(Cmd *c)
1109{
1110 if (match_addr(&c->range.beg)) {
1111 if (c->range.naddr == 2) {
1112 if (c->range.end.type == LINE && c->range.end.u.lineno <= lineno)
1113 c->in_match = 0;
1114 else
1115 c->in_match = 1;
1116 }
1117 return !c->negate;
1118 }
1119 if (c->in_match && match_addr(&c->range.end)) {
1120 c->in_match = 0;
1121 return !c->negate;
1122 }
1123 return c->in_match ^ c->negate;
1124}
1125
1126/* return true if addr matches current line */
1127static int
1128match_addr(Addr *a)
1129{
1130 switch (a->type) {
1131 default:
1132 case IGNORE: return 0;
1133 case EVERY: return 1;
1134 case LINE: return lineno == a->u.lineno;
1135 case LAST:
1136 while (is_eof(file) && !next_file())
1137 ;
1138 return !file;
1139 case REGEX:
1140 lastre = a->u.re;
1141 return !regexec(a->u.re, patt.str, 0, NULL, 0);
1142 case LASTRE:
1143 if (!lastre)
1144 leprintf("no previous regex");
1145 return !regexec(lastre, patt.str, 0, NULL, 0);
1146 }
1147}
1148
1149/* move to next input file
1150 * stdin if first call and no files
1151 * return 0 for success and 1 for no more files
1152 */
1153static int
1154next_file(void)
1155{
1156 static unsigned char first = 1;
1157
1158 if (file == stdin)
1159 clearerr(file);
1160 else if (file)
1161 fshut(file, "<file>");
1162 /* given no files, default to stdin */
1163 file = first && !*files ? stdin : NULL;
1164 first = 0;
1165
1166 while (!file && *files) {
1167 if (!strcmp(*files, "-")) {
1168 file = stdin;
1169 } else if (!(file = fopen(*files, "r"))) {
1170 /* warn this file didn't open, but move on to next */
1171 weprintf("fopen %s:", *files);
1172 ret = 1;
1173 }
1174 files++;
1175 }
1176
1177 return !file;
1178}
1179
/* test if stream is at EOF
 * a NULL stream counts as EOF
 * peeks one byte with fgetc() and pushes it back with ungetc(); a read
 * error or a failed push back is reported through eprintf()
 */
static int
is_eof(FILE *f)
{
	int ch;

	if (f == NULL || feof(f))
		return 1;

	ch = fgetc(f);
	if (ch == EOF) {
		if (ferror(f))
			eprintf("fgetc:");
		return 1;
	}

	if (ungetc(ch, f) == EOF)
		eprintf("ungetc EOF\n");

	return 0;
}
1197
1198/* perform writes that were scheduled
1199 * for aci this is check_puts(string, stdout)
1200 * for r this is write_file(path, stdout)
1201 */
1202static void
1203do_writes(void)
1204{
1205 Cmd *c;
1206 size_t i;
1207
1208 for (i = 0; i < writes.size; i++) {
1209 c = writes.data[i];
1210 c->u.acir.print(c->u.acir.str.str, stdout);
1211 }
1212 writes.size = 0;
1213}
1214
1215/* used for r's u.acir.print()
1216 * FIXME: something like util's concat() would be better
1217 */
1218static void
1219write_file(char *path, FILE *out)
1220{
1221 FILE *in = fopen(path, "r");
1222 if (!in) /* no file is treated as empty file */
1223 return;
1224
1225 while (read_line(in, &genbuf) != EOF)
1226 check_puts(genbuf.str, out);
1227
1228 fshut(in, path);
1229}
1230
/* write s (if not NULL) followed by a newline to f
 * a failed write is reported through eprintf()
 */
static void
check_puts(char *s, FILE *f)
{
	if (s != NULL) {
		if (fputs(s, f) == EOF)
			eprintf("fputs:");
	}

	/* the newline is written even when s is NULL */
	if (fputs("\n", f) == EOF)
		eprintf("fputs:");
}
1239
/* write pattern space text s to f
 * with FEATURE_SED_PRESERVE_NEWLINE the trailing newline is written only
 * when hadnl is set; without it this is plain check_puts()
 */
static void
write_patt(char *s, FILE *f)
{
#if FEATURE_SED_PRESERVE_NEWLINE
	if (s != NULL && fputs(s, f) == EOF)
		eprintf("fputs:");
	if (!hadnl)
		return;
	if (fputs("\n", f) == EOF)
		eprintf("fputs:");
#else
	check_puts(s, f);
#endif
}
1254
1255/* iterate from beg to end updating ranges so we don't miss any commands
1256 * e.g. sed -n '1d;1,3p' should still print lines 2 and 3
1257 */
1258static void
1259update_ranges(Cmd *beg, Cmd *end)
1260{
1261 while (beg < end)
1262 in_range(beg++);
1263}
1264
1265/*
1266 * Sed functions
1267 */
1268static void
1269cmd_a(Cmd *c)
1270{
1271 if (in_range(c))
1272 push(&writes, c);
1273}
1274
1275static void
1276cmd_b(Cmd *c)
1277{
1278 if (!in_range(c))
1279 return;
1280
1281 /* if we jump backwards update to end, otherwise update to destination */
1282 update_ranges(c + 1, c->u.jump > c ? c->u.jump : prog + pcap);
1283 pc = c->u.jump;
1284}
1285
1286static void
1287cmd_c(Cmd *c)
1288{
1289 if (!in_range(c))
1290 return;
1291
1292 /* write the text on the last line of the match */
1293 if (!c->in_match)
1294 check_puts(c->u.acir.str.str, stdout);
1295 /* otherwise start the next cycle without printing pattern space
1296 * effectively deleting the text */
1297 new_next();
1298}
1299
1300static void
1301cmd_d(Cmd *c)
1302{
1303 if (!in_range(c))
1304 return;
1305
1306 new_next();
1307}
1308
1309static void
1310cmd_D(Cmd *c)
1311{
1312 char *p;
1313
1314 if (!in_range(c))
1315 return;
1316
1317 if ((p = strchr(patt.str, '\n'))) {
1318 p++;
1319 memmove(patt.str, p, strlen(p) + 1);
1320 old_next();
1321 } else {
1322 new_next();
1323 }
1324}
1325
1326static void
1327cmd_g(Cmd *c)
1328{
1329 if (in_range(c))
1330 stracpy(&patt, hold.str);
1331}
1332
1333static void
1334cmd_G(Cmd *c)
1335{
1336 if (!in_range(c))
1337 return;
1338
1339 stracat(&patt, "\n");
1340 stracat(&patt, hold.str);
1341}
1342
1343static void
1344cmd_h(Cmd *c)
1345{
1346 if (in_range(c))
1347 stracpy(&hold, patt.str);
1348}
1349
1350static void
1351cmd_H(Cmd *c)
1352{
1353 if (!in_range(c))
1354 return;
1355
1356 stracat(&hold, "\n");
1357 stracat(&hold, patt.str);
1358}
1359
1360static void
1361cmd_i(Cmd *c)
1362{
1363 if (in_range(c))
1364 check_puts(c->u.acir.str.str, stdout);
1365}
1366
1367/* I think it makes sense to print invalid UTF-8 sequences in octal to satisfy
1368 * the "visually unambiguous form" sed(1p)
1369 */
1370static void
1371cmd_l(Cmd *c)
1372{
1373 Rune r;
1374 char *p, *end;
1375 size_t rlen;
1376
1377 char *escapes[] = { /* FIXME: 7 entries and search instead of 127 */
1378 ['\\'] = "\\\\", ['\a'] = "\\a", ['\b'] = "\\b",
1379 ['\f'] = "\\f" , ['\r'] = "\\r", ['\t'] = "\\t",
1380 ['\v'] = "\\v" , [0x7f] = NULL, /* fill out the table */
1381 };
1382
1383 if (!in_range(c))
1384 return;
1385
1386 /* FIXME: line wrapping. sed(1p) says "length at which folding occurs is
1387 * unspecified, but should be appropraite for the output device"
1388 * just wrap at 80 Runes?
1389 */
1390 for (p = patt.str, end = p + strlen(p); p < end; p += rlen) {
1391 if (isascii(*p) && escapes[(unsigned int)*p]) {
1392 fputs(escapes[(unsigned int)*p], stdout);
1393 rlen = 1;
1394 } else if (!(rlen = charntorune(&r, p, end - p))) {
1395 /* ran out of chars, print the bytes of the short sequence */
1396 for (; p < end; p++)
1397 printf("\\%03hho", (unsigned char)*p);
1398 break;
1399 } else if (r == Runeerror) {
1400 for (; rlen; rlen--, p++)
1401 printf("\\%03hho", (unsigned char)*p);
1402 } else {
1403 while (fwrite(p, rlen, 1, stdout) < 1 && errno == EINTR)
1404 ;
1405 if (ferror(stdout))
1406 eprintf("fwrite:");
1407 }
1408 }
1409 check_puts("$", stdout);
1410}
1411
1412static void
1413cmd_n(Cmd *c)
1414{
1415 if (!in_range(c))
1416 return;
1417
1418 if (!gflags.n)
1419 write_patt(patt.str, stdout);
1420 do_writes();
1421 new_line();
1422}
1423
1424static void
1425cmd_N(Cmd *c)
1426{
1427 if (!in_range(c))
1428 return;
1429 do_writes();
1430 app_line();
1431}
1432
1433static void
1434cmd_p(Cmd *c)
1435{
1436 if (in_range(c))
1437 write_patt(patt.str, stdout);
1438}
1439
1440static void
1441cmd_P(Cmd *c)
1442{
1443 char *p;
1444
1445 if (!in_range(c))
1446 return;
1447
1448 if ((p = strchr(patt.str, '\n')))
1449 *p = '\0';
1450
1451 write_patt(patt.str, stdout);
1452
1453 if (p)
1454 *p = '\n';
1455}
1456
1457static void
1458cmd_q(Cmd *c)
1459{
1460 if (!in_range(c))
1461 return;
1462
1463 if (!gflags.n)
1464 check_puts(patt.str, stdout);
1465 do_writes();
1466 gflags.halt = 1;
1467}
1468
1469static void
1470cmd_r(Cmd *c)
1471{
1472 if (in_range(c))
1473 push(&writes, c);
1474}
1475
1476static void
1477cmd_s(Cmd *c)
1478{
1479 String tmp;
1480 Rune r;
1481 size_t plen, rlen, len;
1482 char *p, *s, *end;
1483 unsigned int matches = 0, last_empty = 1, qflag = 0, cflags = 0;
1484 regex_t *re;
1485 regmatch_t *rm, *pmatch = NULL;
1486
1487 if (!in_range(c))
1488 return;
1489
1490 if (!c->u.s.re && !lastre)
1491 leprintf("no previous regex");
1492
1493 re = c->u.s.re ? c->u.s.re : lastre;
1494 lastre = re;
1495
1496 plen = re->re_nsub + 1;
1497 pmatch = ereallocarray(NULL, plen, sizeof(regmatch_t));
1498
1499 *genbuf.str = '\0';
1500 s = patt.str;
1501
1502 while (!qflag && !regexec(re, s, plen, pmatch, cflags)) {
1503 cflags = REG_NOTBOL; /* match against beginning of line first time, but not again */
1504 if (!*s) /* match against empty string first time, but not again */
1505 qflag = 1;
1506
1507 /* don't substitute if last match was not empty but this one is.
1508 * s_a*_._g
1509 * foobar -> .f.o.o.b.r.
1510 */
1511 if ((last_empty || pmatch[0].rm_eo) &&
1512 (++matches == c->u.s.occurrence || !c->u.s.occurrence)) {
1513 /* copy over everything before the match */
1514 strnacat(&genbuf, s, pmatch[0].rm_so);
1515
1516 /* copy over replacement text, taking into account &, backreferences, and \ escapes */
1517 for (p = c->u.s.repl.str, len = strcspn(p, "\\&"); *p; len = strcspn(++p, "\\&")) {
1518 strnacat(&genbuf, p, len);
1519 p += len;
1520 switch (*p) {
1521 default:
1522 leprintf("this shouldn't be possible");
1523 break;
1524 case '\0':
1525 /* we're at the end, back up one so the ++p will put us on
1526 * the null byte to break out of the loop */
1527 --p;
1528 break;
1529 case '&':
1530 strnacat(&genbuf, s + pmatch[0].rm_so, pmatch[0].rm_eo - pmatch[0].rm_so);
1531 break;
1532 case '\\':
1533 if (isdigit(*++p)) { /* backreference */
1534 /* only need to check here if using lastre, otherwise we checked when building */
1535 if (!c->u.s.re && (size_t)(*p - '0') > re->re_nsub)
1536 leprintf("back reference number greater than number of groups");
1537 rm = &pmatch[*p - '0'];
1538 strnacat(&genbuf, s + rm->rm_so, rm->rm_eo - rm->rm_so);
1539 } else { /* character after backslash taken literally (well one byte, but it works) */
1540 strnacat(&genbuf, p, 1);
1541 }
1542 break;
1543 }
1544 }
1545 } else {
1546 /* not replacing, copy over everything up to and including the match */
1547 strnacat(&genbuf, s, pmatch[0].rm_eo);
1548 }
1549
1550 if (!pmatch[0].rm_eo) { /* empty match, advance one rune and add it to output */
1551 end = s + strlen(s);
1552 rlen = charntorune(&r, s, end - s);
1553
1554 if (!rlen) { /* ran out of bytes, copy short sequence */
1555 stracat(&genbuf, s);
1556 s = end;
1557 } else { /* copy whether or not it's a good rune */
1558 strnacat(&genbuf, s, rlen);
1559 s += rlen;
1560 }
1561 }
1562 last_empty = !pmatch[0].rm_eo;
1563 s += pmatch[0].rm_eo;
1564 }
1565 free(pmatch);
1566
1567 if (!(matches && matches >= c->u.s.occurrence)) /* no replacement */
1568 return;
1569
1570 gflags.s = 1;
1571
1572 stracat(&genbuf, s);
1573
1574 tmp = patt;
1575 patt = genbuf;
1576 genbuf = tmp;
1577
1578 if (c->u.s.p)
1579 write_patt(patt.str, stdout);
1580 if (c->u.s.file)
1581 write_patt(patt.str, c->u.s.file);
1582}
1583
1584static void
1585cmd_t(Cmd *c)
1586{
1587 if (!in_range(c) || !gflags.s)
1588 return;
1589
1590 /* if we jump backwards update to end, otherwise update to destination */
1591 update_ranges(c + 1, c->u.jump > c ? c->u.jump : prog + pcap);
1592 pc = c->u.jump;
1593 gflags.s = 0;
1594}
1595
1596static void
1597cmd_w(Cmd *c)
1598{
1599 if (in_range(c))
1600 write_patt(patt.str, c->u.file);
1601}
1602
1603static void
1604cmd_x(Cmd *c)
1605{
1606 String tmp;
1607
1608 if (!in_range(c))
1609 return;
1610
1611 tmp = patt;
1612 patt = hold;
1613 hold = tmp;
1614}
1615
1616static void
1617cmd_y(Cmd *c)
1618{
1619 String tmp;
1620 Rune r, *rp;
1621 size_t n, rlen;
1622 char *s, *end, buf[UTFmax];
1623
1624 if (!in_range(c))
1625 return;
1626
1627 *genbuf.str = '\0';
1628 for (s = patt.str, end = s + strlen(s); *s; s += rlen) {
1629 if (!(rlen = charntorune(&r, s, end - s))) { /* ran out of chars, copy rest */
1630 stracat(&genbuf, s);
1631 break;
1632 } else if (r == Runeerror) { /* bad UTF-8 sequence, copy bytes */
1633 strnacat(&genbuf, s, rlen);
1634 } else {
1635 for (rp = c->u.y.set1; *rp; rp++)
1636 if (*rp == r)
1637 break;
1638 if (*rp) { /* found r in set1, replace with Rune from set2 */
1639 n = runetochar(buf, c->u.y.set2 + (rp - c->u.y.set1));
1640 strnacat(&genbuf, buf, n);
1641 } else {
1642 strnacat(&genbuf, s, rlen);
1643 }
1644 }
1645 }
1646 tmp = patt;
1647 patt = genbuf;
1648 genbuf = tmp;
1649}
1650
1651static void
1652cmd_colon(Cmd *c)
1653{
1654 (void)c;
1655}
1656
1657static void
1658cmd_equal(Cmd *c)
1659{
1660 if (in_range(c))
1661 printf("%zu\n", lineno);
1662}
1663
1664static void
1665cmd_lbrace(Cmd *c)
1666{
1667 Cmd *jump;
1668
1669 if (in_range(c))
1670 return;
1671
1672 /* update ranges on all commands we skip */
1673 jump = prog + c->u.offset;
1674 update_ranges(c + 1, jump);
1675 pc = jump;
1676}
1677
1678static void
1679cmd_rbrace(Cmd *c)
1680{
1681 (void)c;
1682}
1683
1684/* not actually a sed function, but acts like one, put in last spot of script */
1685static void
1686cmd_last(Cmd *c)
1687{
1688 (void)c;
1689 if (!gflags.n)
1690 write_patt(patt.str, stdout);
1691 do_writes();
1692 new_next();
1693}
1694
1695/*
1696 * Actions
1697 */
1698
1699/* read new line, continue current cycle */
1700static void
1701new_line(void)
1702{
1703 while (read_line(file, &patt) == EOF) {
1704 if (next_file()) {
1705 gflags.halt = 1;
1706 return;
1707 }
1708 }
1709 gflags.s = 0;
1710 lineno++;
1711}
1712
1713/* append new line, continue current cycle
1714 * FIXME: used for N, POSIX specifies do not print pattern space when out of
1715 * input, but GNU does so busybox does as well. Currently we don't.
1716 * Should we?
1717 */
1718static void
1719app_line(void)
1720{
1721 while (read_line(file, &genbuf) == EOF) {
1722 if (next_file()) {
1723 gflags.halt = 1;
1724 return;
1725 }
1726 }
1727
1728 stracat(&patt, "\n");
1729 stracat(&patt, genbuf.str);
1730 gflags.s = 0;
1731 lineno++;
1732}
1733
1734/* read new line, start new cycle */
1735static void
1736new_next(void)
1737{
1738 *patt.str = '\0';
1739 update_ranges(pc + 1, prog + pcap);
1740 new_line();
1741 pc = prog - 1;
1742}
1743
1744/* keep old pattern space, start new cycle */
1745static void
1746old_next(void)
1747{
1748 update_ranges(pc + 1, prog + pcap);
1749 pc = prog - 1;
1750}
1751
1752// ?man sed: stream editor
1753// ?man arguments: script [file ...
1754// ?man stream editor for filtering and transforming text
1755int
1756main(int argc, char *argv[])
1757{
1758 char *arg;
1759 int script = 0;
1760
1761 ARGBEGIN {
1762 // ?man -n: print line numbers or counts
1763 case 'n':
1764 gflags.n = 1;
1765 break;
1766 // ?man -r: operate recursively
1767 case 'r':
1768 // ?man -E: specify option flag
1769 case 'E':
1770 gflags.E = 1;
1771 break;
1772 // ?man -e:str: specify expression or pattern
1773 case 'e':
1774 arg = EARGF(usage());
1775 compile(arg, 0);
1776 script = 1;
1777 break;
1778 // ?man -f:str: force the operation
1779 case 'f':
1780 arg = EARGF(usage());
1781 compile(arg, 1);
1782 script = 1;
1783 break;
1784#if FEATURE_SED_INPLACE
1785 // ?man -i: interactive mode or prompt for confirmation
1786 case 'i':
1787 iflag = 1;
1788 if (argv[0][1] != '\0') {
1789 backup_suffix = &argv[0][1];
1790 brk_ = 1;
1791 } else {
1792 backup_suffix = "";
1793 }
1794 break;
1795#endif
1796 default : usage();
1797 } ARGEND
1798
1799 /* no script to run */
1800 if (!script && !argc)
1801 usage();
1802
1803 /* no script yet, next argument is script */
1804 if (!script)
1805 compile(*argv++, 0);
1806
1807 /* shrink/grow memory to fit and add our last instruction */
1808 resize((void **)&prog, &pcap, sizeof(*prog), pc - prog + 1, NULL);
1809 pc = prog + pcap - 1;
1810 pc->fninfo = &(Fninfo){ cmd_last, NULL, NULL, 0 };
1811
1812#if FEATURE_SED_INPLACE
1813 if (iflag) {
1814 char *single_file[2] = { NULL, NULL };
1815 char **orig_files = argv;
1816 int i;
1817
1818 if (!*orig_files)
1819 eprintf("no input files\n");
1820
1821 for (i = 0; orig_files[i]; i++) {
1822 char *temp_path = NULL;
1823 int temp_fd;
1824 int real_stdout;
1825 struct stat st;
1826 Cmd *c;
1827
1828 if (strcmp(orig_files[i], "-") == 0) {
1829 weprintf("cannot edit stdin in-place\n");
1830 ret = 1;
1831 continue;
1832 }
1833
1834 if (stat(orig_files[i], &st) < 0) {
1835 weprintf("stat %s:", orig_files[i]);
1836 ret = 1;
1837 continue;
1838 }
1839
1840 temp_fd = create_temp_file(orig_files[i], &temp_path);
1841 if (temp_fd < 0) {
1842 weprintf("create_temp_file:");
1843 ret = 1;
1844 continue;
1845 }
1846
1847 real_stdout = dup(1);
1848 if (real_stdout < 0) {
1849 weprintf("dup stdout:");
1850 close(temp_fd);
1851 free(temp_path);
1852 ret = 1;
1853 continue;
1854 }
1855 if (dup2(temp_fd, 1) < 0) {
1856 weprintf("dup2 stdout:");
1857 close(temp_fd);
1858 close(real_stdout);
1859 free(temp_path);
1860 ret = 1;
1861 continue;
1862 }
1863 close(temp_fd);
1864
1865 single_file[0] = orig_files[i];
1866 files = single_file;
1867
1868 /* reset state for next file */
1869 lineno = 0;
1870 gflags.halt = 0;
1871 stracpy(&hold, "");
1872 stracpy(&patt, "");
1873 writes.size = 0;
1874 for (c = prog; c->fninfo->fn != cmd_last; c++) {
1875 c->in_match = 0;
1876 }
1877
1878 run();
1879
1880 fflush(stdout);
1881 dup2(real_stdout, 1);
1882 close(real_stdout);
1883
1884 if (backup_suffix && *backup_suffix) {
1885 char *backup_path = emalloc(strlen(orig_files[i]) + strlen(backup_suffix) + 1);
1886 sprintf(backup_path, "%s%s", orig_files[i], backup_suffix);
1887 if (rename(orig_files[i], backup_path) < 0) {
1888 weprintf("rename %s to %s:", orig_files[i], backup_path);
1889 unlink(temp_path);
1890 free(backup_path);
1891 free(temp_path);
1892 ret = 1;
1893 continue;
1894 }
1895 free(backup_path);
1896 } else {
1897 unlink(orig_files[i]);
1898 }
1899
1900 if (rename(temp_path, orig_files[i]) < 0) {
1901 weprintf("rename %s to %s:", temp_path, orig_files[i]);
1902 unlink(temp_path);
1903 free(temp_path);
1904 ret = 1;
1905 continue;
1906 }
1907
1908 chmod(orig_files[i], st.st_mode);
1909 chown(orig_files[i], st.st_uid, st.st_gid);
1910
1911 free(temp_path);
1912 }
1913 } else
1914#endif
1915 {
1916 files = argv;
1917 run();
1918 }
1919
1920 ret |= fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>");
1921
1922 return ret;
1923}