1/* $NetBSD: col.c,v 1.20 2021/09/10 21:52:17 rillig Exp $ */
2
3/*-
4 * SPDX-License-Identifier: BSD-3-Clause
5 *
6 * Copyright (c) 1990, 1993, 1994
7 * The Regents of the University of California. All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * Michael Rendell of the Memorial University of Newfoundland.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37#include <sys/cdefs.h>
38#ifndef lint
39__COPYRIGHT("@(#) Copyright (c) 1990, 1993, 1994\
40 The Regents of the University of California. All rights reserved.");
41#endif /* not lint */
42
43#ifndef lint
44#if 0
45static char sccsid[] = "@(#)col.c 8.5 (Berkeley) 5/4/95";
46__FBSDID("$FreeBSD: head/usr.bin/col/col.c 366577 2020-10-09 15:27:37Z markj $")
47;
48
49#endif
50__RCSID("$NetBSD: col.c,v 1.20 2021/09/10 21:52:17 rillig Exp $");
51#endif /* not lint */
52
53#include <err.h>
54#include <errno.h>
55#include <inttypes.h>
56#include <limits.h>
57#include <locale.h>
58#include <stdio.h>
59#include <stdlib.h>
60#include <string.h>
61#include <termios.h>
62#include <unistd.h>
63#include <wchar.h>
64#include <wctype.h>
65
66#define BS '\b' /* backspace */
67#define TAB '\t' /* tab */
68#define SPACE ' ' /* space */
69#define NL '\n' /* newline */
70#define CR '\r' /* carriage return */
71#define ESC '\033' /* escape */
72#define SI '\017' /* shift in to normal character set */
73#define SO '\016' /* shift out to alternate character set */
74#define VT '\013' /* vertical tab (aka reverse line feed) */
75#define RLF '7' /* ESC-7 reverse line feed */
76#define RHLF '8' /* ESC-8 reverse half-line feed */
77#define FHLF '9' /* ESC-9 forward half-line feed */
78
79/* build up at least this many lines before flushing them out */
80#define BUFFER_MARGIN 32
81
82typedef char CSET;
83
84typedef struct char_str {
85#define CS_NORMAL 1
86#define CS_ALTERNATE 2
87 int c_column; /* column character is in */
88 CSET c_set; /* character set (currently only 2) */
89 wchar_t c_char; /* character in question */
90 int c_width; /* character width */
91} CHAR;
92
93typedef struct line_str LINE;
94struct line_str {
95 CHAR *l_line; /* characters on the line */
96 LINE *l_prev; /* previous line */
97 LINE *l_next; /* next line */
98 int l_lsize; /* allocated sizeof l_line */
99 int l_line_len; /* strlen(l_line) */
100 int l_needs_sort; /* set if chars went in out of order */
101 int l_max_col; /* max column in the line */
102};
103
104static void addto_lineno(int *, int);
105static LINE *alloc_line(void);
106static void dowarn(int);
107static void flush_line(LINE *);
108static void flush_lines(int);
109static void flush_blanks(void);
110static void free_line(LINE *);
111__dead static void usage(void);
112
113static CSET last_set; /* char_set of last char printed */
114static LINE *lines;
115static int compress_spaces; /* if doing space -> tab conversion */
116static int fine; /* if `fine' resolution (half lines) */
117static int max_bufd_lines; /* max # of half lines to keep in memory */
118static int nblank_lines; /* # blanks after last flushed line */
119static int no_backspaces; /* if not to output any backspaces */
120static int pass_unknown_seqs; /* pass unknown control sequences */
121
122#define PUTC(ch) \
123 do { \
124 if (putwchar(ch) == WEOF) \
125 errx(EXIT_FAILURE, "write error"); \
126 } while (0)
127
128int
129main(int argc, char **argv)
130{
131 wint_t ch;
132 CHAR *c;
133 CSET cur_set; /* current character set */
134 LINE *l; /* current line */
135 int extra_lines; /* # of lines above first line */
136 int cur_col; /* current column */
137 int cur_line; /* line number of current position */
138 int max_line; /* max value of cur_line */
139 int this_line; /* line l points to */
140 int nflushd_lines; /* number of lines that were flushed */
141 int adjust, opt, warned, width;
142 int e;
143
144 (void)setlocale(LC_CTYPE, "");
145
146 max_bufd_lines = 256;
147 compress_spaces = 1; /* compress spaces into tabs */
148 while ((opt = getopt(argc, argv, "bfhl:px")) != -1)
149 switch (opt) {
150 case 'b': /* do not output backspaces */
151 no_backspaces = 1;
152 break;
153 case 'f': /* allow half forward line feeds */
154 fine = 1;
155 break;
156 case 'h': /* compress spaces into tabs */
157 compress_spaces = 1;
158 break;
159 case 'l': /* buffered line count */
160 max_bufd_lines = (int)strtoi(optarg, NULL, 0, 1,
161 (INT_MAX - BUFFER_MARGIN) / 2, &e) * 2;
162 if (e)
163 errc(EXIT_FAILURE, e, "bad -l argument `%s'",
164 optarg);
165 break;
166 case 'p': /* pass unknown control sequences */
167 pass_unknown_seqs = 1;
168 break;
169 case 'x': /* do not compress spaces into tabs */
170 compress_spaces = 0;
171 break;
172 case '?':
173 default:
174 usage();
175 }
176
177 if (optind != argc)
178 usage();
179
180 adjust = cur_col = extra_lines = warned = 0;
181 cur_line = max_line = nflushd_lines = this_line = 0;
182 cur_set = last_set = CS_NORMAL;
183 lines = l = alloc_line();
184
185 while ((ch = getwchar()) != WEOF) {
186 if (!iswgraph(ch)) {
187 switch (ch) {
188 case BS: /* can't go back further */
189 if (cur_col == 0)
190 continue;
191 --cur_col;
192 continue;
193 case CR:
194 cur_col = 0;
195 continue;
196 case ESC: /* just ignore EOF */
197 switch(getwchar()) {
198 /*
199 * In the input stream, accept both the
200 * XPG5 sequences ESC-digit and the
201 * traditional BSD sequences ESC-ctrl.
202 */
203 case '\007':
204 /* FALLTHROUGH */
205 case RLF:
206 addto_lineno(&cur_line, -2);
207 break;
208 case '\010':
209 /* FALLTHROUGH */
210 case RHLF:
211 addto_lineno(&cur_line, -1);
212 break;
213 case '\011':
214 /* FALLTHROUGH */
215 case FHLF:
216 addto_lineno(&cur_line, 1);
217 if (cur_line > max_line)
218 max_line = cur_line;
219 }
220 continue;
221 case NL:
222 addto_lineno(&cur_line, 2);
223 if (cur_line > max_line)
224 max_line = cur_line;
225 cur_col = 0;
226 continue;
227 case SPACE:
228 ++cur_col;
229 continue;
230 case SI:
231 cur_set = CS_NORMAL;
232 continue;
233 case SO:
234 cur_set = CS_ALTERNATE;
235 continue;
236 case TAB: /* adjust column */
237 cur_col |= 7;
238 ++cur_col;
239 continue;
240 case VT:
241 addto_lineno(&cur_line, -2);
242 continue;
243 }
244 if (iswspace(ch)) {
245 if ((width = wcwidth(ch)) > 0)
246 cur_col += width;
247 continue;
248 }
249 if (!pass_unknown_seqs)
250 continue;
251 }
252
253 /* Must stuff ch in a line - are we at the right one? */
254 if (cur_line + adjust != this_line) {
255 LINE *lnew;
256
257 /* round up to next line */
258 adjust = !fine && (cur_line & 1);
259
260 if (cur_line + adjust < this_line) {
261 while (cur_line + adjust < this_line &&
262 l->l_prev != NULL) {
263 l = l->l_prev;
264 this_line--;
265 }
266 if (cur_line + adjust < this_line) {
267 if (nflushd_lines == 0) {
268 /*
269 * Allow backup past first
270 * line if nothing has been
271 * flushed yet.
272 */
273 while (cur_line + adjust
274 < this_line) {
275 lnew = alloc_line();
276 l->l_prev = lnew;
277 lnew->l_next = l;
278 l = lines = lnew;
279 extra_lines++;
280 this_line--;
281 }
282 } else {
283 if (!warned++)
284 dowarn(cur_line);
285 cur_line = this_line - adjust;
286 }
287 }
288 } else {
289 /* may need to allocate here */
290 while (cur_line + adjust > this_line) {
291 if (l->l_next == NULL) {
292 l->l_next = alloc_line();
293 l->l_next->l_prev = l;
294 }
295 l = l->l_next;
296 this_line++;
297 }
298 }
299 if (this_line > nflushd_lines &&
300 this_line - nflushd_lines >=
301 max_bufd_lines + BUFFER_MARGIN) {
302 if (extra_lines) {
303 flush_lines(extra_lines);
304 extra_lines = 0;
305 }
306 flush_lines(this_line - nflushd_lines -
307 max_bufd_lines);
308 nflushd_lines = this_line - max_bufd_lines;
309 }
310 }
311 /* grow line's buffer? */
312 if (l->l_line_len + 1 >= l->l_lsize) {
313 int need;
314
315 need = l->l_lsize ? l->l_lsize * 2 : 90;
316 if ((l->l_line = realloc(l->l_line,
317 (unsigned)need * sizeof(CHAR))) == NULL)
318 err(EXIT_FAILURE, NULL);
319 l->l_lsize = need;
320 }
321 c = &l->l_line[l->l_line_len++];
322 c->c_char = ch;
323 c->c_set = cur_set;
324 c->c_column = cur_col;
325 c->c_width = wcwidth(ch);
326 /*
327 * If things are put in out of order, they will need sorting
328 * when it is flushed.
329 */
330 if (cur_col < l->l_max_col)
331 l->l_needs_sort = 1;
332 else
333 l->l_max_col = cur_col;
334 if (c->c_width > 0)
335 cur_col += c->c_width;
336 }
337 if (ferror(stdin))
338 err(EXIT_FAILURE, NULL);
339 if (extra_lines) {
340 /*
341 * Extra lines only exist if no lines have been flushed
342 * yet. This means that 'lines' must point to line zero
343 * after we flush the extra lines.
344 */
345 flush_lines(extra_lines);
346 l = lines;
347 this_line = 0;
348 }
349
350 /* goto the last line that had a character on it */
351 for (; l->l_next; l = l->l_next)
352 this_line++;
353 flush_lines(this_line - nflushd_lines + 1);
354
355 /* make sure we leave things in a sane state */
356 if (last_set != CS_NORMAL)
357 PUTC(SI);
358
359 /* flush out the last few blank lines */
360 if (max_line >= this_line)
361 nblank_lines = max_line - this_line + (max_line & 1);
362 if (nblank_lines == 0)
363 /* end with a newline even if the source doesn't */
364 nblank_lines = 2;
365 flush_blanks();
366 exit(EXIT_SUCCESS);
367}
368
369/*
370 * Prints the first 'nflush' lines. Printed lines are freed.
371 * After this function returns, 'lines' points to the first
372 * of the remaining lines, and 'nblank_lines' will have the
373 * number of half line feeds between the final flushed line
374 * and the first remaining line.
375 */
376static void
377flush_lines(int nflush)
378{
379 LINE *l;
380
381 while (--nflush >= 0) {
382 l = lines;
383 lines = l->l_next;
384 if (l->l_line) {
385 flush_blanks();
386 flush_line(l);
387 free(l->l_line);
388 }
389 if (l->l_next)
390 nblank_lines++;
391 free_line(l);
392 }
393 if (lines)
394 lines->l_prev = NULL;
395}
396
397/*
398 * Print a number of newline/half newlines.
399 * nblank_lines is the number of half line feeds.
400 */
401static void
402flush_blanks(void)
403{
404 int half, i, nb;
405
406 half = 0;
407 nb = nblank_lines;
408 if (nb & 1) {
409 if (fine)
410 half = 1;
411 else
412 nb++;
413 }
414 nb /= 2;
415 for (i = nb; --i >= 0;)
416 PUTC('\n');
417 if (half) {
418 PUTC(ESC);
419 PUTC(FHLF);
420 if (!nb)
421 PUTC('\r');
422 }
423 nblank_lines = 0;
424}
425
426/*
427 * Write a line to stdout taking care of space to tab conversion (-h flag)
428 * and character set shifts.
429 */
430static void
431flush_line(LINE *l)
432{
433 CHAR *c, *endc;
434 int i, j, nchars, last_col, save, this_col, tot;
435
436 last_col = 0;
437 nchars = l->l_line_len;
438
439 if (l->l_needs_sort) {
440 static CHAR *sorted;
441 static int count_size, *count, sorted_size;
442
443 /*
444 * Do an O(n) sort on l->l_line by column being careful to
445 * preserve the order of characters in the same column.
446 */
447 if (l->l_lsize > sorted_size) {
448 sorted_size = l->l_lsize;
449 if ((sorted = realloc(sorted,
450 sizeof(CHAR) * (size_t)sorted_size)) == NULL)
451 err(EXIT_FAILURE, NULL);
452 }
453 if (l->l_max_col >= count_size) {
454 count_size = l->l_max_col + 1;
455 if ((count = realloc(count,
456 sizeof(int) * (size_t)count_size)) == NULL)
457 err(EXIT_FAILURE, NULL);
458 }
459 memset(count, 0, sizeof(int) * (size_t)l->l_max_col + 1);
460 for (i = nchars, c = l->l_line; --i >= 0; c++)
461 count[c->c_column]++;
462
463 /*
464 * calculate running total (shifted down by 1) to use as
465 * indices into new line.
466 */
467 for (tot = 0, i = 0; i <= l->l_max_col; i++) {
468 save = count[i];
469 count[i] = tot;
470 tot += save;
471 }
472
473 for (i = nchars, c = l->l_line; --i >= 0; c++)
474 sorted[count[c->c_column]++] = *c;
475 c = sorted;
476 } else
477 c = l->l_line;
478 while (nchars > 0) {
479 this_col = c->c_column;
480 endc = c;
481 do {
482 ++endc;
483 } while (--nchars > 0 && this_col == endc->c_column);
484
485 /* if -b only print last character */
486 if (no_backspaces) {
487 c = endc - 1;
488 if (nchars > 0 &&
489 this_col + c->c_width > endc->c_column)
490 continue;
491 }
492
493 if (this_col > last_col) {
494 int nspace = this_col - last_col;
495
496 if (compress_spaces && nspace > 1) {
497 while (1) {
498 int tab_col, tab_size;
499
500 tab_col = (last_col + 8) & ~7;
501 if (tab_col > this_col)
502 break;
503 tab_size = tab_col - last_col;
504 if (tab_size == 1)
505 PUTC(' ');
506 else
507 PUTC('\t');
508 nspace -= tab_size;
509 last_col = tab_col;
510 }
511 }
512 while (--nspace >= 0)
513 PUTC(' ');
514 last_col = this_col;
515 }
516
517 for (;;) {
518 if (c->c_set != last_set) {
519 switch (c->c_set) {
520 case CS_NORMAL:
521 PUTC(SI);
522 break;
523 case CS_ALTERNATE:
524 PUTC(SO);
525 }
526 last_set = c->c_set;
527 }
528 PUTC(c->c_char);
529 if ((c + 1) < endc)
530 for (j = 0; j < c->c_width; j++)
531 PUTC('\b');
532 if (++c >= endc)
533 break;
534 }
535 last_col += (c - 1)->c_width;
536 }
537}
538
539/*
540 * Increment or decrement a line number, checking for overflow.
541 * Stop one below INT_MAX such that the adjust variable is safe.
542 */
543void
544addto_lineno(int *lno, int offset)
545{
546 if (offset > 0) {
547 if (*lno >= INT_MAX - offset)
548 errx(EXIT_FAILURE, "too many lines");
549 } else {
550 if (*lno < INT_MIN - offset)
551 errx(EXIT_FAILURE, "too many reverse line feeds");
552 }
553 *lno += offset;
554}
555
556#define NALLOC 64
557
558static LINE *line_freelist;
559
560static LINE *
561alloc_line(void)
562{
563 LINE *l;
564 int i;
565
566 if (!line_freelist) {
567 if ((l = realloc(NULL, sizeof(LINE) * NALLOC)) == NULL)
568 err(EXIT_FAILURE, NULL);
569 line_freelist = l;
570 for (i = 1; i < NALLOC; i++, l++)
571 l->l_next = l + 1;
572 l->l_next = NULL;
573 }
574 l = line_freelist;
575 line_freelist = l->l_next;
576
577 memset(l, 0, sizeof(LINE));
578 return (l);
579}
580
581static void
582free_line(LINE *l)
583{
584
585 l->l_next = line_freelist;
586 line_freelist = l;
587}
588
589static void
590usage(void)
591{
592
593 (void)fprintf(stderr, "Usage: %s [-bfhpx] [-l nline]\n", getprogname());
594 exit(EXIT_FAILURE);
595}
596
597static void
598dowarn(int line)
599{
600
601 warnx("warning: can't back up %s",
602 line < 0 ? "past first line" : "-- line already flushed");
603}