1/* see license file for copyright and license details */
2
3
4#include "util.h"
5#include "arg.h"
6#include "tls.h"
7
#include <arpa/inet.h>
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <netdb.h>
#include <netinet/in.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
21
/*
 * Buffered reader layered on a connection handle.  buf holds the bytes
 * most recently obtained with tls_read(); idx..len is the part that has
 * not been handed to the caller yet.
 */
struct Stream {
	struct TlsSocket *ts;	/* connection buf is refilled from */
	char buf[8192];		/* bytes most recently read from ts */
	size_t len;		/* number of valid bytes in buf */
	size_t idx;		/* offset of the next unread byte in buf */
};
28
/*
 * Option state filled in by main()'s argument parsing.  Objects with
 * static storage start out zero/NULL, so only non-zero defaults are
 * spelled out.
 */

/* boolean switches */
static int qflag;			/* -q: skip the weprintf() diagnostics */
static int Sflag;			/* -S: echo response headers to stderr */
static int cflag;			/* -c: resume into an existing output file */
static int spider;			/* send HEAD and stop after the headers */
static int no_check_certificate;	/* negated and handed to tls_connect() */

/* socket send/receive timeout in seconds; 0 leaves the sockets untouched */
static int timeout_sec = 900;

/* output placement */
static char *Oflag;			/* -O: explicit output path ("-" = stdout) */
static char *Pflag;			/* -P: directory prefixed to the derived name */

/* request contents */
static char *user_agent = "wget/aruu";	/* value of the User-Agent header */
static char *post_data;			/* request body given on the command line */
static char *post_file;			/* path of a file sent as the request body */
static char **custom_headers;		/* extra header lines, one string each */
static size_t custom_headers_num;	/* number of entries in custom_headers */
42
43static void
44usage(void)
45{
46 eprintf("usage: %s [-cqS] [-O file] [-P dir] [-T timeout] [-U user_agent] "
47 "[-post-data data] [-post-file file] [-header header] "
48 "[-no-check-certificate] [-spider] url\n", argv0);
49}
50
51static void
52add_header(const char *hdr)
53{
54 custom_headers = ereallocarray(custom_headers, custom_headers_num + 1, sizeof(*custom_headers));
55 custom_headers[custom_headers_num++] = estrdup(hdr);
56}
57
/*
 * Open a stream socket to host:port.
 *
 * Every address returned by getaddrinfo() is tried in order until one
 * connect() succeeds.  When timeout_sec is positive it is installed as
 * both the send and the receive timeout before connecting; failures of
 * setsockopt() are ignored.
 *
 * Returns the connected descriptor, or -1 if resolution failed or no
 * address could be connected.  A resolver failure is reported with
 * weprintf() unless qflag is set.
 */
static int
dial(const char *host, const char *port)
{
	struct addrinfo hints, *list, *ai;
	struct timeval tv;
	int rc;
	int sock = -1;

	memset(&hints, 0, sizeof(hints));
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_family = AF_UNSPEC;

	rc = getaddrinfo(host, port, &hints, &list);
	if (rc != 0) {
		if (!qflag)
			weprintf("getaddrinfo %s:%s: %s\n", host, port, gai_strerror(rc));
		return -1;
	}

	/* stop at the first candidate that connects */
	for (ai = list; ai != NULL && sock < 0; ai = ai->ai_next) {
		sock = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
		if (sock < 0)
			continue;

		if (timeout_sec > 0) {
			tv.tv_sec = timeout_sec;
			tv.tv_usec = 0;
			setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv));
			setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
		}

		if (connect(sock, ai->ai_addr, ai->ai_addrlen) != 0) {
			close(sock);
			sock = -1;
		}
	}

	freeaddrinfo(list);
	return sock;
}
95
/*
 * Split an http:// or https:// URL into its parts, in place.
 *
 * url is modified: NUL bytes are written over the '/' that ends the
 * authority, over the ':' before the port and over the ']' of an IPv6
 * literal.  The returned pointers refer into url or to string
 * literals, so they stay valid only as long as url does and must not
 * be freed.
 *
 *   host    host name, or the IPv6 address without its brackets
 *   port    explicit port, else "443" for https and "80" for http; an
 *           empty port ("host:") also selects the default
 *   path    everything after the first '/' following the authority
 *           (no leading slash), or "" when there is none
 *   is_tls  1 for https, 0 otherwise
 *
 * An unsupported scheme or an unterminated '[' is reported through
 * eprintf().
 */
static void
parse_url(char *url, char **host, char **port, char **path, int *is_tls)
{
	char *slash, *close_br;
	char *colon = NULL;	/* the ':' introducing the port, if any */

	*is_tls = 0;
	if (strncasecmp(url, "http://", 7) == 0) {
		url += 7;
	} else if (strncasecmp(url, "https://", 8) == 0) {
		url += 8;
		*is_tls = 1;
	} else {
		eprintf("unsupported protocol or invalid url: %s\n", url);
	}

	*host = url;
	*port = *is_tls ? "443" : "80";

	slash = strchr(url, '/');
	if (slash) {
		*slash = '\0';
		*path = slash + 1;
	} else {
		*path = "";
	}

	if (**host == '[') {
		/* bracketed IPv6 literal, optionally followed by :port */
		(*host)++;
		close_br = strchr(*host, ']');
		if (close_br) {
			*close_br = '\0';
			if (close_br[1] == ':')
				colon = close_br + 1;
		} else {
			eprintf("invalid ipv6 literal: %s\n", *host);
		}
	} else {
		colon = strrchr(*host, ':');
		if (colon)
			*colon = '\0';
	}

	/* keep the scheme default when the port after ':' is empty */
	if (colon && colon[1] != '\0')
		*port = colon + 1;
}
144
/*
 * Look for a header line in a NUL-terminated header block.
 *
 * name is compared case-insensitively against the start of each line
 * and must include the trailing colon (e.g. "Location:").  On a match
 * the value -- the text after name with leading spaces and tabs
 * skipped, up to the first CR or LF -- is returned as a copy made with
 * estrndup(), which the caller frees.  Returns NULL when no line
 * matches.
 */
static char *
find_header(const char *headers, const char *name)
{
	const size_t name_len = strlen(name);
	const char *line, *nl, *value;

	for (line = headers; line && *line; line = nl ? nl + 1 : NULL) {
		if (strncasecmp(line, name, name_len) == 0) {
			value = line + name_len;
			value += strspn(value, " \t");
			return estrndup(value, strcspn(value, "\r\n"));
		}
		nl = strchr(line, '\n');
	}

	return NULL;
}
166
167static int
168stream_getc(struct Stream *s)
169{
170 ssize_t r;
171
172 if (s->idx < s->len) {
173 return (unsigned char)s->buf[s->idx++];
174 }
175 s->idx = 0;
176 r = tls_read(s->ts, s->buf, sizeof(s->buf));
177 if (r <= 0) {
178 s->len = 0;
179 return EOF;
180 }
181 s->len = (size_t)r;
182 return (unsigned char)s->buf[s->idx++];
183}
184
185static size_t
186stream_read(struct Stream *s, void *ptr, size_t size)
187{
188 size_t total = 0;
189 size_t n;
190 char *p = ptr;
191 ssize_t r;
192
193 while (total < size) {
194 if (s->idx < s->len) {
195 n = MIN(size - total, s->len - s->idx);
196 memcpy(p + total, s->buf + s->idx, n);
197 s->idx += n;
198 total += n;
199 } else {
200 s->idx = 0;
201 r = tls_read(s->ts, s->buf, sizeof(s->buf));
202 if (r <= 0) {
203 s->len = 0;
204 break;
205 }
206 s->len = (size_t)r;
207 }
208 }
209 return total;
210}
211
/*
 * Decode a "Transfer-Encoding: chunked" body from s and write the
 * payload to out_fd.
 *
 * Each chunk is a hexadecimal size line followed by that many bytes
 * and a CRLF; a size of zero ends the body.  Anything after the hex
 * digits on the size line (chunk extensions) is ignored.  A size line
 * that does not start with a hex number, is negative, or overflows is
 * rejected instead of being read as zero, which would silently
 * truncate the download.
 *
 * All failures are reported through eprintf(), which the rest of this
 * file relies on not returning.
 */
static void
read_chunked(struct Stream *s, int out_fd)
{
	char line[128];
	char chunk_buf[8192];
	char *end;
	size_t line_len, n;
	long long chunk_size, remaining;
	int c;

	for (;;) {
		/* collect the size line; CRs are dropped, overlong lines cut */
		line_len = 0;
		for (;;) {
			c = stream_getc(s);
			if (c == EOF)
				eprintf("unexpected end of file reading chunk size\n");
			if (c == '\n')
				break;
			if (c != '\r' && line_len < sizeof(line) - 1)
				line[line_len++] = c;
		}
		line[line_len] = '\0';

		errno = 0;
		chunk_size = strtoll(line, &end, 16);
		if (end == line || chunk_size < 0 || errno == ERANGE)
			eprintf("invalid chunk size: %s\n", line);

		if (chunk_size == 0) {
			/* last chunk: consume the CRLF that closes the body */
			stream_getc(s);
			stream_getc(s);
			break;
		}

		remaining = chunk_size;
		while (remaining > 0) {
			n = stream_read(s, chunk_buf,
			                (size_t)MIN(remaining, (long long)sizeof(chunk_buf)));
			if (n == 0)
				eprintf("unexpected end of file in chunk data\n");
			if (writeall(out_fd, chunk_buf, n) < 0)
				eprintf("write output:\n");
			remaining -= (long long)n;
		}

		/* skip the CRLF that terminates the chunk data */
		stream_getc(s);
		stream_getc(s);
	}
}
257
/*
 * Copy an un-chunked response body from s to out_fd.
 *
 * With content_len >= 0 exactly that many bytes are expected and a
 * short stream is reported with eprintf().  With content_len < 0 the
 * body is copied until the stream ends.
 */
static void
read_non_chunked(struct Stream *s, int out_fd, long long content_len)
{
	const int bounded = content_len >= 0;
	long long left = content_len;
	char buf[8192];
	size_t want, got;

	for (;;) {
		if (bounded && left <= 0)
			break;

		want = sizeof(buf);
		if (bounded && left < (long long)sizeof(buf))
			want = (size_t)left;

		got = stream_read(s, buf, want);
		if (got == 0) {
			if (bounded)
				eprintf("unexpected end of file\n");
			break;
		}

		if (writeall(out_fd, buf, got) < 0)
			eprintf("write output:\n");

		if (bounded)
			left -= got;
	}
}
281
/*
 * Format a piece of the request printf-style and send it with
 * tls_write().
 *
 * vsnprintf() returns the length the complete output would have, which
 * can exceed the stack buffer (long URL paths, headers or user
 * agents).  In that case the text is formatted again into a heap
 * buffer of the right size, so tls_write() is never asked to send more
 * bytes than the buffer actually holds.
 *
 * Nothing is sent when formatting fails or yields an empty string.
 * The result of tls_write() is not examined.
 */
static void
req_printf(struct TlsSocket *ts, const char *fmt, ...)
{
	va_list ap, ap_retry;
	char stack_buf[1024];
	char *buf = stack_buf;
	int len;

	va_start(ap, fmt);
	va_copy(ap_retry, ap);
	len = vsnprintf(stack_buf, sizeof(stack_buf), fmt, ap);
	va_end(ap);

	if (len >= (int)sizeof(stack_buf)) {
		/* output was truncated: redo it with enough room */
		buf = emalloc((size_t)len + 1);
		len = vsnprintf(buf, (size_t)len + 1, fmt, ap_retry);
	}
	va_end(ap_retry);

	if (len > 0)
		tls_write(ts, buf, len);

	if (buf != stack_buf)
		free(buf);
}
295
296// ?man wget: retrieve files from the web
297// ?man arguments: url
298// ?man download files over http or https
299int
300main(int argc, char *argv[])
301{
302 struct Stream s;
303 char *url, *host, *port, *path, *loc;
304 char *curr_host, *curr_port, *curr_path;
305 char *new_url;
306 char *cl_str;
307 char *te_str;
308 char *header_end;
309 char *out_name;
310 int redirects = 0;
311 int max_redirects = 20;
312 int sock_fd = -1;
313 int out_fd = 1;
314 int chunked;
315 int status;
316 long long content_len;
317 size_t total_read;
318 ssize_t n;
319 size_t dir_len;
320 char *last_slash;
321 int is_tls = 0;
322 struct TlsSocket *tls_sock = NULL;
323 off_t resume_offset = 0;
324 int out_mode = O_WRONLY | O_CREAT | O_TRUNC;
325 long long post_len = 0;
326 int post_fd = -1;
327 size_t i;
328
329 ARGBEGIN {
330 // ?man -O:str: specify output file path
331 case 'O':
332 Oflag = EARGF(usage());
333 break;
334 // ?man -P:str: specify output directory prefix
335 case 'P':
336 Pflag = EARGF(usage());
337 break;
338 // ?man -T:num: set network read and connect timeout
339 case 'T':
340 timeout_sec = estrtonum(EARGF(usage()), 0, 100000);
341 break;
342 // ?man -U:str: set User-Agent header
343 case 'U':
344 user_agent = EARGF(usage());
345 break;
346 // ?man -c: continue retrieval of aborted transfer
347 case 'c':
348 cflag = 1;
349 break;
350 // ?man -q: quiet mode to suppress stderr output
351 case 'q':
352 qflag = 1;
353 break;
354 // ?man -S: print server response headers to stderr
355 case 'S':
356 Sflag = 1;
357 break;
358 // ?man --: specify - option
359 case '-':
360 if (strcmp(argv[0], "-no-check-certificate") == 0) {
361 no_check_certificate = 1;
362 brk_ = 1;
363 } else if (strncmp(argv[0], "-header=", 8) == 0) {
364 add_header(argv[0] + 8);
365 brk_ = 1;
366 } else if (strcmp(argv[0], "-header") == 0) {
367 brk_ = 1;
368 if (!argv[1])
369 usage();
370 add_header(argv[1]);
371 argv++;
372 argc--;
373 } else if (strncmp(argv[0], "-post-data=", 11) == 0) {
374 post_data = argv[0] + 11;
375 brk_ = 1;
376 } else if (strcmp(argv[0], "-post-data") == 0) {
377 brk_ = 1;
378 if (!argv[1])
379 usage();
380 post_data = argv[1];
381 argv++;
382 argc--;
383 } else if (strncmp(argv[0], "-post-file=", 11) == 0) {
384 post_file = argv[0] + 11;
385 brk_ = 1;
386 } else if (strcmp(argv[0], "-post-file") == 0) {
387 brk_ = 1;
388 if (!argv[1])
389 usage();
390 post_file = argv[1];
391 argv++;
392 argc--;
393 } else if (strcmp(argv[0], "-spider") == 0) {
394 spider = 1;
395 brk_ = 1;
396 } else {
397 usage();
398 }
399 break;
400 default:
401 usage();
402 } ARGEND
403
404 if (argc < 1)
405 usage();
406
407 url = estrdup(argv[0]);
408
409 /* determine output filename early to check for resume */
410 out_name = NULL;
411 if (Oflag) {
412 out_name = Oflag;
413 } else {
414 last_slash = strrchr(url, '/');
415 if (last_slash && *(last_slash + 1))
416 out_name = last_slash + 1;
417 else
418 out_name = "index.html";
419
420 if (Pflag) {
421 char *tmp = emalloc(strlen(Pflag) + 1 + strlen(out_name) + 1);
422 sprintf(tmp, "%s/%s", Pflag, out_name);
423 out_name = tmp;
424 }
425 }
426
427 if (cflag && out_name && strcmp(out_name, "-") != 0) {
428 struct stat st;
429 if (stat(out_name, &st) == 0 && S_ISREG(st.st_mode)) {
430 resume_offset = st.st_size;
431 }
432 }
433
434 if (post_data) {
435 post_len = strlen(post_data);
436 } else if (post_file) {
437 struct stat st;
438 post_fd = open(post_file, O_RDONLY);
439 if (post_fd < 0)
440 eprintf("open %s:\n", post_file);
441 if (fstat(post_fd, &st) < 0)
442 eprintf("stat %s:\n", post_file);
443 post_len = st.st_size;
444 }
445
446 while (!tls_sock) {
447 if (redirects > max_redirects)
448 eprintf("too many redirects\n");
449
450 curr_host = curr_port = curr_path = NULL;
451 parse_url(url, &curr_host, &curr_port, &curr_path, &is_tls);
452
453 host = estrdup(curr_host);
454 port = estrdup(curr_port);
455 path = estrdup(curr_path);
456
457 sock_fd = dial(host, port);
458 if (sock_fd < 0)
459 eprintf("failed to connect to %s:%s\n", host, port);
460
461 tls_sock = tls_connect(sock_fd, host, !no_check_certificate, is_tls);
462 if (!tls_sock) {
463 close(sock_fd);
464 eprintf("failed to establish TLS connection with %s\n", host);
465 }
466
467 /* send http request */
468 const char *method = spider ? "HEAD" : ((post_data || post_file) ? "POST" : "GET");
469 req_printf(tls_sock, "%s /%s HTTP/1.1\r\n", method, path);
470 req_printf(tls_sock, "Host: %s\r\n", host);
471 req_printf(tls_sock, "User-Agent: %s\r\n", user_agent);
472 req_printf(tls_sock, "Connection: close\r\n");
473
474 if (resume_offset > 0) {
475 req_printf(tls_sock, "Range: bytes=%lld-\r\n", (long long)resume_offset);
476 }
477
478 if (post_data || post_file) {
479 int has_ct = 0;
480 for (i = 0; i < custom_headers_num; i++) {
481 if (strncasecmp(custom_headers[i], "Content-Type:", 13) == 0) {
482 has_ct = 1;
483 break;
484 }
485 }
486 if (!has_ct) {
487 req_printf(tls_sock, "Content-Type: application/x-www-form-urlencoded\r\n");
488 }
489 req_printf(tls_sock, "Content-Length: %lld\r\n", post_len);
490 }
491
492 for (i = 0; i < custom_headers_num; i++) {
493 req_printf(tls_sock, "%s\r\n", custom_headers[i]);
494 }
495
496 req_printf(tls_sock, "\r\n");
497
498 if (post_data) {
499 tls_write(tls_sock, post_data, strlen(post_data));
500 } else if (post_file) {
501 char io_buf[8192];
502 ssize_t r;
503 while ((r = read(post_fd, io_buf, sizeof(io_buf))) > 0) {
504 if (tls_write(tls_sock, io_buf, r) < 0) {
505 eprintf("failed to write post data:\n");
506 }
507 }
508 close(post_fd);
509 post_fd = -1;
510 }
511
512 /* read headers */
513 total_read = 0;
514 header_end = NULL;
515 memset(s.buf, 0, sizeof(s.buf));
516 while (total_read < sizeof(s.buf) - 1) {
517 n = tls_read(tls_sock, s.buf + total_read, sizeof(s.buf) - 1 - total_read);
518 if (n <= 0) {
519 if (n < 0)
520 eprintf("read socket:\n");
521 else
522 eprintf("connection closed by server\n");
523 }
524 total_read += n;
525 s.buf[total_read] = '\0';
526 header_end = strstr(s.buf, "\r\n\r\n");
527 if (header_end)
528 break;
529 }
530
531 if (!header_end)
532 eprintf("http header too large or not found\n");
533
534 *header_end = '\0';
535 s.ts = tls_sock;
536 s.len = total_read;
537 s.idx = (header_end + 4) - s.buf;
538
539 if (Sflag) {
540 fprintf(stderr, "%s\n\n", s.buf);
541 }
542
543 if (strncasecmp(s.buf, "HTTP/1.1 ", 9) != 0 &&
544 strncasecmp(s.buf, "HTTP/1.0 ", 9) != 0) {
545 eprintf("invalid http response: %s\n", s.buf);
546 }
547 status = atoi(s.buf + 9);
548
549 if (status >= 300 && status < 400) {
550 loc = find_header(s.buf, "Location:");
551 if (!loc)
552 eprintf("redirect response without location header\n");
553
554 if (strncasecmp(loc, "http://", 7) == 0 ||
555 strncasecmp(loc, "https://", 8) == 0) {
556 new_url = estrdup(loc);
557 } else if (loc[0] == '/') {
558 new_url = emalloc(8 + strlen(host) + strlen(port) + strlen(loc) + 2);
559 sprintf(new_url, "%s://%s:%s%s", is_tls ? "https" : "http", host, port, loc);
560 } else {
561 last_slash = strrchr(path, '/');
562 dir_len = 0;
563 if (last_slash)
564 dir_len = last_slash - path + 1;
565 new_url = emalloc(8 + strlen(host) + strlen(port) + 1 + dir_len + strlen(loc) + 2);
566 sprintf(new_url, "%s://%s:%s/", is_tls ? "https" : "http", host, port);
567 if (dir_len > 0)
568 strncat(new_url, path, dir_len);
569 strcat(new_url, loc);
570 }
571
572 free(loc);
573 free(url);
574 url = new_url;
575 tls_close(tls_sock, 1);
576 tls_sock = NULL;
577 redirects++;
578 } else if (status == 206) {
579 out_mode = O_WRONLY | O_CREAT | O_APPEND;
580 } else if (status == 200) {
581 out_mode = O_WRONLY | O_CREAT | O_TRUNC;
582 } else if (status == 416) {
583 if (!qflag)
584 weprintf("file already fully retrieved or range invalid\n");
585 tls_close(tls_sock, 1);
586 free(url);
587 free(host);
588 free(port);
589 free(path);
590 return 0;
591 } else {
592 eprintf("server returned status: %d\n", status);
593 }
594
595 free(host);
596 free(port);
597 free(path);
598 }
599
600 if (spider) {
601 tls_close(tls_sock, 1);
602 free(url);
603 return 0;
604 }
605
606 cl_str = find_header(s.buf, "Content-Length:");
607 content_len = -1;
608 if (cl_str) {
609 content_len = strtoll(cl_str, NULL, 10);
610 free(cl_str);
611 }
612
613 te_str = find_header(s.buf, "Transfer-Encoding:");
614 chunked = 0;
615 if (te_str) {
616 if (strcasecmp(te_str, "chunked") == 0)
617 chunked = 1;
618 free(te_str);
619 }
620
621 if (strcmp(out_name, "-") != 0) {
622 out_fd = open(out_name, out_mode, 0644);
623 if (out_fd < 0)
624 eprintf("open %s:\n", out_name);
625 }
626
627 if (chunked)
628 read_chunked(&s, out_fd);
629 else
630 read_non_chunked(&s, out_fd, content_len);
631
632 tls_close(tls_sock, 1);
633 if (out_fd != 1)
634 close(out_fd);
635 if (Oflag != out_name && Pflag)
636 free(out_name);
637 free(url);
638
639 for (i = 0; i < custom_headers_num; i++) {
640 free(custom_headers[i]);
641 }
642 free(custom_headers);
643
644 return 0;
645}