main drudge
 1#!/bin/sh
# Fetch and format Drudge Report headlines.
 3set -eu
 4
 5url=https://www.drudgereport.com/
 6limit=5
 7
 8usage() {
 9	printf 'usage: %s [-n count]\n' "${0##*/}" >&2
10	exit 2
11}
12
# Command-line handling: the only accepted option is "-n count".  Any other
# option (getopts reports it as "?") goes to usage.
while getopts 'n:' opt; do
	if [ "$opt" = n ]; then
		limit=$OPTARG
	else
		usage
	fi
done

# Discard the parsed options; operands are not accepted.
shift $((OPTIND - 1))

if [ "$#" -ne 0 ]; then
	usage
fi

# The count must be a non-empty string of decimal digits ...
case $limit in
'' | *[!0-9]*) usage ;;
esac

# ... and greater than zero.  Errors from "[" (for example a value too large
# for the shell to compare) are silenced and treated as invalid too.
if ! [ "$limit" -gt 0 ] 2>/dev/null; then
	usage
fi
35
# extract_headlines LIMIT
#
# Read an HTML document on stdin and write at most LIMIT headlines to stdout.
# Each headline is two lines: the link text, then the link target.
#
# The document is flattened to a single line and then re-split so that every
# anchor (<a ...>...</a>) starts a line of its own and ends that line; awk then
# sees one anchor per record.
extract_headlines() {
	tr '\n' ' ' |
	sed 's#</[Aa]>#</A>\
#g; s#<[Aa][[:space:]]#\
<A #g' |
	awk -v limit="$1" '
	# Decode the few character references handled here.  "&amp;" is decoded
	# last so that "&amp;quot;" yields the text "&quot;" and not a quote.
	function decode(s) {
		gsub(/&nbsp;/, " ", s)
		gsub(/&#0*39;/, "\047", s)   # \047 is the apostrophe
		gsub(/&apos;/, "\047", s)
		gsub(/&quot;/, "\"", s)
		gsub(/&lt;/, "<", s)
		gsub(/&gt;/, ">", s)
		gsub(/&amp;/, "\\&", s)      # "\\&" is a literal ampersand for gsub
		return s
	}

	{
		segment = $0
		sub(/^[[:space:]]*/, "", segment)

		# Only records that begin with a complete opening anchor tag.
		if (!match(segment, /^<[Aa][[:space:]][^>]*>/))
			next
		tag = substr(segment, 1, RLENGTH)
		text = substr(segment, RLENGTH + 1)

		# Drop the closing tag and anything after it; an anchor that is
		# never closed is ignored.
		if (!sub(/<\/[Aa]>.*/, "", text))
			next

		# Take the first href attribute of the opening tag.  The leading
		# whitespace keeps names such as data-href from matching.  Anchors
		# without a double-quoted href (for example <a name="top">) are
		# skipped instead of printing a piece of the tag as the link.
		if (!match(tag, /[[:space:]][Hh][Rr][Ee][Ff][[:space:]]*=[[:space:]]*"[^"]*"/))
			next
		href = substr(tag, RSTART, RLENGTH)
		sub(/^[^"]*"/, "", href)
		sub(/"$/, "", href)
		href = decode(href)

		if (href == "")
			next
		# Links back to the front page itself are not headlines.
		if (href ~ /^https?:\/\/(www\.)?drudgereport\.com\/?$/)
			next

		# Reduce the anchor body to plain text: strip nested tags, decode
		# entities, squeeze whitespace runs and trim both ends.
		gsub(/<[^>]*>/, " ", text)
		text = decode(text)
		gsub(/[[:space:]][[:space:]]*/, " ", text)
		sub(/^ /, "", text)
		sub(/ $/, "", text)
		if (text == "")
			next

		print text
		print href

		if (++n >= limit + 0)
			exit
	}
	'
}

# print_headlines URL LIMIT
#
# Download URL and print up to LIMIT headlines found in it.
# Returns curl's exit status when the download fails, so a network or HTTP
# error is reported (-S) and propagated instead of silently producing no
# output.  -f makes HTTP error responses count as failures.
print_headlines() {
	page=$(curl -f -sS -L "$1") || return
	printf '%s\n' "$page" | extract_headlines "$2"
}

print_headlines "$url" "$limit"