main
drudge
1#!/bin/sh
2# fetch n format drudge report headlines
3set -eu
4
5url=https://www.drudgereport.com/
6limit=5
7
8usage() {
9 printf 'usage: %s [-n count]\n' "${0##*/}" >&2
10 exit 2
11}
12
13while getopts 'n:' opt; do
14 case $opt in
15 n)
16 limit=$OPTARG
17 ;;
18 *)
19 usage
20 ;;
21 esac
22done
23
24shift $((OPTIND - 1))
25
26[ "$#" -eq 0 ] || usage
27
28case $limit in
29 ''|*[!0-9]*)
30 usage
31 ;;
32esac
33
34[ "$limit" -gt 0 ] 2>/dev/null || usage
35
36curl -L -s "$url" |
37tr '\n' ' ' |
38sed 's#</[Aa]>#</A>\
39#g; s#<[Aa][[:space:]]#\
40<A #g' |
41awk -v limit="$limit" '
42BEGIN {
43 n = 0
44}
45
46{
47 segment = $0
48 sub(/^[[:space:]]*/, "", segment)
49
50 if (segment !~ /^<[Aa][[:space:]]/)
51 next
52
53 if (segment !~ /<\/[Aa]>/)
54 next
55
56 href = segment
57 sub(/^.*[Hh][Rr][Ee][Ff]="/, "", href)
58 sub(/".*$/, "", href)
59
60 text = segment
61 sub(/^<[Aa][^>]*>/, "", text)
62 sub(/<\/[Aa]>.*/, "", text)
63 gsub(/<[^>]*>/, " ", text)
64 gsub(/ /, " ", text)
65 gsub(/'/, "'"'"'", text)
66 gsub(/"/, "\"", text)
67 gsub(/&/, "\\&", text)
68 gsub(/[[:space:]][[:space:]]*/, " ", text)
69 sub(/^ /, "", text)
70 sub(/ $/, "", text)
71
72 if (href ~ /^https?:\/\/(www\.)?drudgereport\.com\/?$/)
73 next
74 if (text == "")
75 next
76
77 print text
78 print href
79
80 n++
81 if (n == limit)
82 exit
83}
84'