0686c215b6898f78ac4a394b5e3c11f040913684
[bollux.git/.git] / bollux
1 #!/usr/bin/env bash
2 # bollux: a bash gemini client
3 # Author: Case Duckworth
4 # License: MIT
5 # Version: 0.1
6
# Program information: the name this script was invoked as, and its version
PRGN=${0##*/}
VRSN='0.1'
# State: redirects followed since the last non-redirect response
REDIRECTS=0
12
# bollux_usage: print the help text to stdout.
# Globals: PRGN (read), VRSN (read)
bollux_usage() {
	printf '%s\n' \
		"$PRGN (v. $VRSN): a bash gemini client" \
		'usage:' \
		"        $PRGN [-h]" \
		"        $PRGN [-q] [-v] [URL]" \
		'flags:' \
		'        -h      show this help and exit' \
		'        -q      be quiet: log no messages' \
		'        -v      verbose: log more messages' \
		'parameters:' \
		'        URL     the URL to start in' \
		'                If not provided, the user will be prompted.'
}
28
# run COMMAND [ARG...]
# Log the command line at debug level, then execute it.
# Returns the exit status of the executed command.
run() {
	log debug "${@}"
	"${@}"
}
33
# die EXIT_CODE MESSAGE...
# Log MESSAGE as an error and terminate the shell with EXIT_CODE.
die() {
	# keep the exit code local so it does not leak into the global scope
	local ec="$1"
	shift
	log error "$*"
	exit "$ec"
}
40
# trim: filter stdin to stdout, removing leading and trailing whitespace
# from every line.
trim() {
	sed 's/^[[:space:]]*//; s/[[:space:]]*$//'
}
42
# log LEVEL MESSAGE...
# Write MESSAGE to stderr, prefixed with the program name in a colour
# chosen by LEVEL: d*/D* (debug, blue), e*/E* (error, red), anything
# else (bold).
# Globals: BOLLUX_LOGLEVEL (read), PRGN (read), fmt (written)
# Returns: 0 when BOLLUX_LOGLEVEL is QUIET (nothing is printed);
#          1 for a debug message when BOLLUX_LOGLEVEL is not DEBUG.
log() {
	if [[ "$BOLLUX_LOGLEVEL" == QUIET ]]; then
		return 0
	fi

	case "$1" in
	[dD]*)
		# debug messages are only shown at the DEBUG level
		if [[ "$BOLLUX_LOGLEVEL" != DEBUG ]]; then
			return 1
		fi
		fmt=34
		;;
	[eE]*) fmt=31 ;;
	*) fmt=1 ;;
	esac

	shift
	printf '\e[%sm%s:\e[0m\t%s\n' "$fmt" "$PRGN" "$*" >&2
}
58
# main entry point: parse the arguments, load the configuration, and
# load the start URL (asking for one when none is set)
bollux() {
	run bollux_args "$@"
	run bollux_config

	# no start URL from the command line or the config: prompt for it
	[[ "${BOLLUX_URL:+isset}" ]] || run prompt GO BOLLUX_URL

	run blastoff "$BOLLUX_URL"
}
70
# bollux_args [-h] [-q] [-v] [URL]
# Parse the command line.
#   -h  print the usage text and exit
#   -v  set BOLLUX_LOGLEVEL to DEBUG
#   -q  set BOLLUX_LOGLEVEL to QUIET
# When exactly one positional argument remains it becomes BOLLUX_URL.
# Unknown options terminate the program through die.
bollux_args() {
	# getopts keeps its position in OPTIND; a local copy starting at 1
	# makes every call parse its own argument list from the beginning
	local OPT OPTARG OPTIND=1

	while getopts :hvq OPT; do
		case "$OPT" in
		h)
			bollux_usage
			exit
			;;
		v) BOLLUX_LOGLEVEL=DEBUG ;;
		q) BOLLUX_LOGLEVEL=QUIET ;;
		:) die 1 "Option -$OPTARG requires an argument" ;;
		*) die 1 "Unknown option: -$OPTARG" ;;
		esac
	done
	shift $((OPTIND - 1))
	if (($# == 1)); then
		BOLLUX_URL="$1"
	fi
}
89
# bollux_config: source the user's config file (if present) and fill in
# defaults for every BOLLUX_* setting that is still unset or empty.
# The config file is $BOLLUX_CONFIG, defaulting to bollux/config under
# $XDG_CONFIG_HOME (falling back to $HOME/.config).
bollux_config() {
	# XDG_CONFIG_HOME is the variable defined by the XDG Base Directory
	# specification; XDG_CONFIG_DIR is still honoured as a fallback for
	# setups that relied on the old name.
	local config_home="${XDG_CONFIG_HOME:-${XDG_CONFIG_DIR:-$HOME/.config}}"
	: "${BOLLUX_CONFIG:=$config_home/bollux/config}"

	if [[ -f "$BOLLUX_CONFIG" ]]; then
		# shellcheck disable=1090
		. "$BOLLUX_CONFIG"
	else
		log debug "Can't load config file '$BOLLUX_CONFIG'."
	fi

	# NOTE(review): the /tmp defaults below are fixed, predictable names
	# shared by every user of the machine.
	: "${BOLLUX_DOWNDIR:=.}"                   # where to save downloads
	: "${BOLLUX_LOGLEVEL:=3}"                  # log level (log only tests for QUIET and DEBUG)
	: "${BOLLUX_MAXREDIR:=5}"                  # max redirects
	: "${BOLLUX_PORT:=1965}"                   # port number
	: "${BOLLUX_PROTO:=gemini}"                # default protocol
	: "${BOLLUX_LESSKEY:=/tmp/bollux-lesskey}" # where to store binds
	: "${BOLLUX_PAGESRC:=/tmp/bollux-src}"     # where to save the page source
	: "${BOLLUX_URL:=}"                        # start url
}
109
# prompt TEXT [NAME...]
# Show "TEXT> " and read one line from the terminal (with readline
# editing, backslashes kept literally) into NAME..., as read does.
# Globals: prompt (written)
prompt() {
	prompt=$1
	read -e -r -p "${prompt}> " "${@:2}" </dev/tty
}
115
# blastoff [-u] URL -- load a url
# Without -u, a URL that differs from BOLLUX_URL is resolved against
# BOLLUX_URL with transform_resource. With -u the URL is used as given.
# A URL without "://" gets BOLLUX_PROTO prepended. The response from the
# server is piped into handle_response.
# Globals: BOLLUX_URL, BOLLUX_PROTO, BOLLUX_PORT (read); URL, server (written)
blastoff() {
	local well_formed=true
	if [[ "$1" == "-u" ]]; then
		well_formed=false
		shift
	fi
	URL="$1"

	if $well_formed && [[ "$1" != "$BOLLUX_URL" ]]; then
		URL="$(run transform_resource "$BOLLUX_URL" "$1")"
	fi
	if [[ "$URL" != *://* ]]; then
		URL="$BOLLUX_PROTO://$URL"
	fi
	URL="$(trim <<<"$URL")"

	# Split the authority into host and port. The authority ends at the
	# first "/", "?" or "#"; userinfo ("user@") is not part of the host.
	local authority hostport port="$BOLLUX_PORT"
	# host is either a bracketed IPv6 literal or contains no colon
	local port_re='^(\[[^]]*\]|[^:]*):([0-9]+)$'
	authority="${URL#*://}"
	authority="${authority%%[/?#]*}"
	hostport="${authority##*@}"
	if [[ "$hostport" =~ $port_re ]]; then
		# an explicit port in the URL overrides BOLLUX_PORT
		server="${BASH_REMATCH[1]}"
		port="${BASH_REMATCH[2]}"
	else
		# "host:" with an empty port means the default port
		server="${hostport%:}"
	fi

	run request_url "$server" "$port" "$URL" |
		run handle_response "$URL"
}
136
# transform_resource BASE_URL REFERENCE_URL
# Resolve REFERENCE_URL against BASE_URL (RFC 3986 section 5.2.2) and
# print the resulting target URL on stdout.
# Globals: STRICT (read) -- set to "false" to ignore a reference scheme
#          that is identical to the base scheme.
transform_resource() {
	declare -A R B T # reference, base url, target
	# parse_url prints NAME[key]=value assignments with %q-quoted values
	eval "$(parse_url B "$1")"
	eval "$(parse_url R "$2")"
	# A non-strict parser may ignore a scheme in the reference
	# if it is identical to the base URI's scheme.
	if ! "${STRICT:-true}" && [[ "${R[scheme]}" == "${B[scheme]}" ]]; then
		# remove the array element itself (quoted so it is not globbed)
		unset 'R[scheme]'
	fi

	# basically pseudo-code from spec ported to bash
	# (names passed to isdefined/isempty are quoted: an unquoted R[path]
	# is a glob pattern and could expand to a file name)
	if isdefined "R[scheme]"; then
		T[scheme]="${R[scheme]}"
		isdefined "R[authority]" && T[authority]="${R[authority]}"
		isdefined "R[path]" &&
			T[path]="$(remove_dot_segments "${R[path]}")"
		isdefined "R[query]" && T[query]="${R[query]}"
	else
		if isdefined "R[authority]"; then
			T[authority]="${R[authority]}"
			isdefined "R[path]" &&
				T[path]="$(remove_dot_segments "${R[path]}")"
			isdefined "R[query]" && T[query]="${R[query]}"
		else
			if isempty "R[path]"; then
				T[path]="${B[path]}"
				if isdefined "R[query]"; then
					T[query]="${R[query]}"
				elif isdefined "B[query]"; then
					# only inherit a query the base really has;
					# otherwise the result would gain a bare "?"
					T[query]="${B[query]}"
				fi
			else
				if [[ "${R[path]}" == /* ]]; then
					T[path]="$(remove_dot_segments "${R[path]}")"
				else
					T[path]="$(merge_paths "${B[authority]}" "${B[path]}" "${R[path]}")"
					T[path]="$(remove_dot_segments "${T[path]}")"
				fi
				isdefined "R[query]" && T[query]="${R[query]}"
			fi
			# copy the base authority only when the base has one
			isdefined "B[authority]" && T[authority]="${B[authority]}"
		fi
		isdefined "B[scheme]" && T[scheme]="${B[scheme]}"
	fi
	isdefined "R[fragment]" && T[fragment]="${R[fragment]}"
	# cf. 5.3 -- recomposition
	local r=""
	isdefined "T[scheme]" && r="$r${T[scheme]}:"
	isdefined "T[authority]" && r="$r//${T[authority]}"
	r="$r${T[path]}"
	isdefined "T[query]" && r="$r?${T[query]}"
	isdefined "T[fragment]" && r="$r#${T[fragment]}"
	printf '%s\n' "$r"
}
191
# merge_paths BASE_AUTHORITY BASE_PATH REFERENCE_PATH
# Merge a relative reference path with the base path (RFC 3986, 5.2.3)
# and print the result on stdout.
#   BASE_AUTHORITY  non-empty when the base URL has an authority
merge_paths() {
	local authority="$1"
	local base="$2"
	local ref="$3"

	# if the reference path is empty, print the base path
	# NOTE(review): the substitution below looks meant to collapse "//";
	# it is kept exactly as it was -- confirm what it really replaces.
	if [[ -z "$ref" ]]; then
		printf '%s\n' "${base//\/\//\//}"
		return
	fi

	if [[ -n "$authority" && -z "$base" ]]; then
		# base has an authority and an empty path: "/" + reference
		printf '/%s\n' "${ref//\/\//\//}"
	elif [[ "$base" == */* ]]; then
		# replace everything after the last "/" of the base path
		printf '%s/%s\n' "${base%/*}" "${ref#/}"
	else
		# a base path without any "/" is dropped entirely
		printf '%s\n' "$ref"
	fi
}
214
# remove_dot_segments PATH
# Remove "." and ".." segments from PATH (RFC 3986, 5.2.4) and print the
# result on stdout.
remove_dot_segments() {
	local input="$1"
	local output=
	# The matched text is quoted wherever it is stripped with # or % so
	# it is removed literally; unquoted, a segment such as "[ab]" would
	# be read as a glob pattern and fail to match itself.
	while [[ "$input" ]]; do
		if [[ "$input" =~ ^\.\.?/ ]]; then
			# leading "./" or "../": drop it
			input="${input#"${BASH_REMATCH[0]}"}"
		elif [[ "$input" =~ ^/\.(/|$) ]]; then
			# "/./" or a final "/.": replace with "/"
			input="/${input#"${BASH_REMATCH[0]}"}"
		elif [[ "$input" =~ ^/\.\.(/|$) ]]; then
			# "/../" or a final "/..": replace with "/" and remove
			# the last segment already moved to the output
			input="/${input#"${BASH_REMATCH[0]}"}"
			if [[ "$output" =~ /?[^/]+$ ]]; then
				output="${output%"${BASH_REMATCH[0]}"}"
			fi
		elif [[ "$input" == . || "$input" == .. ]]; then
			input=
		else
			# move the first path segment (with its leading "/",
			# if any) from the input to the output
			[[ $input =~ ^(/?[^/]*)(/?.*)$ ]] || echo NOMATCH >&2
			output="$output${BASH_REMATCH[1]}"
			input="${BASH_REMATCH[2]}"
		fi
	done
	printf '%s\n' "${output//\/\//\//}"
}
238
# parse_url NAME STRING
# Split STRING into URL components and print shell assignments
# NAME[scheme], NAME[authority], NAME[query], NAME[fragment] and
# NAME[path] on stdout, one per line, with %q-quoted values.
# Usage: declare -A NAME; eval "$(parse_url NAME STRING)" => NAME[...]
parse_url() {
	local name="$1"
	local string="$2"
	local re='^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?'
	[[ $string =~ $re ]] || return $?

	# Groups 1, 3, 6 and 8 include the delimiter (":", "//", "?", "#"),
	# so they are non-empty exactly when the component is present --
	# even when the component's own text (groups 2, 4, 7, 9) is empty,
	# as for the authority of "file:///x" or the query of "/p?".
	local -a present=(1 3 6 8)
	local -a value=(2 4 7 9)
	local -a key=(scheme authority query fragment)
	local i
	for i in 0 1 2 3; do
		if [[ "${BASH_REMATCH[${present[i]}]}" ]]; then
			printf '%s[%s]=%q\n' "$name" "${key[i]}" \
				"${BASH_REMATCH[${value[i]}]}"
		fi
	done
	# the path is always emitted, even when it is empty
	printf '%s[path]=%q\n' "$name" "${BASH_REMATCH[5]}"
}
258
# isdefined NAME
# Succeed when the variable (or array element) called NAME is set,
# whether or not it is empty.
isdefined() {
	[[ -n "${!1+x}" ]]
}
# isempty NAME
# Succeed when the variable (or array element) called NAME is set AND
# holds the empty string.
isempty() {
	[[ -z "${!1-x}" ]]
}
263
# request_url SERVER PORT URL
# Connect to SERVER:PORT with openssl s_client and send URL as the
# request line; whatever the command prints goes to stdout.
# Globals: ssl_cmd (written)
request_url() {
	local server="$1" port="$2" url="$3"

	ssl_cmd=(
		openssl s_client -crlf -quiet
		-connect "$server:$port"
		-servername "$server" # SNI
	)
	# stderr of the command is discarded
	run "${ssl_cmd[@]}" <<<"$url" 2>/dev/null
}
273
# handle_response URL
# Read a response from stdin: a header line "<code> <meta>" terminated
# by CRLF, followed by the body. Acts on the first digit of the code:
#   1x  prompt the user with <meta> and request URL with that query
#   2x  display the body, <meta> being the mime type
#   3x  follow the redirect to <meta>, up to BOLLUX_MAXREDIR times
#   4x, 5x, 6x and anything else: die with exit code 100 + code
# Globals: REDIRECTS, BOLLUX_URL, QUERY (written); BOLLUX_MAXREDIR (read)
handle_response() {
	local url="$1" code meta hdr

	# Read the whole header line including its LF so that the body seen
	# by display/download starts at its first byte, then drop the CR.
	IFS= read -r hdr
	hdr="${hdr%$'\r'}"
	# first word is the status code, the rest is the meta text
	read -r code meta <<<"$hdr"

	log x "[$code] $meta"

	# an empty or non-numeric code cannot be used in the arithmetic below
	if [[ ! "$code" =~ ^[0-9]+$ ]]; then
		die 1 "Empty or malformed response header."
	fi

	# 10#$code: force base 10 so a leading zero is not read as octal
	case "$code" in
	1*)
		REDIRECTS=0
		BOLLUX_URL="$url"
		run prompt "$meta" QUERY
		run blastoff "?$QUERY"
		;;
	2*)
		REDIRECTS=0
		BOLLUX_URL="$url"
		run display "$meta"
		;;
	3*)
		((REDIRECTS += 1))
		if ((REDIRECTS > BOLLUX_MAXREDIR)); then
			die "$((100 + 10#$code))" "Too many redirects!"
		fi
		BOLLUX_URL="$url"
		run blastoff "$meta"
		;;
	4* | 5* | 6*)
		REDIRECTS=0
		die "$((100 + 10#$code))" "$code"
		;;
	*)
		# exit code and message are separate arguments of die
		die "$((100 + 10#$code))" "Unknown response code: $code."
		;;
	esac
}
322
# display META
# Show the response body arriving on stdin. META is the mime type,
# optionally followed by ";charset=..." (the charset is only logged).
# text/* bodies are saved to BOLLUX_PAGESRC, passed through a
# typeset_<subtype> function when one is defined, and paged with less;
# a non-zero status from that pipeline is handed to handle_keypress.
# Every other mime type is passed to download.
# Globals: BOLLUX_LESSKEY, BOLLUX_PAGESRC, BOLLUX_URL (read)
display() {
	# local, so a charset from an earlier page is not reused for a
	# page that does not declare one
	local mime charset submime
	local -a less_cmd

	case "$1" in
	*\;*)
		mime="$(cut -d\; -f1 <<<"$1" | trim)"
		charset="$(cut -d\; -f2 <<<"$1" | trim)"
		;;
	*) mime="$(trim <<<"$1")" ;;
	esac

	[[ -z "$mime" ]] && mime="text/gemini"
	if [[ -z "$charset" ]]; then
		charset="utf-8"
	else
		charset="${charset#charset=}"
	fi

	log debug "mime=$mime; charset=$charset"

	case "$mime" in
	text/*)
		less_cmd=(less -R)
		# use the key bindings only when the lesskey file is readable
		# or could be created
		if [[ -r "$BOLLUX_LESSKEY" ]] || mklesskey "$BOLLUX_LESSKEY"; then
			less_cmd+=(-k "$BOLLUX_LESSKEY")
		fi
		less_cmd+=(
			-Pm'bollux$'
			-PM'o\:open, g\:goto, r\:refresh$'
			-M
		)

		submime="${mime#*/}"
		# look the typesetter up by its exact name; searching the
		# function list for the bare subtype also hits unrelated
		# functions whose name merely contains it
		if declare -F "typeset_$submime" >/dev/null 2>&1; then
			log d "typeset_$submime"
			{
				normalize_crlf |
					tee "$BOLLUX_PAGESRC" |
					run "typeset_$submime" |
					run "${less_cmd[@]}"
			} || run handle_keypress "$?"
		else
			log d "cat"
			{
				normalize_crlf |
					tee "$BOLLUX_PAGESRC" |
					run "${less_cmd[@]}"
			} || run handle_keypress "$?"
		fi
		;;
	*) run download "$BOLLUX_URL" ;;
	esac
}
374
# mklesskey FILE
# Feed the key bindings to lesskey, which writes them to FILE. Each key
# makes less quit; the numbers after "#" are the exit statuses that
# handle_keypress dispatches on.
mklesskey() {
	printf '%s\n' \
		'#command' \
		'o quit 0 # 48 open a link' \
		'g quit 1 # 49 goto a url' \
		'[ quit 2 # 50 back' \
		'] quit 3 # 51 forward' \
		'r quit 4 # 52 re-request / download' |
		lesskey -o "$1" -
}
385
# normalize_crlf: filter stdin to stdout, turning every CRLF or bare CR
# into LF. Input is read in records separated by two newlines and each
# record is printed followed by an empty line.
normalize_crlf() {
	gawk '
	BEGIN { RS = "\n\n" }
	{
		gsub(/\r\n?/, "\n")
		print
		print ""
	}'
}
389
# typeset_gemini: filter text/gemini from stdin to ANSI-coloured text on
# stdout. Every output line starts with a margin holding the line-type
# marker (#, =>, *, ```). Links are numbered in order of appearance and
# printed as "[n] label <TAB> url".
typeset_gemini() {
	gawk '
	BEGIN {
		pre = 0
		margin = margin ? margin : 4
		# escape sequences per line type; res resets all attributes
		txs = ""
		lns = "\033[1m"
		lus = "\033[36m"
		lts = "\033[4m"
		pfs = ""
		h1s = "\033[1;4m"
		h2s = "\033[1m"
		h3s = "\033[3m"
		lis = ""
		res = "\033[0m"
		ms = "\033[35m"
	}
	# a line starting with ``` toggles preformatted mode; the toggle
	# line itself is not printed
	/^```/ {
		pre = ! pre
		next
	}
	# preformatted lines are printed verbatim and skip every other rule,
	# so "#", "=>" or "*" inside such a block is not reformatted
	pre {
		printf ms "%" (margin-1) "s " res pfs "%s" res "\n", "```", $0
		next
	}
	# heading: the number of leading # characters selects the style
	/^#/ {
		match($0, /#+/)
		mark = substr($0, RSTART, RLENGTH)
		sub(/#+[[:space:]]*/, "", $0)
		level = length(mark)
		if (level == 1) {
			fmt = h1s "%s" res
		} else if (level == 2) {
			fmt = h2s "%s" res
		} else {
			fmt = h3s "%s" res
		}
	}
	# link: the first word is the URL, the remaining words the label
	/^=>/ {
		mark = "=>"
		sub(/=>[[:space:]]*/, "", $0)
		desc = $1
		text = ""
		for (w = 2; w <= NF; w++) {
			text = text (text ? " " : "") $w
		}
		fmt = lns "[" (++ln) "]" res " " lts "%s" res "\t" lus "%s" res
	}
	# list item
	/^\*[[:space:]]/ {
		mark = "*"
		sub(/\*[[:space:]]*/, "", $0)
		fmt = lis "%s" res
	}
	# print the line with whatever the rules above set up, then reset
	{
		fmt = fmt ? fmt : "%s"
		text = text ? text : $0
		desc = desc ? desc : ""
		printf ms "%" (margin-1) "s " res fmt "\n", mark, text, desc
		mark = fmt = text = desc = ""
	}
	'
}
454
# handle_keypress STATUS
# Act on the exit status less returned; the statuses are the ones the
# key bindings written by mklesskey are annotated with.
handle_keypress() {
	if [[ "$1" == 48 ]]; then
		# o - open a link -- show a menu of links on the page
		run select_url "$BOLLUX_PAGESRC"
	elif [[ "$1" == 49 ]]; then
		# g - goto a url -- input a new url
		prompt GO URL
		run blastoff -u "$URL"
	elif [[ "$1" == 50 ]]; then
		# [ - back in the history
		run history_back
	elif [[ "$1" == 51 ]]; then
		# ] - forward in the history
		run history_forward
	elif [[ "$1" == 52 ]]; then
		# r - re-request the current resource
		run blastoff "$BOLLUX_URL"
	fi
	# anything else (53-57 are still available for binding) is ignored
}
477
# select_url FILE
# Offer the links found in the page source FILE as a menu on the
# terminal and load the chosen one; the menu is shown again until a
# load succeeds.
# Globals: MAPFILE (written), u (written)
select_url() {
	run mapfile -t < <(extract_links <"$1")
	select u in "${MAPFILE[@]}"; do
		# the URL is the first word of the menu entry
		if run blastoff "$(gawk '{print $1}' <<<"$u")"; then
			break
		fi
	done </dev/tty
}
484
# extract_links: read page source on stdin and print one line per link
# line (lines starting with "=>"): the first tab-separated field after
# the marker, followed by the second field in colour when there is one.
extract_links() {
	gawk -F $'\t' '
	/^=>/ {
		sub(/=>[[:space:]]*/, "")
		if ($2) {
			printf "%s (\033[34m%s\033[0m)\n", $1, $2
		} else {
			printf "%s\n", $1
		}
	}'
}
495
# download [URL]
# Save stdin to a temporary file, then move it to BOLLUX_DOWNDIR under
# the last path component of URL (default: BOLLUX_URL). When the target
# already exists the data stays in the temporary file.
# Returns: 1 when no temporary file could be created.
download() {
	local url="${1:-$BOLLUX_URL}"
	local tn fn name

	# without a temporary file the redirection below has no target
	if ! tn="$(mktemp)"; then
		log error "Error creating a temporary file for '$url'."
		return 1
	fi
	log x "Downloading: '$url' => '$tn'..."
	dd status=progress >"$tn"

	name="${url##*/}"
	# a URL ending in "/" has no file name: keep the temporary name, so
	# the reported path is the real one
	[[ -n "$name" ]] || name="${tn##*/}"
	fn="$BOLLUX_DOWNDIR/$name"

	if [[ -e "$fn" ]]; then
		# never overwrite: the data stays in the temporary file
		log x "Saved '$tn'."
	elif mv -- "$tn" "$fn"; then
		log x "Saved '$fn'."
	else
		log error "Error saving '$fn': downloaded to '$tn'."
	fi
}
509
# history_back: placeholder -- only logs that history is not implemented
history_back() {
	log error "Not implemented."
}
# history_forward: placeholder -- only logs that history is not implemented
history_forward() {
	log error "Not implemented."
}
512
# Executed directly: start the client with the command-line arguments.
# Sourced from another shell: only define the functions and turn on
# debug logging.
if [[ "${BASH_SOURCE[0]}" != "$0" ]]; then
	BOLLUX_LOGLEVEL=DEBUG
else
	run bollux "$@"
fi