2e0aa0e0521b3d7044f7f9c164556c340bc2ba7e
[bollux.git/.git] / bollux
1 #!/usr/bin/env bash
2 # bollux: a bash gemini client
3 # Author: Case Duckworth
4 # License: MIT
5 # Version: 0.1
6
# Program information
PRGN=${0##*/} # name this script was invoked as, without directories
VRSN=0.1
# State
REDIRECTS=0 # redirect counter, maintained by handle_response
# Bash options
# shopt -s extglob
14
# Print the help text (program name, version, flags, parameters) to stdout.
bollux_usage() {
        printf '%s\n' \
                "$PRGN (v. $VRSN): a bash gemini client" \
                "usage:" \
                "        $PRGN [-h]" \
                "        $PRGN [-q] [-v] [URL]" \
                "flags:" \
                "        -h      show this help and exit" \
                "        -q      be quiet: log no messages" \
                "        -v      verbose: log more messages" \
                "parameters:" \
                "        URL     the URL to start in" \
                "                If not provided, the user will be prompted."
}
30
# run COMMAND [ARG...]
# Log the command line at debug level, then execute it; the exit status is
# that of the command.
run() {
        log debug "$@"
        "$@"
}
35
# die EXIT_CODE MESSAGE...
# Log MESSAGE as an error and exit the shell with EXIT_CODE.
die() {
        ec="$1"
        log error "${*:2}"
        exit "$ec"
}
42
# trim STRING
# Print STRING with leading and trailing whitespace removed (same technique
# as the pure bash bible's trim_string, using a named local).
trim() {
        local s="$1"
        s="${s#"${s%%[![:space:]]*}"}" # drop the leading whitespace run
        s="${s%"${s##*[![:space:]]}"}" # drop the trailing whitespace run
        printf '%s\n' "$s"
}
49
# log LEVEL MESSAGE...
# Write a colored "<program>: <message>" line to stderr.
#   LEVEL starting with d/D: debug, printed only when BOLLUX_LOGLEVEL=DEBUG
#   LEVEL starting with e/E: error
#   anything else:           normal message
# Nothing is printed when BOLLUX_LOGLEVEL=QUIET.
log() {
        [[ "$BOLLUX_LOGLEVEL" == QUIET ]] && return
        local fmt # SGR code for the prefix; local so it does not leak
        case "$1" in
        d* | D*) # debug
                [[ "$BOLLUX_LOGLEVEL" == DEBUG ]] || return
                fmt=34
                ;;
        e* | E*) # error
                fmt=31
                ;;
        *) fmt=1 ;;
        esac
        shift
        printf >&2 '\e[%sm%s:\e[0m\t%s\n' "$fmt" "$PRGN" "$*"
}
65
# main entry point: parse arguments, load configuration, then load the
# start URL (prompting for one if none was given)
bollux() {
        run bollux_args "$@"
        run bollux_config

        [[ "${BOLLUX_URL:+isset}" ]] || run prompt GO BOLLUX_URL

        run blastoff "$BOLLUX_URL"
}
77
# bollux_args [-h] [-q] [-v] [URL]
# Parse command-line arguments.
#   -h  print usage and exit
#   -v  set BOLLUX_LOGLEVEL=DEBUG
#   -q  set BOLLUX_LOGLEVEL=QUIET
# If exactly one non-option argument remains it is stored in BOLLUX_URL.
bollux_args() {
        # getopts keeps its position in OPTIND; keeping it (and OPT/OPTARG)
        # local makes every call start from the first argument again.
        local OPT OPTARG OPTIND=1
        while getopts :hvq OPT; do
                case "$OPT" in
                h)
                        bollux_usage
                        exit
                        ;;
                v) BOLLUX_LOGLEVEL=DEBUG ;;
                q) BOLLUX_LOGLEVEL=QUIET ;;
                :) die 1 "Option -$OPTARG requires an argument" ;;
                *) die 1 "Unknown option: -$OPTARG" ;;
                esac
        done
        shift $((OPTIND - 1))
        if (($# == 1)); then
                BOLLUX_URL="$1"
        fi
}
96
# Load the user configuration file (if any) and fill in defaults for every
# setting that is still unset or empty afterwards.
# The config file location is $BOLLUX_CONFIG; when that is not set it is
# <config dir>/bollux/config, where <config dir> is $XDG_CONFIG_HOME, then
# the previously used $XDG_CONFIG_DIR, then $HOME/.config.
bollux_config() {
        local config_home="${XDG_CONFIG_HOME:-${XDG_CONFIG_DIR:-$HOME/.config}}"
        : "${BOLLUX_CONFIG:=$config_home/bollux/config}"

        if [[ -f "$BOLLUX_CONFIG" ]]; then
                # shellcheck disable=1090
                . "$BOLLUX_CONFIG"
        else
                log debug "Can't load config file '$BOLLUX_CONFIG'."
        fi

        ## behavior
        : "${BOLLUX_DOWNDIR:=.}"                   # where to save downloads
        : "${BOLLUX_LOGLEVEL:=3}"                  # log level
        : "${BOLLUX_MAXREDIR:=5}"                  # max redirects
        : "${BOLLUX_PORT:=1965}"                   # port number
        : "${BOLLUX_PROTO:=gemini}"                # default protocol
        : "${BOLLUX_LESSKEY:=/tmp/bollux-lesskey}" # where to store binds
        : "${BOLLUX_PAGESRC:=/tmp/bollux-src}"     # where to save the page source
        : "${BOLLUX_URL:=}"                        # start url
        ## typesetting
        : "${T_MARGIN:=4}" # left and right margin
        : "${T_WIDTH:=0}"  # width of the viewport -- 0 = get term width
        # colors -- these will be wrapped in \e[ __ m
        C_RESET='\e[0m'         # reset
        : "${C_SIGIL:=35}"      # sigil (=>, #, ##, ###, *, ```)
        : "${C_LINK_NUMBER:=1}" # link number
        : "${C_LINK_TITLE:=4}"  # link title
        : "${C_LINK_URL:=36}"   # link URL
        : "${C_HEADER1:=1;4}"   # header 1 formatting
        : "${C_HEADER2:=1}"     # header 2 formatting
        : "${C_HEADER3:=3}"     # header 3 formatting
        : "${C_LIST:=0}"        # list formatting
        : "${C_PRE:=0}"         # preformatted text formatting
}
131
# prompt TEXT [NAME...]
# Show "TEXT> " and read one line from the terminal (with readline editing)
# into the given variable names, as the read builtin does.
prompt() {
        prompt="$1"
        read -e -r -p "${prompt}> " "${@:2}" </dev/tty
}
137
# blastoff [-u] URL -- load a url
# Without -u, URL is treated as a reference and resolved against BOLLUX_URL
# (unless it is BOLLUX_URL itself); with -u it is used as typed.
# A missing scheme is filled in from BOLLUX_PROTO. The host to connect to is
# the URL's authority up to the first "/", "?" or "#"; a ":port" suffix
# there overrides BOLLUX_PORT.
# Sets the globals URL and server; the response is piped to handle_response.
blastoff() {
        local well_formed=true
        local port="$BOLLUX_PORT"
        local hostport_re='^(.*):([0-9]+)$'
        if [[ "$1" == "-u" ]]; then
                well_formed=false
                shift
        fi
        URL="$1"

        if $well_formed && [[ "$1" != "$BOLLUX_URL" ]]; then
                URL="$(run transform_resource "$BOLLUX_URL" "$1")"
        fi
        [[ "$URL" != *://* ]] && URL="$BOLLUX_PROTO://$URL"
        URL="$(trim "$URL")"

        server="${URL#*://}"
        # the authority also ends at a query or fragment when there is no path
        server="${server%%[/?#]*}"
        # an explicit port must not end up inside the host name
        if [[ "$server" =~ $hostport_re ]]; then
                server="${BASH_REMATCH[1]}"
                port="${BASH_REMATCH[2]}"
        fi

        log d "URL='$URL' server='$server'"

        run request_url "$server" "$port" "$URL" |
                run handle_response "$URL"
}
160
# transform_resource BASE_URL REFERENCE_URL
# Resolve REFERENCE_URL against BASE_URL and print the resulting URL.
# Follows the reference-resolution pseudo-code the original comments point
# to (sections 5.2.x / 5.3 of the URI spec). Set STRICT=false to ignore a
# reference scheme that equals the base scheme.
# Any port in the resulting authority is dropped on output.
transform_resource() {
        declare -A R B T # reference, base url, target
        local r=""
        # parse_url prints %q-quoted NAME[key]=value assignments
        eval "$(run parse_url B "$1")"
        eval "$(run parse_url R "$2")"
        # A non-strict parser may ignore a scheme in the reference
        # if it is identical to the base URI's scheme.
        if ! "${STRICT:-true}" && [[ "${R[scheme]}" == "${B[scheme]}" ]]; then
                # unset the array element itself, not a variable named
                # after the scheme's value
                unset 'R[scheme]'
        fi

        # basically pseudo-code from spec ported to bash
        if isdefined "R[scheme]"; then
                T[scheme]="${R[scheme]}"
                isdefined "R[authority]" && T[authority]="${R[authority]}"
                isdefined "R[path]" &&
                        T[path]="$(run remove_dot_segments "${R[path]}")"
                isdefined "R[query]" && T[query]="${R[query]}"
        else
                if isdefined "R[authority]"; then
                        T[authority]="${R[authority]}"
                        isdefined "R[path]" &&
                                T[path]="$(remove_dot_segments "${R[path]}")"
                        isdefined "R[query]" && T[query]="${R[query]}"
                else
                        if isempty "R[path]"; then
                                T[path]="${B[path]}"
                                if isdefined "R[query]"; then
                                        T[query]="${R[query]}"
                                elif isdefined "B[query]"; then
                                        # copy only a query that exists, so no
                                        # bare "?" is appended below
                                        T[query]="${B[query]}"
                                fi
                        else
                                if [[ "${R[path]}" == /* ]]; then
                                        T[path]="$(remove_dot_segments "${R[path]}")"
                                else
                                        T[path]="$(merge_paths "B[authority]" "${B[path]}" "${R[path]}")"
                                        T[path]="$(remove_dot_segments "${T[path]}")"
                                fi
                                isdefined "R[query]" && T[query]="${R[query]}"
                        fi
                        T[authority]="${B[authority]}"
                fi
                T[scheme]="${B[scheme]}"
        fi
        isdefined "R[fragment]" && T[fragment]="${R[fragment]}"
        # cf. 5.3 -- recomposition
        isdefined "T[scheme]" && r="$r${T[scheme]}:"
        # remove the port from the authority
        isdefined "T[authority]" && r="$r//${T[authority]%:*}"
        r="$r${T[path]}"
        isdefined "T[query]" && r="$r?${T[query]}"
        isdefined "T[fragment]" && r="$r#${T[fragment]}"
        printf '%s\n' "$r"
}
216
# merge_paths B_AUTHORITY B_PATH R_PATH -- 5.2.3
# Print R_PATH merged onto B_PATH: everything of B_PATH up to its last "/"
# followed by R_PATH. An empty R_PATH yields B_PATH; an empty B_PATH yields
# "/R_PATH".
merge_paths() {
        # shellcheck disable=2034
        B_authority="$1" B_path="$2" R_path="$3"

        # if R_path is empty, get rid of // in B_path
        if [[ ! "$R_path" ]]; then
                printf '%s\n' "${B_path//\/\//\//}"
                return
        fi

        if isdefined "B_authority" && isempty "B_path"; then
                printf '/%s\n' "${R_path//\/\//\//}"
                return
        fi

        # keep the base path only up to (and including) its last slash
        case "$B_path" in
        */*) B_path="${B_path%/*}/" ;;
        *) B_path="" ;;
        esac
        printf '%s/%s\n' "${B_path%/}" "${R_path#/}"
}
239
# remove_dot_segments PATH -- 5.2.4
# Print PATH with "." and ".." segments resolved, moving one segment at a
# time from an input buffer to an output buffer.
remove_dot_segments() {
        local input="$1"
        local output=
        while [[ "$input" ]]; do
                # BASH_REMATCH[0] is quoted wherever it is stripped so that
                # it is removed literally instead of being used as a glob
                # (a segment such as "a[b]" would otherwise not match itself)
                if [[ "$input" =~ ^\.\.?/ ]]; then
                        # leading "./" or "../"
                        input="${input#"${BASH_REMATCH[0]}"}"
                elif [[ "$input" =~ ^/\.(/|$) ]]; then
                        # "/./" or trailing "/." collapses to "/"
                        input="/${input#"${BASH_REMATCH[0]}"}"
                elif [[ "$input" =~ ^/\.\.(/|$) ]]; then
                        # "/../" or trailing "/..": also drop the last
                        # segment already moved to the output
                        input="/${input#"${BASH_REMATCH[0]}"}"
                        [[ "$output" =~ /?[^/]+$ ]]
                        output="${output%"${BASH_REMATCH[0]}"}"
                elif [[ "$input" == . || "$input" == .. ]]; then
                        input=
                else
                        # move the first segment (with its leading slash)
                        [[ $input =~ ^(/?[^/]*)(/?.*)$ ]] || echo NOMATCH >&2
                        output="$output${BASH_REMATCH[1]}"
                        input="${BASH_REMATCH[2]}"
                fi
        done
        # NOTE(review): expression kept exactly as it was; on current bash
        # the replacement side appears to expand to "//" as well, so this
        # likely leaves the path unchanged -- confirm the intent.
        printf '%s\n' "${output//\/\//\//}"
}
263
# parse_url NAME STRING
# usage: eval "$(parse_url NAME STRING)" => NAME[scheme], NAME[authority],
#        NAME[path], NAME[query], NAME[fragment]
# Split STRING into URL components and print one shell-quoted assignment
# "NAME[component]=value" per component. scheme, authority, query and
# fragment are printed only when non-empty; path is always printed.
# Returns non-zero if STRING does not match the URL pattern.
parse_url() {
        local name="$1"
        local string="$2"
        local c # loop variable, kept out of the global scope
        # shopt -u extglob # TODO port re ^ to extglob syntax
        local re='^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?'
        [[ $string =~ $re ]] || return $?
        # shopt -s extglob

        local scheme="${BASH_REMATCH[2]}"
        local authority="${BASH_REMATCH[4]}"
        local path="${BASH_REMATCH[5]}"
        local query="${BASH_REMATCH[7]}"
        local fragment="${BASH_REMATCH[9]}"

        for c in scheme authority query fragment; do
                [[ "${!c}" ]] &&
                        run printf '%s[%s]=%q\n' "$name" "$c" "${!c}"
        done
        # unclear if the path is always set even if empty but it looks that way
        run printf '%s[path]=%q\n' "$name" "$path"
}
285
# isdefined NAME
# Succeeds when the variable (or array element) called NAME is set, even if
# it is set to the empty string.
isdefined() {
        [[ -n "${!1+x}" ]]
}
# isempty NAME
# Succeeds when the variable (or array element) called NAME is set AND its
# value is the empty string; fails for unset names.
isempty() {
        [[ -z "${!1-x}" ]]
}
290
# request_url SERVER PORT URL
# Run "openssl s_client" against SERVER:PORT with URL as its standard input
# (openssl's -crlf option is passed) and let the reply go to stdout;
# openssl's stderr is discarded.
request_url() {
        local server="$1" port="$2" url="$3"

        # -servername sends the host name for SNI
        ssl_cmd=(
                openssl s_client -crlf -quiet
                -connect "$server:$port"
                -servername "$server"
        )
        run "${ssl_cmd[@]}" <<<"$url" 2>/dev/null
}
300
# handle_response URL
# Read a response from stdin: a header "<code> <meta>" terminated by CRLF,
# followed by the body. Dispatches on the first digit of <code>:
#   1x  prompt the user with <meta> and re-request with the answer as query
#   2x  hand the body to display, with <meta> as the MIME description
#   3x  follow the redirect in <meta>, at most BOLLUX_MAXREDIR times in a row
#   4x/5x/6x and anything else: die with exit status 100 + code
# On 1x/2x/3x, BOLLUX_URL is set to URL.
handle_response() {
        local url="$1" code="" meta="" hdr="" lf ec=100

        if IFS= read -r -d $'\r' hdr; then
                # swallow the LF that completes the CRLF so the body handed
                # on does not start with a stray newline
                IFS= read -r -N 1 lf
                # first word is the status code, the rest is the meta line
                read -r code meta <<<"$hdr"
        fi

        log x "[$code] $meta"

        # exit status for failures; falls back to 100 when the code is not
        # a plain number (10# keeps a leading zero from meaning octal)
        if [[ "$code" =~ ^[0-9]+$ ]]; then
                ec=$((100 + 10#$code))
        fi

        case "$code" in
        1*)
                REDIRECTS=0
                BOLLUX_URL="$url"
                run prompt "$meta" QUERY
                # shellcheck disable=2153
                run blastoff "?$QUERY"
                ;;
        2*)
                REDIRECTS=0
                BOLLUX_URL="$url"
                run display "$meta"
                ;;
        3*)
                ((REDIRECTS += 1))
                if ((REDIRECTS > BOLLUX_MAXREDIR)); then
                        die "$ec" "Too many redirects!"
                fi
                BOLLUX_URL="$url"
                run blastoff "$meta"
                ;;
        [456]*)
                REDIRECTS=0
                die "$ec" "$code"
                ;;
        *)
                [[ -z "$code" ]] && die 100 "Empty response code."
                # exit status and message are separate arguments to die
                die "$ec" "Unknown response code: $code."
                ;;
        esac
}
353
# display META
# Show the response body arriving on stdin. META is the MIME description
# from the response header, e.g. "text/gemini; charset=utf-8".
# text/* bodies are stripped of CRs, copied to BOLLUX_PAGESRC, formatted by
# typeset_<subtype> when such a function exists, and paged with less; a
# non-zero exit status of that pipeline is passed to handle_keypress.
# Every other type is handed to download.
display() {
        local mime charset="" params submime
        local -a less_cmd
        local charset_re='(^|;)[[:space:]]*charset=([^;[:space:]]*)'

        case "$1" in
        *\;*)
                # the type is everything before the FIRST ";" -- there may
                # be several parameters after it
                mime="$(trim "${1%%;*}")"
                params="${1#*;}"
                if [[ "$params" =~ $charset_re ]]; then
                        charset="${BASH_REMATCH[2]}"
                fi
                log d "$mime $charset"
                ;;
        *) mime="$(trim "$1")" ;;
        esac

        [[ -z "$mime" ]] && mime="text/gemini"
        [[ -z "$charset" ]] && charset="utf-8"

        log debug "mime=$mime; charset=$charset"

        case "$mime" in
        text/*)
                less_cmd=(less -R)
                # use the key bindings file, creating it if necessary
                if [[ -r "$BOLLUX_LESSKEY" ]] || mklesskey "$BOLLUX_LESSKEY"; then
                        less_cmd+=(-k "$BOLLUX_LESSKEY")
                fi
                less_cmd+=(
                        -Pm'bollux$'
                        -PM'o\:open, g\:goto, r\:refresh$'
                        -M
                )

                submime="${mime#*/}"
                # look up the exact formatter name instead of grepping the
                # function list for a substring
                if declare -F "typeset_$submime" >/dev/null; then
                        log d "typeset_$submime"
                        {
                                normalize_crlf |
                                        tee "$BOLLUX_PAGESRC" |
                                        run "typeset_$submime" |
                                        run "${less_cmd[@]}"
                        } || run handle_keypress "$?"
                else
                        log d "cat"
                        {
                                normalize_crlf |
                                        tee "$BOLLUX_PAGESRC" |
                                        run "${less_cmd[@]}"
                        } || run handle_keypress "$?"
                fi
                ;;
        *) run download "$BOLLUX_URL" ;;
        esac
}
408
# mklesskey OUTFILE
# Feed the key bindings below to lesskey, which writes them to OUTFILE.
# Each bound key makes less quit; the number noted in each trailing comment
# is the exit status handle_keypress dispatches on.
mklesskey() {
        printf '%s\n' \
                '#command' \
                'o quit 0 # 48 open a link' \
                'g quit 1 # 49 goto a url' \
                '[ quit 2 # 50 back' \
                '] quit 3 # 51 forward' \
                'r quit 4 # 52 re-request / download' |
                lesskey -o "$1" -
}
419
# Copy stdin to stdout with every carriage return removed, so CRLF line
# endings become LF. A final line without a trailing newline is kept.
normalize_crlf() {
        # read already strips the newline, so only CRs are left to remove;
        # a plain pattern is used because extglob is not enabled here
        while IFS= read -r || [[ -n "$REPLY" ]]; do
                printf '%s\n' "${REPLY//$'\r'/}"
        done
}
425
# Format a text/gemini document from stdin for the terminal, one line at a
# time, via gemini_link / gemini_header / gemini_list / gemini_text /
# gemini_pre. Sets the globals WIDTH (text width) and S_MARGIN (sigil
# column width) those helpers read; T_WIDTH=0 means "use $COLUMNS".
typeset_gemini() {
        local pre=false
        local ln=0 # link number

        if ((T_WIDTH == 0)); then
                shopt -s checkwinsize
                (
                        :
                        :
                ) # XXX this doesn't work!?
                log d "LINES=$LINES; COLUMNS=$COLUMNS"
                T_WIDTH=$COLUMNS
        fi
        WIDTH=$((T_WIDTH - T_MARGIN))
        ((WIDTH <= 0)) && WIDTH=80 # default if dumb
        S_MARGIN=$((T_MARGIN - 1)) # spacing

        log d "T_WIDTH=$T_WIDTH"
        log d "WIDTH=$WIDTH"

        # "|| [[ -n ... ]]" keeps a final line that lacks a newline
        while IFS= read -r || [[ -n "$REPLY" ]]; do
                # any line STARTING with ``` toggles preformatted mode (the
                # opening fence may carry text after the backticks); the
                # fence line itself is not printed
                if [[ "$REPLY" == '```'* ]]; then
                        if $pre; then
                                pre=false
                        else
                                pre=true
                        fi
                        continue
                fi
                # inside a preformatted block nothing is interpreted, so
                # links there do not use up link numbers
                if $pre; then
                        gemini_pre "$REPLY"
                        continue
                fi
                case "$REPLY" in
                =\>*)
                        : $((ln += 1))
                        gemini_link "$REPLY" false "$ln"
                        ;;
                \#*) gemini_header "$REPLY" false ;;
                \**)
                        # a list item needs whitespace after the asterisk
                        if [[ "$REPLY" =~ ^\*[[:space:]]+ ]]; then
                                gemini_list "$REPLY" false
                        else
                                gemini_text "$REPLY" false
                        fi
                        ;;
                *) gemini_text "$REPLY" false ;;
                esac
        done
}
472
# gemini_link LINE PRE NUMBER
# Print a "=> URL [TITLE]" line as sigil, "[NUMBER]", title and URL, each in
# its configured color, folded to WIDTH. With no title the URL is shown in
# the title position. If PRE is true or LINE is not a link, LINE is printed
# by gemini_pre instead.
gemini_link() {
        local pattern='^(=>)[[:blank:]]*([^[:blank:]]+)[[:blank:]]*(.*)'
        local sigil title url line

        if ${2-false} || ! [[ "$1" =~ $pattern ]]; then
                gemini_pre "$1"
                return
        fi

        sigil="${BASH_REMATCH[1]}"
        url="${BASH_REMATCH[2]}"
        title="${BASH_REMATCH[3]}"
        if [[ -z "$title" ]]; then
                title="$url"
                url=
        fi

        printf "\e[${C_SIGIL}m%${S_MARGIN}s ${C_RESET}" "$sigil"
        # fold_line splits on whitespace, so the amount of space between
        # the three parts does not affect the output
        printf -v line "\e[${C_LINK_NUMBER}m[%d]${C_RESET} \e[${C_LINK_TITLE}m%s${C_RESET} \e[${C_LINK_URL}m%s${C_RESET}\n" \
                "$3" "$title" "$url"
        fold_line "$WIDTH" "$line"
}
495
# gemini_header LINE PRE
# Print a "#..." header line: the run of "#" as sigil, then the text in the
# format held by C_HEADER<n>, where n is the number of "#", folded to WIDTH.
# If PRE is true, LINE is printed by gemini_pre instead.
gemini_header() {
        local re='^(#+)[[:blank:]]*(.*)'
        local s t a l # sigil, text, annotation(lvl), line
        local hdrvar hdrfmt
        if ! ${2-false} && [[ "$1" =~ $re ]]; then
                s="${BASH_REMATCH[1]}"
                a="${#BASH_REMATCH[1]}"
                t="${BASH_REMATCH[2]}"
                # indirect expansion instead of eval: the value is taken
                # as-is (empty when no C_HEADER<n> exists for this level)
                hdrvar="C_HEADER$a"
                hdrfmt="${!hdrvar-}"

                printf "\e[${C_SIGIL}m%${S_MARGIN}s ${C_RESET}" "$s"
                printf -v l "\e[${hdrfmt}m%s${C_RESET}\n" "$t"
                fold_line "$WIDTH" "$l"
        else
                gemini_pre "$1"
        fi
}
513
# gemini_list LINE PRE
# Print a "* item" line: the asterisk as sigil, then the item text in the
# C_LIST format, folded to WIDTH. If PRE is true, LINE is printed by
# gemini_pre instead.
gemini_list() {
        local pattern='^(\*)[[:blank:]]*(.*)'
        local sigil text line

        if ${2-false} || ! [[ "$1" =~ $pattern ]]; then
                gemini_pre "$1"
                return
        fi

        sigil="${BASH_REMATCH[1]}"
        text="${BASH_REMATCH[2]}"
        printf "\e[${C_SIGIL}m%${S_MARGIN}s " "$sigil"
        printf -v line "\e[${C_LIST}m%s${C_RESET}\n" "$text"
        fold_line "$WIDTH" "$line"
}
528
# gemini_text LINE PRE
# Print an ordinary text line: blank sigil column, then LINE folded to
# WIDTH. If PRE is true, LINE is printed by gemini_pre instead.
gemini_text() {
        if ${2-false}; then
                gemini_pre "$1"
                return
        fi
        printf "%${S_MARGIN}s " ' '
        fold_line "$WIDTH" "$1"
}
537
# gemini_pre LINE
# Print LINE unfolded and unmodified in the C_PRE format, preceded by a ```
# sigil in the sigil column.
gemini_pre() {
        printf "\e[${C_SIGIL}m%${S_MARGIN}s \e[${C_PRE}m%s${C_RESET}\n" \
                '```' "$1"
}
542
# fold_line WIDTH TEXT
# Print TEXT word by word (each followed by one space), starting a new line
# before a word that would reach WIDTH. Continuation lines are indented by
# the length of TEXT's leading whitespace, or by T_MARGIN when TEXT has
# none. Color escape sequences do not count toward a word's width.
fold_line() {
        local width="$1"
        local margin="${2%%[![:space:]]*}"
        if [[ "$margin" ]]; then
                margin="${#margin}"
        else
                margin="$T_MARGIN"
        fi
        local ll=0 wl plain word
        local -a words
        # an SGR escape sequence: ESC [ digits/semicolons m
        local esc_re=$'\x1b''\[[0-9;]*m'

        # split on whitespace into an array; unlike an unquoted expansion
        # this never glob-expands words such as "*". read returns non-zero
        # at end of input, which is expected here.
        IFS=$' \t\n' read -r -d '' -a words <<<"$2"

        for word in "${words[@]}"; do
                # measure the word without its escape sequences
                plain="$word"
                while [[ "$plain" =~ $esc_re ]]; do
                        plain="${plain/"${BASH_REMATCH[0]}"/}"
                done
                wl=$((${#plain} + 1))
                if (((ll + wl) >= width)); then
                        printf "\n%${margin}s" ' '
                        ll=$wl
                else
                        ll=$((ll + wl))
                fi
                printf '%s ' "$word"
        done
        printf '\n'
}
568
# handle_keypress CODE
# Act on the exit status the pager quit with:
#   48 (o) pick a link from the saved page source
#   49 (g) prompt for a URL and load it as typed
#   50 ([) history back        51 (]) history forward
#   52 (r) re-request the current URL
# 53-57 are still available for binding; other values do nothing.
handle_keypress() {
        if [[ "$1" == 48 ]]; then
                run select_url "$BOLLUX_PAGESRC"
        elif [[ "$1" == 49 ]]; then
                prompt GO URL
                run blastoff -u "$URL"
        elif [[ "$1" == 50 ]]; then
                run history_back
        elif [[ "$1" == 51 ]]; then
                run history_forward
        elif [[ "$1" == 52 ]]; then
                run blastoff "$BOLLUX_URL"
        fi
}
591
# select_url FILE
# Offer the links found in FILE (via extract_links) as a numbered menu on
# the terminal and load the chosen one; the menu repeats until a load
# succeeds.
select_url() {
        local target
        run mapfile -t < <(extract_links <"$1")
        select u in "${MAPFILE[@]}"; do
                # the URL is the first word of the menu entry
                target="$(gawk '{print $1}' <<<"$u")"
                run blastoff "$target" && break
        done </dev/tty
}
598
# Read gemini text on stdin and print one line per "=>" link line: the URL,
# followed by the link text in parentheses (colored) when there is any.
extract_links() {
        gawk '
        /^=>/ {
                sub(/=>[[:space:]]*/,"")
                if (!$2) {
                        printf "%s\n", $1
                        next
                }
                label=$2
                for (i=3;i<=NF;i++) {
                        label=label " " $i
                }
                printf "%s (\033[34m%s\033[0m)\n", $1, label
        }'
}
614
# download URL
# Save stdin to a temporary file, then move it to
# BOLLUX_DOWNDIR/<last path component of BOLLUX_URL>. If a file of that name
# already exists, the data stays in the temporary file.
download() {
        tn="$(mktemp)"
        log x "Downloading: '$BOLLUX_URL' => '$tn'..."
        dd status=progress >"$tn"
        fn="$BOLLUX_DOWNDIR/${BOLLUX_URL##*/}"

        # never overwrite an existing file
        if [[ -f "$fn" ]]; then
                log x "Saved '$tn'."
                return
        fi
        if mv "$tn" "$fn"; then
                log x "Saved '$fn'."
                return
        fi
        log error "Error saving '$fn': downloaded to '$tn'."
}
628
# Placeholder for going back in the history: only logs an error.
history_back() {
        log error "Not implemented."
}
# Placeholder for going forward in the history: only logs an error.
history_forward() {
        log error "Not implemented."
}
631
# Run the client when executed directly; when the file is sourced instead,
# only switch logging to debug level.
if [[ "${BASH_SOURCE[0]}" != "$0" ]]; then
        BOLLUX_LOGLEVEL=DEBUG
else
        run bollux "$@"
fi