Remove iconv dash
[bollux.git/.git] / bollux
1 #!/usr/bin/env bash
2 # bollux: a bash gemini client
3 # Author: Case Duckworth
4 # License: MIT
5 # Version: 0.2.2
6
# Program identity: the name this script was invoked as, and its version
PRGN=${0##*/}
VRSN='0.2.2'
# Mutable state: redirect counter, updated while handling responses
REDIRECTS=0
# Turn off pathname expansion for the whole script
set -o noglob
13
# bollux_usage: print the help text (name, version, flags, parameters)
# on stdout.
bollux_usage() {
        local -a help_lines=(
                "$PRGN (v. $VRSN): a bash gemini client"
                "usage:"
                "        $PRGN [-h]"
                "        $PRGN [-q] [-v] [URL]"
                "flags:"
                "        -h      show this help and exit"
                "        -q      be quiet: log no messages"
                "        -v      verbose: log more messages"
                "parameters:"
                "        URL     the URL to start in"
                "                If not provided, the user will be prompted."
        )
        printf '%s\n' "${help_lines[@]}"
}
29
# run COMMAND [ARG...]: log the command line at debug level, then execute
# it. The return status is the status of COMMAND.
run() {
        log debug "$@"
        "$@"
}
34
# die STATUS MESSAGE...: log MESSAGE as an error and exit with STATUS.
die() {
        local status="$1"
        log error "${*:2}"
        exit "$status"
}
41
# trim STRING: print STRING with leading and trailing whitespace removed
# (same technique as the pure bash bible's trim_string).
trim() {
        local text="$1"
        # leading run of whitespace = everything before the first non-space
        local lead="${text%%[![:space:]]*}"
        text="${text#"$lead"}"
        # trailing run of whitespace = everything after the last non-space
        local tail="${text##*[![:space:]]}"
        text="${text%"$tail"}"
        printf '%s\n' "$text"
}
48
# log LEVEL MESSAGE...: write a coloured, $PRGN-prefixed message to stderr.
# LEVEL starting with d/D is a debug message (shown only when
# BOLLUX_LOGLEVEL is DEBUG), e/E is an error; anything else is plain.
# Nothing is printed when BOLLUX_LOGLEVEL is QUIET.
log() {
        if [[ "$BOLLUX_LOGLEVEL" == QUIET ]]; then
                return
        fi

        if [[ "$1" == [dD]* ]]; then # debug
                [[ "$BOLLUX_LOGLEVEL" == DEBUG ]] || return
                fmt=34
        elif [[ "$1" == [eE]* ]]; then # error
                fmt=31
        else
                fmt=1
        fi

        # everything after LEVEL is the message
        printf >&2 '\e[%sm%s:\e[0m\t%s\n' "$fmt" "$PRGN" "${*:2}"
}
64
# bollux [ARG...]: main entry point -- load the configuration, parse the
# command line, set up the history, then load the start URL.
bollux() {
        run bollux_config
        run bollux_args "$@"
        run history_init

        # no start URL so far: ask the user for one
        [[ "${BOLLUX_URL:+isset}" ]] || run prompt GO BOLLUX_URL

        run blastoff "$BOLLUX_URL"
}
77
# bollux_args [-h] [-q] [-v] [URL]: parse the command line.
#   -h  print the usage text and exit
#   -v  set BOLLUX_LOGLEVEL to DEBUG
#   -q  set BOLLUX_LOGLEVEL to QUIET
# When exactly one positional argument remains it becomes BOLLUX_URL.
# Unknown options terminate the program through die.
bollux_args() {
        # getopts keeps its position in OPTIND; keep it (and the other getopts
        # variables) local and start from 1 so every call parses its own
        # arguments from the beginning
        local OPT OPTARG OPTIND=1
        while getopts :hvq OPT; do
                case "$OPT" in
                h)
                        bollux_usage
                        exit
                        ;;
                v) BOLLUX_LOGLEVEL=DEBUG ;;
                q) BOLLUX_LOGLEVEL=QUIET ;;
                :) die 1 "Option -$OPTARG requires an argument" ;;
                *) die 1 "Unknown option: -$OPTARG" ;;
                esac
        done
        shift $((OPTIND - 1))
        if (($# == 1)); then
                BOLLUX_URL="$1"
        fi
}
96
# bollux_config: source the user's configuration file (if any) and give
# every setting that is still unset or empty its default value.
# The config file path is $BOLLUX_CONFIG when preset; otherwise it is
# bollux/bollux.conf below $XDG_CONFIG_HOME (the variable named by the
# XDG Base Directory specification), then the historical $XDG_CONFIG_DIR,
# then $HOME/.config.
bollux_config() {
        local config_home="${XDG_CONFIG_HOME:-${XDG_CONFIG_DIR:-$HOME/.config}}"
        : "${BOLLUX_CONFIG:=$config_home/bollux/bollux.conf}"

        if [[ -f "$BOLLUX_CONFIG" ]]; then
                # shellcheck disable=1090
                . "$BOLLUX_CONFIG"
        else
                log debug "Can't load config file '$BOLLUX_CONFIG'."
        fi

        ## behavior
        : "${BOLLUX_DOWNDIR:=.}"                   # where to save downloads
        : "${BOLLUX_LOGLEVEL:=3}"                  # log level
        : "${BOLLUX_MAXREDIR:=5}"                  # max redirects
        : "${BOLLUX_PORT:=1965}"                   # port number
        : "${BOLLUX_PROTO:=gemini}"                # default protocol
        : "${BOLLUX_LESSKEY:=/tmp/bollux-lesskey}" # where to store binds
        : "${BOLLUX_PAGESRC:=/tmp/bollux-src}"     # where to save the page source
        : "${BOLLUX_URL:=}"                        # start url
        ## typesetting
        : "${T_MARGIN:=4}"      # left and right margin
        : "${T_WIDTH:=0}"       # width of the viewport -- 0 = get term width
        # colors -- these will be wrapped in \e[ __ m
        C_RESET='\e[0m'         # reset (always set, not configurable)
        : "${C_SIGIL:=35}"      # sigil (=>, #, ##, ###, *, ```)
        : "${C_LINK_NUMBER:=1}" # link number
        : "${C_LINK_TITLE:=4}"  # link title
        : "${C_LINK_URL:=36}"   # link URL
        : "${C_HEADER1:=1;4}"   # header 1 formatting
        : "${C_HEADER2:=1}"     # header 2 formatting
        : "${C_HEADER3:=3}"     # header 3 formatting
        : "${C_LIST:=0}"        # list formatting
        : "${C_PRE:=0}"         # preformatted text formatting
}
131
# bollux_quit: log a goodbye message and exit the program.
bollux_quit() {
        log x "Thanks for flying $PRGN"
        exit "$?" # same status a bare exit would use: that of the log call
}
136
# set_title WORD...: emit the OSC 2 escape sequence that sets the terminal
# window title to "WORD... - bollux".
set_title() {
        local title="$*"
        printf '\e]2;%s - bollux\007' "$title"
}
140
# prompt TEXT [NAME...]: show "TEXT> " and read one line from the terminal
# (/dev/tty) with readline editing into NAME... (or REPLY when no NAME is
# given). TEXT is also left in the global variable $prompt.
prompt() {
        prompt="$1"
        read -e -r -p "${prompt}> " "${@:2}" </dev/tty
}
146
# blastoff [-u] URL: load a url.
# Without -u, a URL that differs from the current page ($BOLLUX_URL) is
# resolved against it with transform_resource; with -u it is taken as typed.
# A URL lacking "://" gets the default protocol prepended. The host to
# connect to is taken from the URL; an explicit ":PORT" in the URL overrides
# $BOLLUX_PORT. The request is piped into handle_response.
# Sets the globals URL and server.
blastoff() {
        local well_formed=true
        if [[ "$1" == "-u" ]]; then
                well_formed=false
                shift
        fi
        URL="$1"

        if $well_formed && [[ "$1" != "$BOLLUX_URL" ]]; then
                URL="$(run transform_resource "$BOLLUX_URL" "$1")"
        fi
        # trim first, so stray leading whitespace cannot end up between the
        # protocol prefix and the host
        URL="$(trim "$URL")"
        if [[ "$URL" != *://* ]]; then
                URL="$BOLLUX_PROTO://$URL"
        fi

        # authority = everything between "://" and the first "/", "?" or "#"
        server="${URL#*://}"
        server="${server%%[/?#]*}"
        # split a trailing ":PORT" off the host name
        local port="$BOLLUX_PORT"
        if [[ "$server" =~ ^(.*):([0-9]+)$ ]]; then
                server="${BASH_REMATCH[1]}"
                port="${BASH_REMATCH[2]}"
        fi

        log d "URL='$URL' server='$server'"

        run request_url "$server" "$port" "$URL" |
                run handle_response "$URL"
}
169
# transform_resource BASE_URL REFERENCE_URL
# Resolve REFERENCE_URL against BASE_URL (RFC 3986 section 5.2.2) and print
# the recomposed target URL (section 5.3). A ":PORT" suffix is removed from
# the authority of the result.
# When $STRICT is set to "false", a reference scheme equal to the base
# scheme is ignored.
transform_resource() {
        local -A R B T # reference, base url, target
        eval "$(run parse_url B "$1")"
        eval "$(run parse_url R "$2")"
        # A non-strict parser may ignore a scheme in the reference
        # if it is identical to the base URI's scheme.
        if ! "${STRICT:-true}" && [[ "${R[scheme]}" == "${B[scheme]}" ]]; then
                # drop the array element (not a variable named after its value)
                unset 'R[scheme]'
        fi

        # basically pseudo-code from spec ported to bash
        if isdefined "R[scheme]"; then
                T[scheme]="${R[scheme]}"
                isdefined "R[authority]" && T[authority]="${R[authority]}"
                isdefined "R[path]" &&
                        T[path]="$(run remove_dot_segments "${R[path]}")"
                isdefined "R[query]" && T[query]="${R[query]}"
        else
                if isdefined "R[authority]"; then
                        T[authority]="${R[authority]}"
                        isdefined "R[path]" &&
                                T[path]="$(remove_dot_segments "${R[path]}")"
                        isdefined "R[query]" && T[query]="${R[query]}"
                else
                        if isempty "R[path]"; then
                                T[path]="${B[path]}"
                                # an undefined base query must stay undefined,
                                # otherwise a bare "?" is appended below
                                if isdefined "R[query]"; then
                                        T[query]="${R[query]}"
                                elif isdefined "B[query]"; then
                                        T[query]="${B[query]}"
                                fi
                        else
                                if [[ "${R[path]}" == /* ]]; then
                                        T[path]="$(remove_dot_segments "${R[path]}")"
                                else
                                        T[path]="$(merge_paths "B[authority]" "${B[path]}" "${R[path]}")"
                                        T[path]="$(remove_dot_segments "${T[path]}")"
                                fi
                                isdefined "R[query]" && T[query]="${R[query]}"
                        fi
                        isdefined "B[authority]" && T[authority]="${B[authority]}"
                fi
                isdefined "B[scheme]" && T[scheme]="${B[scheme]}"
        fi
        isdefined "R[fragment]" && T[fragment]="${R[fragment]}"

        # cf. 5.3 -- recomposition
        local r=""
        isdefined "T[scheme]" && r="$r${T[scheme]}:"
        if isdefined "T[authority]"; then
                # remove the port from the authority; only a trailing ":digits"
                # counts, so a bracketed IPv6 host is left intact
                local authority="${T[authority]}"
                if [[ "$authority" =~ ^(.*):[0-9]*$ ]]; then
                        authority="${BASH_REMATCH[1]}"
                fi
                r="$r//$authority"
        fi
        r="$r${T[path]}"
        isdefined "T[query]" && r="$r?${T[query]}"
        isdefined "T[fragment]" && r="$r#${T[fragment]}"
        printf '%s\n' "$r"
}
225
# merge_paths BASE_AUTHORITY BASE_PATH REFERENCE_PATH   (cf. RFC 3986 5.2.3)
# Print REFERENCE_PATH appended to BASE_PATH minus its last segment.
# With an empty BASE_PATH the result is "/REFERENCE_PATH"; with an empty
# REFERENCE_PATH it is BASE_PATH unchanged.
# The first argument is accepted for interface compatibility only: both
# branches below yield "/REFERENCE_PATH" for an empty base path, so the
# presence of an authority does not change the output.
# Empty segments ("//") are passed through as they are.
merge_paths() {
        local base_path="$2"
        local ref_path="$3"

        if [[ -z "$ref_path" ]]; then
                printf '%s\n' "$base_path"
                return
        fi

        if [[ -z "$base_path" ]]; then
                printf '/%s\n' "$ref_path"
                return
        fi

        # keep everything up to (not including) the last "/" of the base
        local base_dir=""
        if [[ "$base_path" == */* ]]; then
                base_dir="${base_path%/*}"
        fi
        printf '%s/%s\n' "$base_dir" "${ref_path#/}"
}
248
# remove_dot_segments PATH   (RFC 3986 5.2.4)
# Print PATH with "." and ".." segments resolved. Segments are moved one at
# a time from the input buffer to the output buffer, as in the RFC.
# Empty segments ("//") are passed through as they are.
remove_dot_segments() {
        local input="$1"
        local output=
        while [[ "$input" ]]; do
                # The matched text is quoted wherever it is stripped, so that it
                # is removed literally and never interpreted as a glob pattern.
                if [[ "$input" =~ ^\.\.?/ ]]; then
                        # A: leading "../" or "./"
                        input="${input#"${BASH_REMATCH[0]}"}"
                elif [[ "$input" =~ ^/\.(/|$) ]]; then
                        # B: leading "/./" or a final "/." becomes "/"
                        input="/${input#"${BASH_REMATCH[0]}"}"
                elif [[ "$input" =~ ^/\.\.(/|$) ]]; then
                        # C: leading "/../" or a final "/.." becomes "/", and the
                        # last output segment (with its preceding "/") is dropped
                        input="/${input#"${BASH_REMATCH[0]}"}"
                        if [[ "$output" =~ /?[^/]+$ ]]; then
                                output="${output%"${BASH_REMATCH[0]}"}"
                        fi
                elif [[ "$input" == . || "$input" == .. ]]; then
                        # D: nothing but "." or ".." left
                        input=
                else
                        # E: move the first segment (with its leading "/") over
                        [[ $input =~ ^(/?[^/]*)(/?.*)$ ]] || log debug NOMATCH
                        output="$output${BASH_REMATCH[1]}"
                        input="${BASH_REMATCH[2]}"
                fi
        done
        printf '%s\n' "$output"
}
272
# parse_url NAME STRING
# Split STRING into URI components with the regular expression from RFC 3986
# appendix B and print shell assignments NAME[component]=value, meant to be
# used as:  eval "$(parse_url NAME STRING)"  with NAME an associative array.
# scheme, authority, query and fragment are printed only when the component
# is present in STRING (it may then be empty, e.g. the query of "/p?");
# path is always printed. Values are quoted with printf %q.
parse_url() {
        local name="$1"
        local string="$2"
        # shopt -u extglob # TODO port re ^ to extglob syntax
        local re='^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?'
        [[ $string =~ $re ]] || return $?
        # shopt -s extglob

        # Groups 2, 4, 5, 7, 9 hold the values; groups 1, 3, 6, 8 include the
        # delimiter and are non-empty exactly when the component is present.
        local has_scheme="${BASH_REMATCH[1]}" scheme="${BASH_REMATCH[2]}"
        local has_authority="${BASH_REMATCH[3]}" authority="${BASH_REMATCH[4]}"
        local path="${BASH_REMATCH[5]}"
        local has_query="${BASH_REMATCH[6]}" query="${BASH_REMATCH[7]}"
        local has_fragment="${BASH_REMATCH[8]}" fragment="${BASH_REMATCH[9]}"

        local c present
        for c in scheme authority query fragment; do
                present="has_$c"
                [[ "${!present}" ]] &&
                        run printf '%s[%s]=%q\n' "$name" "$c" "${!c}"
        done
        # the path group always takes part in the match, even when empty
        run printf '%s[path]=%q\n' "$name" "$path"
}
294
# isdefined NAME: succeed when the variable (or array element) called NAME
# is set, even if it is set to the empty string.
isdefined() {
        [[ -n "${!1+x}" ]]
}
# isempty NAME: succeed when the variable (or array element) called NAME
# is set AND holds the empty string; an unset NAME fails.
isempty() {
        [[ -z "${!1-x}" ]]
}
299
300 request_url() {
301         local server="$1"
302         local port="$2"
303         local url="$3"
304
305         ssl_cmd=(openssl s_client -crlf -quiet -connect "$server:$port")
306         ssl_cmd+=(-servername "$server") # SNI
307         run "${ssl_cmd[@]}" <<<"$url" 2>/dev/null
308 }
309
# handle_response URL
# Read a response from stdin and act on its status code. The header is
# everything up to the first carriage return, "<CODE> <META>":
#   1x  prompt the user with META and re-request with the answer as query
#   2x  display the body (META is the MIME type line)
#   3x  follow the redirect in META, at most $BOLLUX_MAXREDIR times in a row
#   4x, 5x, 6x and anything else: exit through die
# Error exits use status 100 + CODE (100 when CODE is missing or not a number).
handle_response() {
        local URL="$1" code meta hdr

        # split the header on whitespace: first word is the code, the rest
        # (surrounding whitespace stripped) is the meta text
        if IFS= read -r -d $'\r' hdr; then
                read -r code meta <<<"$hdr"
        fi

        log x "[$code] $meta"

        # exit status for the error paths; 10# keeps a code with a leading
        # zero from being read as octal
        local ec=100
        if [[ "$code" =~ ^[0-9]+$ ]]; then
                ec=$((100 + 10#$code))
        fi

        case "$code" in
        1*)
                REDIRECTS=0
                run history_append "$URL"
                run prompt "$meta" QUERY
                # shellcheck disable=2153
                run blastoff "?$QUERY"
                ;;
        2*)
                REDIRECTS=0
                run history_append "$URL"
                run display "$meta"
                ;;
        3*)
                ((REDIRECTS += 1))
                if ((REDIRECTS > BOLLUX_MAXREDIR)); then
                        die "$ec" "Too many redirects!"
                fi
                run blastoff "$meta"
                ;;
        4* | 5* | 6*)
                REDIRECTS=0
                die "$ec" "$code"
                ;;
        *)
                [[ -z "${code-}" ]] && die 100 "Empty response code."
                # exit status and message are separate arguments of die
                die "$ec" "Unknown response code: $code."
                ;;
        esac
}
361
# display META
# Show the response body arriving on stdin. META is the header text
# "MIME[; param=value]..."; the MIME type defaults to text/gemini and the
# charset parameter to utf-8.
# text/* bodies are normalised, converted to UTF-8, saved to
# $BOLLUX_PAGESRC, typeset by the function typeset_<subtype> when one is
# defined (passed through unchanged otherwise) and paged with less. less
# exiting with status 0 quits the program; any other exit status is handed
# to handle_keypress. Every other MIME type is downloaded.
display() {
        # split header line
        local -a hdr less_cmd
        local i h mime submime typesetter
        # start from an empty charset on every call so the value of an
        # earlier page is never reused
        local charset=
        IFS=$'\n' read -d "" -ra hdr <<<"${1//;/$'\n'}"

        mime="$(trim "${hdr[0],,}")"
        # parameters follow the MIME type: indexes 1 .. length-1
        for ((i = 1; i < ${#hdr[@]}; i++)); do
                h="$(trim "${hdr[i]}")"
                case "$h" in
                charset=*) charset="${h#charset=}" ;;
                esac
        done

        [[ -z "$mime" ]] && mime="text/gemini"
        if [[ -z "$charset" ]]; then
                charset="utf-8"
        fi

        log debug "mime='$mime'; charset='$charset'"

        case "$mime" in
        text/*)
                set_title "$BOLLUX_URL"
                less_cmd=(less -R)
                # use the key bindings only if the lesskey file is (or could
                # be made) available
                {
                        [[ -r "$BOLLUX_LESSKEY" ]] || mklesskey "$BOLLUX_LESSKEY"
                } && less_cmd+=(-k "$BOLLUX_LESSKEY")
                less_cmd+=(
                        -Pm'bollux$'
                        -PM'o\:open, g\:goto, r\:refresh$'
                        -M
                )

                submime="${mime#*/}"
                # look the typesetter up by its exact function name
                if declare -F "typeset_$submime" >/dev/null; then
                        typesetter="typeset_$submime"
                else
                        typesetter=cat
                fi
                log d "$typesetter"
                {
                        normalize_crlf |
                                iconv -f "${charset^^}" -t "UTF-8" |
                                tee "$BOLLUX_PAGESRC" |
                                run "$typesetter" |
                                run "${less_cmd[@]}" && bollux_quit
                } || run handle_keypress "$?"
                ;;
        *) run download "$BOLLUX_URL" ;;
        esac
}
419
# mklesskey FILE: compile the key bindings below into the lesskey file FILE.
# o, g, [, ] and r make less quit with the first character of the extra
# string ("0".."4") as its exit status, i.e. 48..52; space is bound to
# forw-screen-force.
mklesskey() {
        lesskey -o "$1" - <<'END'
#command
o quit 0 # 48 open a link
g quit 1 # 49 goto a url
[ quit 2 # 50 back
] quit 3 # 51 forward
r quit 4 # 52 re-request / download
# other keybinds
\40 forw-screen-force
END
}
432
# normalize_crlf: copy stdin to stdout line by line with every carriage
# return removed. Each output line ends in a newline, including a final
# input line that had none. Shell options are left untouched.
normalize_crlf() {
        # read fails on a last line without a newline but still fills REPLY;
        # the second test keeps that line from being lost
        while IFS= read -r || [[ -n "$REPLY" ]]; do
                printf '%s\n' "${REPLY//$'\r'/}"
        done
}
440
# typeset_gemini: read text/gemini lines from stdin and print them
# formatted, dispatching each line to gemini_link / gemini_header /
# gemini_list / gemini_text. Lines starting with ``` toggle preformatted
# mode and are not printed; the current mode is passed to the line
# formatters as their second argument.
# Sets the globals WIDTH (text width) and S_MARGIN (sigil column width);
# when T_WIDTH is 0 it is replaced by $COLUMNS.
typeset_gemini() {
        local pre=false
        local ln=0 # link number

        if ((T_WIDTH == 0)); then
                shopt -s checkwinsize
                (
                        :
                        :
                ) # XXX this doesn't work!?
                log d "LINES=$LINES; COLUMNS=$COLUMNS"
                T_WIDTH=$COLUMNS
        fi
        WIDTH=$((T_WIDTH - T_MARGIN))
        if ((WIDTH < 0)); then
                WIDTH=80 # default if dumb
        fi
        S_MARGIN=$((T_MARGIN - 1)) # spacing

        log d "T_WIDTH=$T_WIDTH"
        log d "WIDTH=$WIDTH"

        while IFS= read -r; do
                if [[ "$REPLY" == '```'* ]]; then
                        # flip preformatted mode
                        if $pre; then
                                pre=false
                        else
                                pre=true
                        fi
                elif [[ "$REPLY" == '=>'* ]]; then
                        ((ln += 1))
                        gemini_link "$REPLY" $pre "$ln"
                elif [[ "$REPLY" == '#'* ]]; then
                        gemini_header "$REPLY" $pre
                elif [[ "$REPLY" =~ ^\*[[:space:]]+ ]]; then
                        # a list item needs whitespace after the asterisk
                        gemini_list "$REPLY" $pre
                else
                        gemini_text "$REPLY" $pre
                fi
        done
}
487
# gemini_link LINE PRE NUMBER
# Print a "=> URL [TITLE]" line as: sigil, "[NUMBER]", title, URL, folded to
# $WIDTH. A link without title shows its URL in the title position.
# When PRE is "true", or LINE is not a link, LINE is printed by gemini_pre.
gemini_link() {
        local re="^(=>)[[:blank:]]*([^[:blank:]]+)[[:blank:]]*(.*)"
        local s t a l # sigil, text, annotation(url), line

        if ${2-false} || ! [[ "$1" =~ $re ]]; then
                gemini_pre "$1"
                return
        fi

        s="${BASH_REMATCH[1]}"
        a="${BASH_REMATCH[2]}"
        t="${BASH_REMATCH[3]}"
        if [[ -z "$t" ]]; then
                t="$a"
                a=
        fi

        printf "\e[${C_SIGIL}m%${S_MARGIN}s ${C_RESET}" "$s"
        # number, title and url, each wrapped in its own colour; fold_line
        # splits the result on whitespace
        local fmt="\e[${C_LINK_NUMBER}m[%d]${C_RESET}"
        fmt+=" \e[${C_LINK_TITLE}m%s${C_RESET}"
        fmt+=" \e[${C_LINK_URL}m%s${C_RESET}\n"
        printf -v l "$fmt" "$3" "$t" "$a"
        fold_line "$WIDTH" "$l"
}
510
# gemini_header LINE PRE
# Print a "#", "##", ... header line: the sigil in the margin and the text,
# folded to $WIDTH, in the format of $C_HEADER<level>, where <level> is the
# number of "#" characters (an unset C_HEADER<level> gives an empty format).
# When PRE is "true", or LINE is not a header, LINE is printed by gemini_pre.
gemini_header() {
        local re="^(#+)[[:blank:]]*(.*)"
        local s t a l # sigil, text, annotation(lvl), line
        if ! ${2-false} && [[ "$1" =~ $re ]]; then
                s="${BASH_REMATCH[1]}"
                a="${#BASH_REMATCH[1]}"
                t="${BASH_REMATCH[2]}"
                # look the format up by variable name (indirect expansion)
                local hdrvar="C_HEADER$a"
                local hdrfmt="${!hdrvar}"

                printf "\e[${C_SIGIL}m%${S_MARGIN}s ${C_RESET}" "$s"
                printf -v l "\e[${hdrfmt}m%s${C_RESET}\n" "$t"
                fold_line "$WIDTH" "$l"
        else
                gemini_pre "$1"
        fi
}
528
# gemini_list LINE PRE
# Print a "* item" line: the asterisk in the margin and the item text,
# folded to $WIDTH, in the $C_LIST format.
# When PRE is "true", or LINE does not start with "*", LINE is printed by
# gemini_pre.
gemini_list() {
        local re="^(\*)[[:blank:]]*(.*)"
        local s t a l # sigil, text, annotation(n/a), line

        if ${2-false} || ! [[ "$1" =~ $re ]]; then
                gemini_pre "$1"
                return
        fi

        s="${BASH_REMATCH[1]}"
        t="${BASH_REMATCH[2]}"

        printf "\e[${C_SIGIL}m%${S_MARGIN}s " "$s"
        printf -v l "\e[${C_LIST}m%s${C_RESET}\n" "$t"
        fold_line "$WIDTH" "$l"
}
543
# gemini_text LINE PRE
# Print an ordinary text line: a blank margin, then LINE folded to $WIDTH.
# When PRE is "true", LINE is printed by gemini_pre instead.
gemini_text() {
        if ${2-false}; then
                gemini_pre "$1"
                return
        fi
        printf "%${S_MARGIN}s " ' '
        fold_line "$WIDTH" "$1"
}
552
# gemini_pre LINE: print LINE verbatim (no folding) in the $C_PRE format,
# preceded by a ``` sigil in the margin.
gemini_pre() {
        printf "\e[${C_SIGIL}m%${S_MARGIN}s \e[${C_PRE}m%s${C_RESET}\n" \
                '```' "$1"
}
557
# fold_line WIDTH TEXT
# Print the whitespace-separated words of TEXT, each followed by a space,
# starting a new line whenever the next word would reach WIDTH.
# Continuation lines are indented by the width of TEXT's leading whitespace,
# or by $T_MARGIN when TEXT has none. Colour escape sequences
# (ESC [ digits-and-semicolons m) do not count towards the width.
fold_line() {
        local width="$1"
        local margin="${2%%[![:space:]]*}"
        if [[ "$margin" ]]; then
                margin="${#margin}"
        else
                margin="$T_MARGIN"
        fi
        local ll=0 wl plain word
        local -a words
        # split into words without pathname expansion, whatever the caller's
        # IFS and noglob settings are
        IFS=$' \t\n' read -r -d '' -a words <<<"$2"
        # a colour escape sequence, as a regular expression
        local esc_re=$'\x1b''\[[0-9;]*m'

        for word in "${words[@]}"; do
                # visible length: the word with all escape sequences removed
                plain="$word"
                while [[ "$plain" =~ $esc_re ]]; do
                        plain="${plain/"${BASH_REMATCH[0]}"/}"
                done
                wl=$((${#plain} + 1))
                if (((ll + wl) >= width)); then
                        printf "\n%${margin}s" ' '
                        ll=$wl
                else
                        ll=$((ll + wl))
                fi
                printf '%s ' "$word"
        done
        printf '\n'
}
583
# handle_keypress STATUS
# React to the exit status less was left with (see mklesskey):
#   48 (o) choose a link of the current page     49 (g) prompt for a URL
#   50 ([) history back    51 (]) history forward    52 (r) reload
# When moving in the history fails, the current page is reloaded after a
# short pause. Any other status does nothing.
handle_keypress() {
        case "$1" in
        48) run select_url "$BOLLUX_PAGESRC" ;;
        49)
                prompt GO URL
                run blastoff -u "$URL"
                ;;
        50 | 51)
                local move=history_back
                if [[ "$1" == 51 ]]; then
                        move=history_forward
                fi
                if ! run "$move"; then
                        sleep 0.5
                        run blastoff "$BOLLUX_URL"
                fi
                ;;
        52) run blastoff "$BOLLUX_URL" ;;
        *) ;; # 53-57 -- still available for binding
        esac
}
612
# select_url FILE
# Offer the links found in FILE (by extract_links) as a numbered menu read
# from the terminal, and load the chosen one. Answering "q" quits the
# program. The menu is shown again until a load succeeds.
select_url() {
        run mapfile -t < <(extract_links <"$1")
        select u in "${MAPFILE[@]}"; do
                if [[ "$REPLY" == q ]]; then
                        bollux_quit
                fi
                # a menu entry is "URL (title)": the URL is its first word
                if run blastoff "$(gawk '{print $1}' <<<"$u")"; then
                        break
                fi
        done </dev/tty
}
622
# extract_links: read text/gemini from stdin and print one line per link
# line ("=> URL [TITLE]"): "URL (TITLE)" with the title in colour when the
# second field is truthy in awk terms, otherwise just "URL".
extract_links() {
        gawk '
        /^=>/ {
                sub(/=>[[:space:]]*/,"")
                if (!$2) {
                        printf "%s\n", $1
                        next
                }
                # join fields 2..NF with single spaces
                title=$2
                for (f=3;f<=NF;f++) {
                        title=title " " $f
                }
                printf "%s (\033[34m%s\033[0m)\n", $1, title
        }'
}
638
# download: save stdin to a file in $BOLLUX_DOWNDIR.
# The data first goes to a temporary file; it is then moved to
# $BOLLUX_DOWNDIR/<last path segment of $BOLLUX_URL>. When the URL ends in
# "/" (no file name) the temporary file's own name is used. When the target
# already exists as a regular file, the data stays in the temporary file.
# Returns 1 when no temporary file can be created.
download() {
        local tn fn name
        if ! tn="$(mktemp)"; then
                log error "Could not create a temporary file."
                return 1
        fi
        log x "Downloading: '$BOLLUX_URL' => '$tn'..."
        dd status=progress >"$tn"

        name="${BOLLUX_URL##*/}"
        # no file name in the URL: fall back to the temp file's name so the
        # reported path is the file actually written
        [[ -n "$name" ]] || name="${tn##*/}"
        fn="$BOLLUX_DOWNDIR/$name"

        if [[ -f "$fn" ]]; then
                # do not overwrite an existing file
                log x "Saved '$tn'."
        elif mv "$tn" "$fn"; then
                log x "Saved '$fn'."
        else
                log error "Error saving '$fn': downloaded to '$tn'."
        fi
}
652
# history_init: start with an empty history.
# HISTORY is the global array of visited URLs and HN the position in it.
history_init() {
        # a plain assignment creates the array in the global scope (a
        # declare inside a function would only make a local one)
        HISTORY=()
        HN=0 # position of history in the array
}
657
# history_append URL: make URL the current page ($BOLLUX_URL), store it at
# position HN of HISTORY and advance HN by one.
history_append() {
        BOLLUX_URL="$1"
        HISTORY[HN]="$1"
        log d "HN=$HN HISTORY: ${HISTORY[*]}"
        ((++HN))
}
664
# history_back: move HN two positions back and load the HISTORY entry found
# there. When that would go below zero, HN is set to 0, an error is logged
# and 1 is returned.
history_back() {
        log d "HN=$HN"
        HN=$((HN - 2))
        if ((HN >= 0)); then
                blastoff "${HISTORY[HN]}"
                return
        fi
        HN=0
        log e "Beginning of history."
        return 1
}
# history_forward: load the HISTORY entry at position HN. When HN is at or
# past the end of HISTORY, it is set to the number of entries, an error is
# logged and 1 is returned.
history_forward() {
        log d "HN=$HN"
        local total="${#HISTORY[@]}"
        if ((HN < total)); then
                blastoff "${HISTORY[HN]}"
                return
        fi
        HN="$total"
        log e "End of history."
        return 1
}
684
# Start the client only when this file is executed directly; when it is
# sourced, just switch on debug logging.
if [[ "${BASH_SOURCE[0]}" != "$0" ]]; then
        BOLLUX_LOGLEVEL=DEBUG
else
        run bollux "$@"
fi