Major rewrite, v0.1
authorCase Duckworth <acdw@acdw.net>
Tue, 26 May 2020 02:37:55 +0000 (21:37 -0500)
committerCase Duckworth <acdw@acdw.net>
Tue, 26 May 2020 02:37:55 +0000 (21:37 -0500)
bollux

diff --git a/bollux b/bollux
index 7f7459c..abd0ab9 100755 (executable)
--- a/bollux
+++ b/bollux
 #!/usr/bin/env bash
-# bollux: a bash gemini client or whatever
-# Author: Case Duckworth <acdw@acdw.net>
-# License: MIT
-# Version: -0.7
-
-# set -euo pipefail              # strict mode
-
-### constants ###
-PRGN="${0##*/}"                # program name
-DLDR="${BOLLUX_DOWNDIR:=.}"    # where to download
-LOGL="${BOLLUX_LOGLEVEL:=3}"   # log level
-MAXR="${BOLLUX_MAXREDIR:=5}"   # max redirects
-PORT="${BOLLUX_PORT:=1965}"    # port number
-PROT="${BOLLUX_PROTO:=gemini}" # protocol
-RDRS=0                         # redirects
-VRSN=-0.7                      # version number
-
-# shellcheck disable=2120
-bollux_usage() {
-       cat <<END_USAGE >&2
-       $PRGN ($VRSN): a bash gemini client
-       usage:
-               $PRGN [-h]
-               $PRGN [-L LVL] [URL]
-       options:
-               -h      show this help
-               -L LVL  set the loglevel to LVL.
-                       Default: $BOLLUX_LOGLEVEL
-                       The loglevel is between 0 and 5, with
-                       lower levels being more dire.
-       parameters:
-               URL     the URL to navigate view or download
-END_USAGE
-       exit "${1:-0}"
+
+# Program information
+PRGN="${0##*/}"
+VRSN=0.1
+# State
+REDIRECTS=0
+
+run() {
+       log debug "$@"
+       "$@"
+}
+
+die() {
+       ec="$1"
+       shift
+       log error "$*"
+       exit "$ec"
 }
 
-# LOGLEVELS:
-# 0 - application fatal error
-# 1 - application warning
-# 2 - response error
-# 3 - response logging
-# 4 - application logging
-# 5 - diagnostic
-
-### utility functions ###
-# a better echo
-put() { printf '%s\n' "$*"; }
-
-# conditionally log events to stderr
-# lower = more important
-log() { # log [LEVEL] [<] MESSAGE
+trim() { sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//'; }
+
+log() {
+       [[ "$BOLLUX_LOGLEVEL" == QUIET ]] && return
        case "$1" in
-       -)
-               lvl="-1"
-               shift
+       d* | D*) # debug
+               [[ "$BOLLUX_LOGLEVEL" == DEBUG ]] || return
+               fmt=34
                ;;
-       [0-5])
-               lvl="$1"
-               shift
+       e* | E*) # error
+               fmt=31
                ;;
-       *) lvl=4 ;;
+       *) fmt=1 ;;
        esac
+       shift
+       printf >&2 '\e[%sm%s:\e[0m\t%s\n' "$fmt" "$PRGN" "$*"
+}
 
-       output="$*"
-       if ((lvl < LOGL)); then
-               if (($# == 0)); then
-                       while IFS= read -r line; do
-                               output="$output${output:+$'\n'}$line"
-                       done
-               fi
-               printf '\e[34m%s\e[0m:\t%s\n' "$PRGN" "$output" >&2
+# main entry point
+bollux() {
+       run bollux_args "$@"
+       run bollux_config
+
+       if [[ ! "${BOLLUX_URL:+isset}" ]]; then
+               run prompt GO BOLLUX_URL
        fi
+
+       run blastoff "$BOLLUX_URL"
 }
 
-# halt and catch fire
-die() { # die [EXIT-CODE] MESSAGE
-       case "$1" in
-       [0-9]*)
-               ec="$1"
-               shift
-               ;;
-       *) ec=1 ;;
-       esac
+bollux_args() {
+       while getopts :vq OPT; do
+               case "$OPT" in
+               v) BOLLUX_LOGLEVEL=DEBUG ;;
+               q) BOLLUX_LOGLEVEL=QUIET ;;
+               :) die 1 "Option -$OPTARG requires an argument" ;;
+               *) die 1 "Unknown option: -$OPTARG" ;;
+               esac
+       done
+       shift $((OPTIND - 1))
+       if (($# == 1)); then
+               BOLLUX_URL="$1"
+       fi
+}
 
-       log 0 "$*"
-       exit "$ec"
+bollux_config() {
+       : "${BOLLUX_CONFIG:=${XDG_CONFIG_DIR:-$HOME/.config}/bollux/config}"
+
+       if [ -f "$BOLLUX_CONFIG" ]; then
+               # shellcheck disable=1090
+               . "$BOLLUX_CONFIG"
+       else
+               log debug "Can't load config file '$BOLLUX_CONFIG'."
+       fi
+
+       : "${BOLLUX_DOWNDIR:=.}"                   # where to save downloads
+       : "${BOLLUX_LOGLEVEL:=3}"                  # log level
+       : "${BOLLUX_MAXREDIR:=5}"                  # max redirects
+       : "${BOLLUX_PORT:=1965}"                   # port number
+       : "${BOLLUX_PROTO:=gemini}"                # default protocol
+       : "${BOLLUX_LESSKEY:=/tmp/bollux-lesskey}" # where to store binds
+       : "${BOLLUX_PAGESRC:=/tmp/bollux-src}"     # where to save the page source
+       : "${BOLLUX_URL:=}"                        # start url
 }
 
-# ask the user for input
-ask() { # ask PROMPT [READ_OPT...]
+prompt() {
        prompt="$1"
        shift
        read </dev/tty -e -r -p "$prompt> " "$@"
 }
 
-# fail if something isn't installed
-require() { hash "$1" 2>/dev/null || die 127 "Requirement '$1' not found."; }
+blastoff() { # load a url
+       local well_formed=true
+       if [[ "$1" == "-u" ]]; then
+               well_formed=false
+               shift
+       fi
+       URL="$1"
+
+       if $well_formed && [[ "$1" != "$BOLLUX_URL" ]]; then
+               URL="$(run munge_url "$1" "$BOLLUX_URL")"
+       fi
+       [[ "$URL" != *://* ]] && URL="$BOLLUX_PROTO://$URL"
+       URL="$(trim <<<"$URL")"
 
-# trim a string
-trim() { sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//'; }
+       server="${URL#*://}"
+       server="${server%%/*}"
+
+       run request_url "$server" "$BOLLUX_PORT" "$URL" |
+               run handle_response "$URL"
+}
+
+munge_url() {
+       local -A new old u
+       eval "$(split_url new <<<"$1")"
+       for k in "${!new[@]}"; do log d "new[$k]=${new[$k]}"; done
+       eval "$(split_url old <<<"$2")"
+       for k in "${!old[@]}"; do log d "old[$k]=${old[$k]}"; done
+
+       u['scheme']="${new['scheme']:-${old['scheme']:-}}"
+       u['authority']="${new['authority']:-${old['authority']:-}}"
+       # XXX this whole path thing is wack
+       if [[ "${new['path']+isset}" ]]; then
+               log d 'new path set'
+               if [[ "${new['path']}" == /* ]]; then
+                       log d 'new path == /*'
+                       u['path']="${new['path']}"
+               elif [[ "${new['authority']}" == "${old['authority']}" || ! "${new['authority']+isset}" ]]; then
+                       p="${old['path']:-}/${new['path']}"
+                       log d "$p ( $(normalize_path <<<"$p") )"
+                       u['path']="$(normalize_path <<<"$p")"
+               else
+                       log d 'u path = new path'
+                       u['path']="${new['path']}"
+               fi
+       elif [[ "${new['query']+isset}" || "${new['fragment']+isset}" ]]; then
+               log d 'u path = old path'
+               u['path']="${old['path']}"
+       else
+               u['path']="/"
+       fi
+       u['query']="${new['query']:-}"
+       u['fragment']="${new['fragment']:-}"
+       for k in "${!u[@]}"; do log d "u[$k]=${u[$k]}"; done
 
-# stubs for when things aren't implemented (fully)
-NOT_IMPLEMENTED() { die 200 "NOT IMPLEMENTED!!!"; }
-NOT_FULLY_IMPLEMENTED() { log 1 "NOT FULLY IMPLEMENTED!!!"; }
+       run printf '%s%s%s%s%s\n' \
+               "${u['scheme']}" "${u['authority']}" "${u['path']}" \
+               "${u['query']}" "${u['fragment']}"
+}
 
-### gemini ###
-# url functions
-# normalize a path from /../ /./ /
-normalize_path() { # normalize_path <<< PATH
+normalize_path() {
        gawk '{
-       if ($0 == "" || $0 ~ /^\/\/[^\/]/) {
-               return -1
-       }
        split($0, path, /\//)
        for (c in path) {
                if (path[c] == "" || path[c] == ".") {
@@ -128,19 +166,18 @@ normalize_path() { # normalize_path <<< PATH
                }
                ret = ret slash path[c]
        }
-       print ret
+       print (ret ~ /^\// ? "" : "/") ret
        }'
 }
 
-# split a url into the URL array
 split_url() {
-       gawk '{
+       gawk -vvar="$1" '{
        if (match($0, /^[A-Za-z]+:/)) {
                arr["scheme"] = substr($0, RSTART, RLENGTH)
                $0 = substr($0, RLENGTH + 1)
        }
        if (match($0, /^\/\/[^\/?#]+?/) || (match($0, /^[^\/?#]+?/) && scheme)) {
-               arr["authority"] = substr($0, RSTART, RLENGTH)
+       arr["authority"] = substr($0, RSTART, RLENGTH)
                $0 = substr($0, RLENGTH + 1)
        }
        if (match($0, /^\/?[^?#]+/)) {
@@ -156,140 +193,72 @@ split_url() {
                $0 = substr($0, RLENGTH + 1)
        }
        for (part in arr) {
-               printf "URL[\"%s\"]=\"%s\"\n", part, arr[part]
+               sub(/[[:space:]]+$/, "", arr[part])
+               printf var "[\"%s\"]=\"%s\"\n", part, arr[part]
        }
        }'
 }
 
-# example.com => gemini://example.com/
-_address() { # _address URL
-       addr="$1"
+request_url() {
+       local server="$1"
+       local port="$2"
+       local url="$3"
 
-       [[ "$addr" != *://* ]] && addr="$PROT://$addr"
-       trim <<<"$addr"
+       ssl_cmd=(openssl s_client -crlf -quiet -connect "$server/$port")
+       ssl_cmd+=(-servername "$server") # SNI
+       run "${ssl_cmd[@]}" <<<"$url" 2>/dev/null
 }
 
-# return only the server part from an address, with the port added
-# gemini://example.com/path/to/file => example.com:1965
-_server() {
-       serv="$(_address "$1")" # normalize first
-       serv="${serv#*://}"
-       serv="${serv%%/*}"
-       if [[ "$serv" != *:* ]]; then
-               serv="$serv:$PORT"
-       fi
-       trim <<<"$serv"
-}
-
-# request a gemini page
-# by default, extract the server from the url
-request() { # request [-s SERVER] URL
-       case "$1" in
-       -s)
-               serv="$(_server "$2")"
-               addr="$(_address "$3")"
-               ;;
-       *)
-               serv="$(_server "$1")"
-               addr="$(_address "$1")"
-               ;;
-       esac
-
-       log 5 "serv: $serv"
-       log 5 "addr: $addr"
-
-       sslcmd=(openssl s_client -crlf -ign_eof -quiet -connect "$serv")
-       # use SNI
-       sslcmd+=(-servername "${serv%:*}")
-       log "${sslcmd[@]}"
-       "${sslcmd[@]}" <<<"$addr" 2>/dev/null
-}
+handle_response() {
+       local url="$1" code meta
 
-# handle the response
-# cf. gemini://gemini.circumlunar.space/docs/spec-spec.txt
-handle() { # handle URL < RESPONSE
-       URL="$1"
-       while read -d $'\r' -r head; do
-               break # wait to read the first line
+       while read -r -d $'\r' hdr; do
+               code="$(gawk '{print $1}' <<<"$hdr")"
+               meta="$(
+                       gawk '{for(i=2;i<=NF;i++)printf "%s ",$i;printf "\n"}' <<<"$hdr"
+               )"
+               break
        done
-       code="$(gawk '{print $1}' <<<"$head")"
-       meta="$(gawk '{for(i=2;i<=NF;i++)printf "%s ",$i;printf "\n"}' <<<"$head")"
 
-       log 5 "[$code]  $meta"
+       log x "[$code] $meta"
 
        case "$code" in
-       1*) # INPUT
-               log 3 "Input"
-               RDRS=0 # this is not a redirect
-               ask "$meta" QUERY
-               bollux "$URL?$QUERY"
+       1*)
+               REDIRECTS=0
+               BOLLUX_URL="$URL"
+               run prompt "$meta" QUERY
+               run blastoff "?$QUERY"
                ;;
-       2*) # SUCCESS
-               log 3 "Success"
-               RDRS=0 # this is not a redirect
-               case "$code" in
-               20) log 5 "- OK" ;;
-               21) log 5 "- End of client certificate session" ;;
-               *) log 2 "- Unknown response code: '$code'." ;;
-               esac
-               display "$meta"
-               ;;
-       3*) # REDIRECT
-               log 3 "Redirecting"
-               case "$code" in
-               30) log 5 "- Temporary" ;;
-               31) log 5 "- Permanent" ;;
-               *) log 2 "- Unknown response code: '$code'." ;;
-               esac
-               ((RDRS += 1))
-               ((RDRS > MAXR)) && die "$code" "Too many redirects!"
-               bollux "$meta"
+       2*)
+               REDIRECTS=0
+               BOLLUX_URL="$URL"
+               run display "$meta"
                ;;
-       4*) # TEMPORARY FAILURE
-               log 2 "Temporary failure"
-               RDRS=0 # this is not a redirect
-               case "$code" in
-               41) log 5 "- Server unavailable" ;;
-               42) log 5 "- CGI error" ;;
-               43) log 5 "- Proxy error" ;;
-               44) log 5 "- Rate limited" ;;
-               *) log 2 "- Unknown response code: '$code'." ;;
-               esac
-               exit "$code"
+       3*)
+               ((REDIRECTS += 1))
+               if ((REDIRECTS > BOLLUX_MAXREDIR)); then
+                       die $((100 + code)) "Too many redirects!"
+               fi
+               BOLLUX_URL="$URL"
+               run blastoff "$meta"
                ;;
-       5*) # PERMANENT FAILURE
-               log 2 "Permanent failure"
-               RDRS=0 # this is not a redirect
-               case "$code" in
-               51) log 5 "- Not found" ;;
-               52) log 5 "- No longer available" ;;
-               53) log 5 "- Proxy request refused" ;;
-               59) log 5 "- Bad request" ;;
-               *) log 2 "- Unknown response code: '$code'." ;;
-               esac
-               exit "$code"
+       4*)
+               REDIRECTS=0
+               die "$((100 + code))" "$code"
                ;;
-       6*) # CLIENT CERT REQUIRED
-               log 2 "Client certificate required"
-               RDRS=0 # this is not a redirect
-               case "$code" in
-               61) log 5 "- Transient cert requested" ;;
-               62) log 5 "- Authorized cert required" ;;
-               63) log 5 "- Cert not accepted" ;;
-               64) log 5 "- Future cert rejected" ;;
-               65) log 5 "- Expired cert rejected" ;;
-               *) log 2 "- Unknown response code: '$code'." ;;
-               esac
-               exit "$code"
+       5*)
+               REDIRECTS=0
+               die "$((100 + code))" "$code"
                ;;
-       *) # ???
-               die "$code" "Unknown response code: '$code'."
+       6*)
+               REDIRECTS=0
+               die "$((100 + code))" "$code"
                ;;
+       *) die "$((100 + code)) Unknown response code: $code." ;;
        esac
 }
 
-# display the page
-display() { # display META < DOCUMENT
+display() {
        case "$1" in
        *\;*)
                mime="$(cut -d\; -f1 <<<"$1" | trim)"
@@ -297,125 +266,147 @@ display() { # display META < DOCUMENT
                ;;
        *) mime="$(trim <<<"$1")" ;;
        esac
-       [ -z "$mime" ] && mime="text/gemini"
-       if [ -z "$charset" ]; then
+
+       [[ -z "$mime" ]] && mime="text/gemini"
+       if [[ -z "$charset" ]]; then
                charset="utf-8"
        else
-               charset="${charset#*=}"
+               charset="${charset#charset=}"
        fi
 
-       log 5 "mime=$mime; charset=$charset"
+       log debug "mime=$mime; charset=$charset"
 
        case "$mime" in
-       text/gemini)
-               lc="/tmp/bollux-currentpage.gmi" # link copy
-               lfn | typeset_gemini | tee "$lc" | less -R ||
-                       cat "$lc" # TODO list out links on success
-               # lesskey:
-               # o #> Open a link (quit 1)
-               # q #> Quit (quit 0)
-               # cf. also prompt & filename
+       text/*)
+               less_cmd=(less -R)
+               {
+                       [[ -r "$BOLLUX_LESSKEY" ]] || mklesskey "$BOLLUX_LESSKEY"
+               } && less_cmd+=(-k "$BOLLUX_LESSKEY")
+
+               submime="${mime#*/}"
+               if declare -F | grep -q "$submime"; then
+                       log d "typeset_$submime"
+                       {
+                               normalize_crlf |
+                                       run "typeset_$submime" |
+                                       tee "$BOLLUX_PAGESRC" |
+                                       run "${less_cmd[@]}"
+                       } || run handle_keypress "$?"
+               else
+                       log "cat"
+                       {
+                               normalize_crlf |
+                                       tee "$BOLLUX_PAGESRC" |
+                                       run "${less_cmd[@]}"
+                       } || run handle_keypress "$?"
+               fi
                ;;
-       text/*) lfn ;;
-       *) download "$URL" ;;
+       *) run download "$BOLLUX_URL" ;;
        esac
 }
 
-# normalize line endings to \n (LF)
-lfn() {
+mklesskey() {
+       lesskey -o "$1" - <<-END
+               #command
+               o quit 0 # 48 open a link
+               g quit 1 # 49 goto a url
+               [ quit 2 # 50 back
+               ] quit 3 # 51 forward
+               r quit 4 # 52 re-request / download
+       END
+}
+
+normalize_crlf() {
        gawk 'BEGIN{RS="\n\n"}{gsub(/\r\n?/,"\n");print;print ""}'
 }
 
-# typeset text
-typeset_gemini() { # typeset_gemini < INPUT
+typeset_gemini() {
        gawk '
        BEGIN { pre = 0 }
        /^###/ { sub(/^#+[[:space:]]*/, ""); 
-               printf " \033[3m%s\033[0m\n", $0
+               printf "### \033[3m%s\033[0m\n", $0
        next }
        /^##/  { sub(/^#+[[:space:]]*/, ""); 
-               printf " \033[1m%s\033[0m\n", $0
+               printf "##  \033[1m%s\033[0m\n", $0
        next }
        /^#/   { sub(/^#+[[:space:]]*/, ""); 
-               printf " \033[1;4m%s\033[0m\n", $0
+               printf "#   \033[1;4m%s\033[0m\n", $0
        next }
        /^=>/  { 
                sub(/=>[[:space:]]*/, "")
                url = $1; desc = ""
                for (w=2;w<=NF;w++) 
                        desc = desc (desc?" ":"") $w
-               printf " \033[1m[%s]\033[0m \033[4m%s\033[0m \033[36m%s\033[0m\n", 
-                       (++ln), desc, "(" url ")"
+               printf "=>  \033[1m[%02d]\033[0m \033[4m%s\033[0m\t\033[36m%s\033[0m\n", 
+                       (++ln), desc, url
        next }
-       # /^\*/  { sub(/\*[[:space:]]*/, ""); }
        /```/  { pre = !pre; next }
+       pre { printf "``` %s\n", $0; next }
+       # /^\*/  { sub(/\*[[:space:]]*/, ""); }
        { sub(/^/, " "); print }
        '
 }
 
-download() { # download URL < FILE
-       tn="$(mktemp)"
-       dd status=progress >"$tn"
-       fn="$DLDR/${URL##*/}"
-       if [[ -f "$fn" ]]; then
-               log - "Saved '$tn'."
-       else
-               if mv "$tn" "$fn"; then
-                       log - "Saved '$fn'."
-               else
-                       log 0 "Error saving '$fn'."
-                       log - "Saved '$tn'."
-               fi
-       fi
+handle_keypress() {
+       case "$1" in
+       48) # o - open a link -- show a menu of links on the page
+               run select_url "$BOLLUX_PAGESRC"
+               ;;
+       49) # g - goto a url -- input a new url
+               prompt GO URL
+               run blastoff -u "$URL"
+               ;;
+       50) # [ - back in the history
+               run history_back
+               ;;
+       51) # ] - forward in the history
+               run history_forward
+               ;;
+       52) # r - re-request the current resource
+               run blastoff "$BOLLUX_URL"
+               ;;
+       *) # 53-57 -- still available for binding
+               ;;
+       esac
 }
 
-### main entry point ###
-bollux() {
-       OPTIND=0
-       process_cmdline "$@"
-       shift $((OPTIND - 1))
-
-       if (($# == 1)); then
-               URL="$1"
-       else
-               ask GO URL
-       fi
-
-       log 5 "URL : $URL"
-
-       request "$URL" | handle "$URL"
+select_url() {
+       run mapfile -t < <(extract_links <"$1")
+       select u in "${MAPFILE[@]}"; do
+               run blastoff "$(gawk '{print $1}' <<<"$u")" && break
+       done </dev/tty
 }
 
-process_cmdline() {
-       while getopts :hL: OPT; do
-               case "$OPT" in
-               h) bollux_usage ;;
-               L) LOGL="$OPTARG" ;;
-               :) die 1 "Option -$OPTARG requires an argument" ;;
-               *) die 1 "Unknown option: -$OPTARG" ;;
-               esac
-       done
+extract_links() {
+       gawk -F$'\t' '/^=>/ {
+               gsub("\033\\[[^m]*m", "")
+               sub(/=>[[:space:]]*\[[0-9]+\][[:space:]]*/,"")
+               if ($2) 
+                       printf "%s (\033[34m%s\033[0m)\n", $2, $1
+               else
+                       printf "%s\n", $1
+       }'
 }
 
-bollux_setup() {
-       trap bollux_cleanup INT QUIT EXIT
+download() {
+       tn="$(mktemp)"
+       log x "Downloading: '$BOLLUX_URL' => '$tn'..."
+       dd status=progress >"$tn"
+       fn="$BOLLUX_DOWNDIR/${BOLLUX_URL##*/}"
+       if [[ -f "$fn" ]]; then
+               log x "Saved '$tn'."
+       elif mv "$tn" "$fn"; then
+               log x "Saved '$fn'."
+       else
+               log error "Error saving '$fn': downloaded to '$tn'."
+       fi
 }
 
-bollux_cleanup() {
-       exit $?
-}
+history_back() { log error "Not implemented."; }
+history_forward() { log error "Not implemented."; }
 
 if [[ "${BASH_SOURCE[0]}" == "$0" ]]; then
-       # requirements here -- so they're only checked once
-       require gawk
-       require dd
-       require mv
-       require openssl
-       require sed
-
-       bollux_setup
-
-       bollux "$@"
-
-       bollux_cleanup
+       run bollux "$@"
+else
+       BOLLUX_LOGLEVEL=DEBUG
 fi