Add URL normalizing functions for use later
author Case Duckworth <acdw@acdw.net>
Sun, 24 May 2020 00:46:58 +0000 (19:46 -0500)
committer Case Duckworth <acdw@acdw.net>
Sun, 24 May 2020 00:46:58 +0000 (19:46 -0500)
bollux

diff --git a/bollux b/bollux
index b9d1443..50b4a81 100755 (executable)
--- a/bollux
+++ b/bollux
@@ -105,7 +105,62 @@ NOT_IMPLEMENTED() { die 200 "NOT IMPLEMENTED!!!"; }
 NOT_FULLY_IMPLEMENTED() { log 1 "NOT FULLY IMPLEMENTED!!!"; }
 
 ### gemini ###
-# normalize a gemini address
+# url functions
+# normalize_path <<< PATH -- print PATH with empty, "." and ".." segments
+# resolved, keeping any leading or trailing slash.  An empty PATH or one
+# starting with "//host" prints nothing and makes gawk exit 1.
+normalize_path() {
+       gawk '{
+       if ($0 == "" || $0 ~ /^\/\/[^\/]/) {
+               exit 1 # awk only allows "return" inside a function
+       }
+       # walk segments by index: "for (c in path)" has no defined order
+       n = split($0, path, /\//)
+       ret = "" # kept segments, each one prefixed with "/"
+       for (i = 1; i <= n; i++) {
+               if (path[i] == "..") {
+                       sub(/\/[^\/]*$/, "", ret) # pop; no-op when empty
+               } else if (path[i] != "" && path[i] != ".") {
+                       ret = ret "/" path[i]
+               }
+       }
+       # a final "", "." or ".." segment means the result names a directory
+       if (ret != "" && path[n] ~ /^\.?\.?$/) ret = ret "/"
+       if ($0 !~ /^\//) sub(/^\//, "", ret) # relative in, relative out
+       else if (ret == "") ret = "/" # every segment was popped: the root
+       print ret
+       }'
+}
+
+# split_url <<< URL -- print a bash assignment URL["part"]="value" for each
+# part found, in the order scheme, authority, path, query, fragment.  Each
+# part keeps its delimiter: "gemini:", "//host", "/path", "?query", "#frag".
+split_url() {
+       gawk 'function take(part, re) { # move a leading match of re into arr
+               if (!match($0, re)) return
+               arr[part] = substr($0, RSTART, RLENGTH)
+               $0 = substr($0, RLENGTH + 1)
+       }
+       {
+       delete arr # nothing may leak over from the previous input line
+       take("scheme", "^[A-Za-z]+:")
+       # awk has no lazy "+?" (gawk takes it as an optional "+"), so say "*";
+       # a host without "//" only counts as authority after a scheme
+       take("authority", "^//[^/?#]*")
+       if (("scheme" in arr) && !("authority" in arr)) take("authority", "^[^/?#]+")
+       take("path", "^/?[^?#]+")
+       take("query", "^[?][^#]*") # "*": a bare "?" must not hide the fragment
+       take("fragment", "^#.*")
+       n = split("scheme authority path query fragment", order, " ")
+       for (i = 1; i <= n; i++) {
+               if (!(order[i] in arr)) continue
+               val = arr[order[i]]
+               gsub(/[\\"$`]/, "\\\\&", val) # keep val literal once quoted
+               printf "URL[\"%s\"]=\"%s\"\n", order[i], val
+       }
+       }'
+}
+
 # example.com => gemini://example.com/
 _address() { # _address URL
        addr="$1"