e9c9fc99b9b76614d72f29669414c55de68280cc
[bollux.git/.git] / transform_uri.sh
1 #!/usr/bin/env bash
2 # transform-url
3 # cf. https://tools.ietf.org/html/rfc3986#section-5 and
4 # cf. https://tools.ietf.org/html/rfc3986#section-5.1
5 # cf. also https://tools.ietf.org/html/rfc3986#appendix-B -- regex
6
7 # TEST WITH https://tools.ietf.org/html/rfc3986#section-5.4
8
# transform_resource BASE REFERENCE
#
# Resolve REFERENCE against BASE following the pseudocode of RFC 3986,
# section 5.2.2, then recompose the result as in section 5.3.
#
# Arguments: $1 - base URI
#            $2 - URI reference (may be relative, may be empty)
# Outputs:   the target URI on stdout, followed by a newline
#
# Components are read from the assignments printed by parse_url.  A
# component that parse_url did not emit is treated as undefined/empty,
# so every lookup below uses a default and never assumes the key exists.
transform_resource() { # 5.2.2
        local -A R=() B=() T=() # reference, base url, target
        # NOTE(review): parse_url output is eval'ed, so it must consist of
        # correctly shell-quoted assignments -- confirm before feeding
        # untrusted URLs through here.
        eval "$(parse_url R "$2")"
        eval "$(parse_url B "$1")"

        if [[ -n "${R[scheme]+x}" ]]; then
                # The reference is a full URI: only normalise its path.
                T[scheme]="${R[scheme]}"
                T[authority]="${R[authority]-}"
                T[path]="$(remove_dot_segments "${R[path]-}")"
                T[query]="${R[query]-}"
        elif [[ -n "${R[authority]+x}" ]]; then
                # Network-path reference: inherit the scheme only.
                T[scheme]="${B[scheme]-}"
                T[authority]="${R[authority]}"
                T[path]="$(remove_dot_segments "${R[path]-}")"
                T[query]="${R[query]-}"
        else
                if [[ -z "${R[path]-}" ]]; then
                        # Empty path (unset or ""): keep the base path, and
                        # keep the base query unless the reference has one.
                        T[path]="${B[path]-}"
                        if [[ -n "${R[query]+x}" ]]; then
                                T[query]="${R[query]}"
                        else
                                T[query]="${B[query]-}"
                        fi
                elif [[ "${R[path]}" == /* ]]; then
                        # Absolute-path reference.
                        T[path]="$(remove_dot_segments "${R[path]}")"
                        T[query]="${R[query]-}"
                else
                        # Relative path: merge with the base path first.
                        # "?" tells merge that the base has no authority.
                        T[path]="$(merge "${B[authority]-?}" \
                                "${B[path]-}" "${R[path]}")"
                        T[path]="$(remove_dot_segments "${T[path]}")"
                        T[query]="${R[query]-}"
                fi
                T[authority]="${B[authority]-}"
                T[scheme]="${B[scheme]-}"
        fi
        T[fragment]="${R[fragment]-}"

        # 5.3 -- recomposition; empty components are left out entirely.
        local r=""
        if [[ -n "${T[scheme]}" ]]; then
                r+="${T[scheme]}:"
        fi
        if [[ -n "${T[authority]}" ]]; then
                r+="//${T[authority]}"
        fi
        r+="${T[path]}"
        if [[ -n "${T[query]}" ]]; then
                r+="?${T[query]}"
        fi
        if [[ -n "${T[fragment]}" ]]; then
                r+="#${T[fragment]}"
        fi
        printf '%s\n' "$r"
}
61
# merge BASE_AUTHORITY BASE_PATH REF_PATH
#
# Merge a relative-path reference with the base path (RFC 3986, 5.2.3):
#>If the base URI has a defined authority component and an empty
#>path, then return a string consisting of "/" concatenated with the
#>reference's path; otherwise,
#>return a string consisting of the reference's path component
#>appended to all but the last segment of the base URI's path (i.e.,
#>excluding any characters after the right-most "/" in the base URI
#>path, or excluding the entire base URI path if it does not contain
#>any "/" characters).
#
# Arguments: $1 - base authority; "?" (or empty) means undefined
#            $2 - base path
#            $3 - reference path
# Outputs:   the merged path on stdout, followed by a newline.  The text
#            is printed verbatim (no shell quoting), because the result
#            is a path, not a shell word.
merge() { # 5.2.3
        local base_authority="$1" # if ? is here, it means undefined (see caller)
        local base_path="$2"
        local ref_path="$3"

        if [[ -z "$ref_path" ]]; then
                # Not part of the RFC algorithm: with no reference path the
                # base path is handed back, with each "//" pair collapsed
                # to "/" in a single pass.
                printf '%s\n' "${base_path//\/\///}"
                return
        fi

        if [[ "${base_authority:-?}" != "?" && -z "$base_path" ]]; then
                printf '/%s\n' "$ref_path"
                return
        fi

        if [[ "$base_path" == */* ]]; then
                # Keep everything up to and including the right-most "/".
                base_path="${base_path%/*}/"
        else
                base_path=""
        fi
        # base_path already ends in "/" (or is empty), so no separator here.
        printf '%s%s\n' "$base_path" "$ref_path"
}
91
92 # I can probably just use normalize_path already in bollux here
# remove_dot_segments PATH
#
# Remove "." and ".." segments from PATH using the input/output buffer
# algorithm of RFC 3986, section 5.2.4.
#
# Arguments: $1 - path to normalise (may be empty)
# Outputs:   the normalised path on stdout, followed by a newline
#
# Beyond the RFC, runs of "/" are squeezed to a single "/", as the
# previous implementation collapsed "//" as well.  This is done on the
# input, before any ".." is resolved, so an empty segment is never the
# one that ".." removes.
remove_dot_segments() { # 5.2.4
        local input="$1"
        local output=""
        local segment

        while [[ "$input" == *//* ]]; do
                input="${input//\/\///}"
        done

        while [[ -n "$input" ]]; do
                case "$input" in
                ../*)
                        # A. leading "../" is dropped
                        input="${input#../}"
                        ;;
                ./*)
                        # A. leading "./" is dropped
                        input="${input#./}"
                        ;;
                /./* | /.)
                        # B. "/./" or a final "/." becomes "/"
                        input="${input#/.}"
                        [[ -n "$input" ]] || input="/"
                        ;;
                /../* | /..)
                        # C. "/../" or a final "/.." becomes "/" and the last
                        # output segment (with its preceding "/") is removed
                        input="${input#/..}"
                        [[ -n "$input" ]] || input="/"
                        if [[ "$output" == */* ]]; then
                                output="${output%/*}"
                        else
                                output=""
                        fi
                        ;;
                . | ..)
                        # D. a lone "." or ".." is dropped
                        input=""
                        ;;
                *)
                        # E. move the first path segment in the input buffer
                        # to the end of the output buffer, including the
                        # initial "/" character (if any) and any subsequent
                        # characters up to, but not including, the next "/"
                        # character or the end of the input buffer.
                        if [[ "$input" == /* ]]; then
                                segment="${input#/}"
                                segment="/${segment%%/*}"
                        else
                                segment="${input%%/*}"
                        fi
                        output+="$segment"
                        input="${input:${#segment}}"
                        ;;
                esac
        done
        printf '%s\n' "$output"
}
124
125 # *FINDING* URLS ... IN PURE BASH !!!
# parse_url NAME STRING
#
# Split STRING into URI components with the regular expression from
# RFC 3986, appendix B, and print one assignment per non-empty component:
#
#     NAME[scheme]=...  NAME[authority]=...  NAME[path]=...
#     NAME[query]=...   NAME[fragment]=...
#
# Intended use:  declare -A NAME; eval "$(parse_url NAME STRING)"
#
# Arguments: $1 - name of the (associative) array to assign into
#            $2 - the URI or URI reference to split
# Outputs:   shell assignments on stdout; values are quoted with
#            printf %q so they survive eval unchanged, whatever
#            characters (spaces, $, backticks, quotes, ...) they contain
# Returns:   0 on success; the status of the regex match if it fails
#
# Empty components are not printed, so after eval an empty component and
# an absent one both show up as an unset key.
parse_url() { # eval "$(parse_url NAME STRING)" => NAME[...]
        local name="$1"
        local string="$2"
        local re='^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?'
        [[ $string =~ $re ]] || return $?

        # Component names and the capture group that holds each of them.
        local -a keys=(scheme authority path query fragment)
        local -a groups=(2 4 5 7 9)
        local i value

        for i in "${!keys[@]}"; do
                value="${BASH_REMATCH[${groups[i]}]}"
                if [[ -n "$value" ]]; then
                        printf '%s[%s]=%q\n' "$name" "${keys[i]}" "$value"
                fi
        done
        # Do not let an empty last component turn into a failure status.
        return 0
}
144
145 # ease-of-life functions
# isdefined NAME
# Succeeds when the variable called NAME is set, even if its value is the
# empty string; fails when it is unset.
isdefined() {
        [[ -n "${!1+x}" ]]
}
# isempty NAME
# Succeeds only when the variable called NAME is set AND holds the empty
# string; fails when it is unset or has a non-empty value.
isempty() {
        [[ -z "${!1-x}" ]]
}
152
# Entry point: turn on command tracing, then resolve the reference given
# as the second argument against the base URI given as the first.
set -o xtrace
transform_resource "$@"
155
156 # NEXT ....
157 # NORMALIZATION !!!