# cf. https://tools.ietf.org/html/rfc3986#section-5 (reference resolution) and
# cf. https://tools.ietf.org/html/rfc3986#section-5.1 (establishing a base URI)
# cf. also https://tools.ietf.org/html/rfc3986#appendix-B -- the parsing regex
# Test with the examples in https://tools.ietf.org/html/rfc3986#section-5.4
# Resolve a URI reference against a base URI, following the pseudocode of
# RFC 3986 section 5.2.2, and print the recomposed target URI (section 5.3).
# Arguments: $1 - base URI
#            $2 - URI reference (may be relative)
# Outputs:   the target URI on stdout, followed by a newline
# Uses:      parse_url, merge, remove_dot_segments
transform_resource() { # 5.2.2
	local -A R=() B=() T=() # reference, base url, target
	local r=""
	# NOTE(review): parse_url's output is eval'ed; it must be shell-quoted.
	eval "$(parse_url R "$2")" # XXX CHANGE
	eval "$(parse_url B "$1")"

	# "${A[key]+x}" expands to x only when the field is set, which is the
	# spec's defined(): a component is copied into T only if it is defined,
	# so that recomposition can tell "absent" from "present but empty".
	if [[ "${R[scheme]+x}" ]]; then
		T[scheme]="${R[scheme]}"
		[[ "${R[authority]+x}" ]] && T[authority]="${R[authority]}"
		T[path]="$(remove_dot_segments "${R[path]-}")"
		[[ "${R[query]+x}" ]] && T[query]="${R[query]}"
	else
		if [[ "${R[authority]+x}" ]]; then
			T[authority]="${R[authority]}"
			T[path]="$(remove_dot_segments "${R[path]-}")"
			[[ "${R[query]+x}" ]] && T[query]="${R[query]}"
		else
			# An unset path and an empty path both count as "empty".
			if [[ -z "${R[path]-}" ]]; then
				T[path]="${B[path]-}"
				# The base query is inherited only when the
				# reference has no query of its own.
				if [[ "${R[query]+x}" ]]; then
					T[query]="${R[query]}"
				elif [[ "${B[query]+x}" ]]; then
					T[query]="${B[query]}"
				fi
			else
				if [[ "${R[path]}" == /* ]]; then
					T[path]="$(remove_dot_segments "${R[path]}")"
				else
					# "?" tells merge the base authority is undefined.
					T[path]="$(merge "${B[authority]-?}" \
						"${B[path]-}" "${R[path]}")"
					T[path]="$(remove_dot_segments "${T[path]}")"
				fi
				[[ "${R[query]+x}" ]] && T[query]="${R[query]}"
			fi
			[[ "${B[authority]+x}" ]] && T[authority]="${B[authority]}"
		fi
		[[ "${B[scheme]+x}" ]] && T[scheme]="${B[scheme]}"
	fi
	[[ "${R[fragment]+x}" ]] && T[fragment]="${R[fragment]}"

	# 5.3 -- recomposition
	[[ "${T[scheme]+x}" ]] &&
		r+="${T[scheme]}:"
	[[ "${T[authority]+x}" ]] &&
		r+="//${T[authority]}"
	r+="${T[path]}"
	[[ "${T[query]+x}" ]] &&
		r+="?${T[query]}"
	[[ "${T[fragment]+x}" ]] &&
		r+="#${T[fragment]}"
	printf '%s\n' "$r"
}
# Merge a relative-path reference with the base URI's path (RFC 3986, 5.2.3).
# Arguments: $1 - base authority, or the literal "?" when it is undefined
#            $2 - base path
#            $3 - reference path
# Outputs:   the merged path on stdout (dot segments are NOT removed here)
merge() { # 5.2.3
	#>If the base URI has a defined authority component and an empty
	#>path, then return a string consisting of "/" concatenated with the
	#>reference's path; otherwise,
	#>return a string consisting of the reference's path component
	#>appended to all but the last segment of the base URI's path (i.e.,
	#>excluding any characters after the right-most "/" in the base URI
	#>path, or excluding the entire base URI path if it does not contain
	#>any "/" characters).
	local B_authority="$1" # if ? is here, it means undefined (see caller)
	local B_path="$2"
	local R_path="$3"

	# Not part of 5.2.3: with nothing to merge, hand back the base path
	# unchanged.
	if [[ -z "$R_path" ]]; then
		printf '%s\n' "$B_path"
		return
	fi

	if [[ "$B_authority" != "?" && -z "$B_path" ]]; then
		printf '/%s\n' "$R_path"
		return
	fi

	if [[ "$B_path" == */* ]]; then
		# keep everything up to and including the right-most "/"
		B_path="${B_path%/*}/"
	else
		B_path=""
	fi
	# %s, not %q: the result is a path, not shell source, so characters
	# such as ";" must come through unescaped.
	printf '%s%s\n' "$B_path" "$R_path"
}
# TODO: normalize_path, which bollux already has, can probably be used here
# Remove "." and ".." segments from a path (RFC 3986, section 5.2.4).
# Arguments: $1 - path
# Outputs:   the normalized path on stdout, followed by a newline
remove_dot_segments() { # 5.2.4
	local input="$1"
	local output=""
	local segment pop

	while [[ -n "$input" ]]; do
		pop=""
		case "$input" in
		# A: a leading "../" or "./" is dropped
		../*) input="${input#../}" ;;
		./*) input="${input#./}" ;;
		# B: "/./" or a final "/." becomes "/"
		/./*) input="/${input#/./}" ;;
		/.) input="/" ;;
		# C: "/../" or a final "/.." becomes "/" and pops the output
		/../*)
			input="/${input#/../}"
			pop=1
			;;
		/..)
			input="/"
			pop=1
			;;
		# D: a lone "." or ".." is dropped
		. | ..) input="" ;;
		# E: move the first path segment in the input buffer to the
		# end of the output buffer, including the initial "/"
		# character (if any) and any subsequent characters up to, but
		# not including, the next "/" character or the end of the
		# input buffer.
		*)
			segment="${input#/}"
			segment="${segment%%/*}"
			if [[ "$input" == /* ]]; then
				segment="/$segment"
			fi
			output+="$segment"
			input="${input#"$segment"}"
			;;
		esac
		if [[ -n "$pop" ]]; then
			# drop the last segment and its preceding "/", if any
			if [[ "$output" == */* ]]; then
				output="${output%/*}"
			else
				output=""
			fi
		fi
	done

	# NOTE(review): collapsing "//" is not part of 5.2.4; it is kept from
	# the original for callers that join segments with an extra "/".
	printf '%s\n' "${output//\/\///}"
}
# *PARSING* URLS ... IN PURE BASH !!!
# Split a URI into its components with the regex from RFC 3986 appendix B
# and print shell assignments that load them into an associative array.
# Arguments: $1 - NAME of the associative array the caller has declared
#            $2 - STRING, the URI or URI reference to split
# Outputs:   lines of the form NAME[component]=VALUE, meant to be eval'ed.
#            path is always emitted; scheme, authority, query and fragment
#            are emitted only when their delimiter is present in STRING,
#            so "set but empty" stays distinct from "unset".
# Returns:   0 on success, non-zero if the regex does not match
parse_url() { # eval "$(parse_url NAME STRING)" => NAME[...]
	local name="$1"
	local string="$2"
	local re='^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?'
	[[ $string =~ $re ]] || return

	# Odd-numbered groups 1, 3, 6 and 8 include the delimiter, so they are
	# non-empty exactly when the component is present. Values are printed
	# with %q because the caller eval's this output: every shell
	# metacharacter (spaces, $, quotes, ...) must arrive inert.
	if [[ -n "${BASH_REMATCH[1]}" ]]; then
		printf '%s[scheme]=%q\n' "$name" "${BASH_REMATCH[2]}"
	fi
	if [[ -n "${BASH_REMATCH[3]}" ]]; then
		printf '%s[authority]=%q\n' "$name" "${BASH_REMATCH[4]}"
	fi
	printf '%s[path]=%q\n' "$name" "${BASH_REMATCH[5]}"
	if [[ -n "${BASH_REMATCH[6]}" ]]; then
		printf '%s[query]=%q\n' "$name" "${BASH_REMATCH[7]}"
	fi
	if [[ -n "${BASH_REMATCH[8]}" ]]; then
		printf '%s[fragment]=%q\n' "$name" "${BASH_REMATCH[9]}"
	fi
	return 0
}
# quality-of-life helper functions
# Test whether the variable called NAME is set, whatever its value.
# Arguments: $1 - NAME of the variable to inspect
# Returns:   0 if NAME is set (even to the empty string), 1 if it is unset
# NOTE(review): the body is rebuilt from the header comment alone -- confirm.
isdefined() { # isdefined NAME => tests if NAME is defined ONLY
	# ${!1+x}: indirect expansion; yields "x" only when NAME is set
	[[ "${!1+x}" ]]
}
# Test whether the variable called NAME is set to the empty string.
# Arguments: $1 - NAME of the variable to inspect
# Returns:   0 if NAME is set and empty, 1 if it is unset or non-empty
# NOTE(review): the body is rebuilt from the header comment alone -- confirm.
isempty() { # isempty NAME => tests if NAME is empty ONLY
	# ${!1-x}: indirect expansion; yields "x" when NAME is unset, so only
	# a variable that is set *and* empty expands to nothing
	[[ ! "${!1-x}" ]]
}
# Script entry point: pass the command-line arguments (base URL, then the
# reference URL) straight to transform_resource, which prints the result.
transform_resource "$@"