Skip to content

Commit 3b34d25

Browse files
committed
Allow configuration of query encoding in uri's
Uri.t can now be created with an optional `query_scheme` parameter that controls the encoding of the query string. If it's omitted then the encoding defaults to the uri's own scheme. An implementation is given for query encoding according to amazon's standards to be used when writing clients for AWS.
1 parent af0b30e commit 3b34d25

File tree

3 files changed

+100
-11
lines changed

3 files changed

+100
-11
lines changed

lib/uri.ml

Lines changed: 49 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ module type Scheme = sig
7474
val canonicalize_path : string list -> string list
7575
end
7676

77-
module Generic : Scheme = struct
77+
module Generic = struct
7878
let sub_delims a =
7979
let subd = "!$&'()*+,;=" in
8080
for i = 0 to String.length subd - 1 do
@@ -201,12 +201,32 @@ module Urn : Scheme = struct
201201

202202
end
203203

204+
module Aws : Scheme = struct
205+
include Http
206+
207+
let safe_chars_query_key =
208+
let a = Array.copy Generic.safe_chars in
209+
a.(Char.code '/') <- true;
210+
a
211+
212+
let safe_chars_for_component = function
213+
| `Query_key -> safe_chars_query_key
214+
| `Query_value -> Generic.safe_chars
215+
| `Query -> Http.safe_chars_for_component `Query
216+
| `Path
217+
| `Userinfo
218+
| `Fragment
219+
| `Scheme -> failwith "Aws scheme is only for query encoding"
220+
| x -> Http.safe_chars_for_component x
221+
end
222+
204223
let module_of_scheme = function
205224
| Some s -> begin match String.lowercase s with
206225
| "http" -> (module Http : Scheme)
207226
| "https" -> (module Https : Scheme)
208227
| "file" -> (module File : Scheme)
209228
| "urn" -> (module Urn : Scheme)
229+
| "aws" -> (module Aws : Scheme)
210230
| _ -> (module Generic : Scheme)
211231
end
212232
| None -> (module Generic : Scheme)
@@ -509,6 +529,7 @@ let encoded_of_query ?scheme = Query.encoded_of_query ?scheme
509529

510530
(* Type of the URI, with most bits being optional *)
511531
type t = {
532+
query_scheme: Pct.decoded sexp_option;
512533
scheme: Pct.decoded sexp_option;
513534
userinfo: Userinfo.t sexp_option;
514535
host: Pct.decoded sexp_option;
@@ -519,6 +540,7 @@ type t = {
519540
} with sexp
520541

521542
let empty = {
543+
query_scheme = None;
522544
scheme = None;
523545
userinfo = None;
524546
host = None;
@@ -572,7 +594,7 @@ let normalize schem uri =
572594
* casting/uncasting (which isn't fully identity due to the option box), but it is
573595
* no big deal for now.
574596
*)
575-
let make ?scheme ?userinfo ?host ?port ?path ?query ?fragment () =
597+
let make ?query_scheme ?scheme ?userinfo ?host ?port ?path ?query ?fragment () =
576598
let decode = function
577599
|Some x -> Some (Pct.cast_decoded x) |None -> None in
578600
let host = match userinfo, host, port with
@@ -593,9 +615,10 @@ let make ?scheme ?userinfo ?host ?port ?path ?query ?fragment () =
593615
| Some p -> Query.KV p
594616
in
595617
let scheme = decode scheme in
618+
let query_scheme = decode query_scheme in
596619
normalize scheme
597-
{ scheme; userinfo;
598-
host=decode host; port; path; query; fragment=decode fragment }
620+
{ scheme ; query_scheme ; userinfo ; host=decode host ; port ; path
621+
; query ; fragment=decode fragment }
599622

600623
(** Parse a URI string into a structure *)
601624
let of_string s =
@@ -645,7 +668,8 @@ let of_string s =
645668
| None -> Query.Raw (None, Lazy.from_val [])
646669
in
647670
let fragment = get_opt subs 9 in
648-
normalize scheme { scheme; userinfo; host; port; path; query; fragment }
671+
normalize scheme { query_scheme=None ; scheme ; userinfo ; host ; port
672+
; path ; query ; fragment }
649673

650674
(** Convert a URI structure into a percent-encoded string
651675
<http://tools.ietf.org/html/rfc3986#section-5.3>
@@ -687,6 +711,14 @@ let to_string uri =
687711
Buffer.add_char buf ':';
688712
Buffer.add_string buf (string_of_int port)
689713
);
714+
let scheme =
715+
match uri.query_scheme with
716+
| None -> scheme
717+
| Some s ->
718+
let s' = Pct.uncast_decoded s in
719+
(if s' = "aws" then
720+
failwith @@ Printf.sprintf "using scheme %s\n" s');
721+
Some (Pct.uncast_decoded s) in
690722
(match uri.path with (* Handle relative paths correctly *)
691723
| [] -> ()
692724
| "/"::_ ->
@@ -725,6 +757,11 @@ let with_scheme uri =
725757
|Some scheme -> { uri with scheme=Some (Pct.cast_decoded scheme) }
726758
|None -> { uri with scheme=None }
727759

760+
let with_query_scheme uri =
761+
function
762+
|Some scheme -> { uri with query_scheme=Some (Pct.cast_decoded scheme) }
763+
|None -> { uri with query_scheme=None }
764+
728765
let host uri = get_decoded_opt uri.host
729766
let with_host uri =
730767
function
@@ -818,16 +855,21 @@ let remove_query_param uri k = Query.(
818855
{ uri with query=KV (List.filter (fun (k',_) -> k<>k') (kv uri.query)) }
819856
)
820857

858+
let q_scheme uri =
859+
match uri.query_scheme with
860+
| None -> uri.scheme
861+
| t -> t
862+
821863
(* Construct encoded path and query components *)
822864
let path_and_query uri =
823865
match (path uri), (query uri) with
824866
|"", [] -> "/" (* TODO: What about same document? (/) *)
825867
|"", q -> (* TODO: What about same document? (/) *)
826-
let scheme = uncast_opt uri.scheme in
868+
let scheme = uncast_opt (q_scheme uri) in
827869
Printf.sprintf "/?%s" (encoded_of_query ?scheme q)
828870
|p, [] -> p
829871
|p, q ->
830-
let scheme = uncast_opt uri.scheme in
872+
let scheme = uncast_opt (q_scheme uri) in
831873
Printf.sprintf "%s?%s" p (encoded_of_query ?scheme q)
832874

833875
(* TODO: functions to add and remove from a URI *)

lib/uri.mli

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ val canonicalize : t -> t
7979
supplied without host, an empty host is added. If path is supplied
8080
and userinfo, host, or port is also supplied, path is made
8181
absolute but not resolved. *)
82-
val make : ?scheme:string -> ?userinfo:string -> ?host:string ->
82+
val make : ?query_scheme:string -> ?scheme:string -> ?userinfo:string -> ?host:string ->
8383
?port:int -> ?path:string -> ?query:(string * string list) list ->
8484
?fragment:string -> unit -> t
8585

@@ -192,6 +192,8 @@ val scheme : t -> string option
192192
Input URI is not modified *)
193193
val with_scheme : t -> string option -> t
194194

195+
val with_query_scheme : t -> string option -> t
196+
195197
(** Get the userinfo component of a URI *)
196198
val userinfo : t -> string option
197199

lib_test/test_runner.ml

Lines changed: 48 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ open OUnit
2020
open Printf
2121

2222
(* Tuples of decoded and encoded strings. The first element is a number to
23-
refer to the test, as the pcts_large version duplicates the second field
23+
refer to the test, as the pcts_large version duplicates the second field
2424
to a large size, so it cant be used as the name of the test *)
2525
let pcts = [
2626
(1, "hello world!", "hello%20world!");
@@ -46,7 +46,7 @@ let pcts_large =
4646
done;
4747
(n, Buffer.contents a', Buffer.contents b')
4848
) pcts
49-
49+
5050
(* Tuple of string URI and the decoded version *)
5151
let uri_encodes = [
5252
"https://user:[email protected]:123/wh/at/ever?foo=1&bar=5#5",
@@ -86,7 +86,7 @@ let map_pcts_tests size name test args =
8686
) args
8787

8888
let test_pct_small =
89-
(map_pcts_tests "small" "encode" (fun a b -> b, (Uri.pct_encode a)) pcts) @
89+
(map_pcts_tests "small" "encode" (fun a b -> b, (Uri.pct_encode a)) pcts) @
9090
(map_pcts_tests "small" "decode" (fun a b -> (Uri.pct_decode b), a) pcts)
9191

9292
let test_pct_large =
@@ -608,6 +608,50 @@ let test_canonicalize =
608608
)
609609
) canonical_map
610610

611+
let aws_encode_val =
612+
let safe_chars =
613+
let a = Array.make 256 false in
614+
let always_safe =
615+
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_.-~" in
616+
for i = 0 to String.length always_safe - 1 do
617+
let c = Char.code always_safe.[i] in
618+
a.(c) <- true
619+
done;
620+
a
621+
in
622+
fun s ->
623+
let len = (String.length s) in
624+
let b = Buffer.create len in
625+
for i = 0 to len - 1 do
626+
let code = Char.code s.[i] in
627+
if safe_chars.(code) then
628+
Buffer.add_char b s.[i]
629+
else
630+
Buffer.add_string b (Printf.sprintf "%%%02X" code);
631+
done;
632+
Buffer.contents b
633+
634+
let aws_encode kv =
635+
kv
636+
|> List.map (fun (x, y) -> x ^ "=" ^ aws_encode_val y)
637+
|> String.concat "&"
638+
639+
let test_aws_encode =
640+
let f q =
641+
let query = q |> List.map (fun (k, v) -> (k, [v])) in
642+
let u = Uri.make ~query_scheme:"aws" ~query () in
643+
Uri.path_and_query u
644+
in
645+
let test q =
646+
let q' = "/?" ^ aws_encode q in
647+
(q, q')
648+
in
649+
[ test ["q","*"]
650+
; test ["q","star wars"; "q.options", "{fields: ['title^5.0','description']}"]
651+
] |> List.map (fun (in_, out) ->
652+
out >:: (fun () -> assert_equal ~printer:(fun x -> x) out (f in_))
653+
)
654+
611655
(* Returns true if the result list contains successes only.
612656
Copied from oUnit source as it isnt exposed by the mli *)
613657
let rec was_successful =
@@ -639,6 +683,7 @@ let _ =
639683
@ test_sexping
640684
@ test_with_change
641685
@ test_canonicalize
686+
@ test_aws_encode
642687
) in
643688
let verbose = ref false in
644689
let set_verbose _ = verbose := true in

0 commit comments

Comments
 (0)