diff --git a/lib/uri.ml b/lib/uri.ml index 29d9c9a..b1d428a 100644 --- a/lib/uri.ml +++ b/lib/uri.ml @@ -60,7 +60,7 @@ let rec compare_list f t t' = match t, t' with | x::xs, y::ys -> match f x y with 0 -> compare_list f xs ys | c -> c -(** Safe characters that are always allowed in a URI +(** Safe characters that are always allowed in a URI * Unfortunately, this varies depending on which bit of the URI * is being parsed, so there are multiple variants (and this * set is probably not exhaustive. TODO: check. @@ -74,7 +74,7 @@ module type Scheme = sig val canonicalize_path : string list -> string list end -module Generic : Scheme = struct +module Generic = struct let sub_delims a = let subd = "!$&'()*+,;=" in for i = 0 to String.length subd - 1 do @@ -83,7 +83,7 @@ module Generic : Scheme = struct done; a - let safe_chars : safe_chars = + let safe_chars : safe_chars = let a = Array.make 256 false in let always_safe = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_.-~" in @@ -201,12 +201,28 @@ module Urn : Scheme = struct end +module Aws : Scheme = struct + include Http + + let safe_chars_query_key = + let a = Array.copy Generic.safe_chars in + a.(Char.code '/') <- true; + a + + let safe_chars_for_component = function + | `Query_key -> safe_chars_query_key + | `Query_value -> Generic.safe_chars + | `Query -> Http.safe_chars_for_component `Query + | x -> Http.safe_chars_for_component x +end + let module_of_scheme = function | Some s -> begin match String.lowercase s with | "http" -> (module Http : Scheme) | "https" -> (module Https : Scheme) | "file" -> (module File : Scheme) | "urn" -> (module Urn : Scheme) + | "aws" -> (module Aws : Scheme) | _ -> (module Generic : Scheme) end | None -> (module Generic : Scheme) @@ -215,10 +231,10 @@ let module_of_scheme = function * and this really, really shouldn't be mixed up. So this Pct module * defines abstract Pct.encoded and Pct.decoded types which sets the * state of the underlying string. There are functions to "cast" to - * and from these and normal strings, and this promotes a bit of - * internal safety. These types are not exposed to the external + * and from these and normal strings, and this promotes a bit of + * internal safety. These types are not exposed to the external * interface, as casting to-and-from is quite a bit of hassle and - * probably not a lot of use to the average consumer of this library + * probably not a lot of use to the average consumer of this library *) module Pct : sig type encoded with sexp @@ -255,7 +271,7 @@ end = struct let unlift_decoded f = f let unlift_decoded2 f = f - (** Scan for reserved characters and replace them with + (** Scan for reserved characters and replace them with percent-encoded equivalents. @return a percent-encoded string *) let encode ?scheme ?(component=`Path) b = @@ -509,6 +525,7 @@ let encoded_of_query ?scheme = Query.encoded_of_query ?scheme (* Type of the URI, with most bits being optional *) type t = { + encoding: Pct.decoded sexp_option; scheme: Pct.decoded sexp_option; userinfo: Userinfo.t sexp_option; host: Pct.decoded sexp_option; @@ -519,6 +536,7 @@ type t = { } with sexp let empty = { + encoding = None; scheme = None; userinfo = None; host = None; @@ -572,7 +590,7 @@ let normalize schem uri = * casting/uncasting (which isn't fully identity due to the option box), but it is * no big deal for now. *) -let make ?scheme ?userinfo ?host ?port ?path ?query ?fragment () = +let make ?encoding ?scheme ?userinfo ?host ?port ?path ?query ?fragment () = let decode = function |Some x -> Some (Pct.cast_decoded x) |None -> None in let host = match userinfo, host, port with @@ -593,9 +611,10 @@ let make ?scheme ?userinfo ?host ?port ?path ?query ?fragment () = | Some p -> Query.KV p in let scheme = decode scheme in + let encoding = decode encoding in normalize scheme - { scheme; userinfo; - host=decode host; port; path; query; fragment=decode fragment } + { scheme ; encoding ; userinfo ; host=decode host ; port ; path + ; query ; fragment=decode fragment } (** Parse a URI string into a structure *) let of_string s = @@ -645,7 +664,8 @@ let of_string s = | None -> Query.Raw (None, Lazy.from_val []) in let fragment = get_opt subs 9 in - normalize scheme { scheme; userinfo; host; port; path; query; fragment } + normalize scheme { scheme ; userinfo ; host ; port ; path + ; query ; fragment ; encoding=None } (** Convert a URI structure into a percent-encoded string @@ -687,6 +707,10 @@ let to_string uri = Buffer.add_char buf ':'; Buffer.add_string buf (string_of_int port) ); + let scheme = + match uri.encoding with + | None -> scheme + | Some s -> Some (Pct.uncast_decoded s) in (match uri.path with (* Handle relative paths correctly *) | [] -> () | "/"::_ -> @@ -720,11 +744,17 @@ let to_string uri = (* Various accessor functions, as the external uri type is abstract *) let get_decoded_opt = function None -> None |Some x -> Some (Pct.uncast_decoded x) let scheme uri = get_decoded_opt uri.scheme +let encoding uri = get_decoded_opt uri.encoding let with_scheme uri = function |Some scheme -> { uri with scheme=Some (Pct.cast_decoded scheme) } |None -> { uri with scheme=None } +let with_encoding uri = + function + |Some encoding -> { uri with encoding=Some (Pct.cast_decoded encoding) } + |None -> { uri with encoding=None } + let host uri = get_decoded_opt uri.host let with_host uri = function @@ -797,7 +827,7 @@ let query uri = Query.kv uri.query let verbatim_query uri = Query.(match uri.query with | Raw (qs,_) -> qs | KV [] -> None - | KV kv -> Some (encoded_of_query ?scheme:(scheme uri) kv) + | KV kv -> Some (encoded_of_query ?scheme:(encoding uri) kv) ) let get_query_param' uri k = Query.(find (kv uri.query) k) let get_query_param uri k = @@ -823,11 +853,11 @@ let path_and_query uri = match (path uri), (query uri) with |"", [] -> "/" (* TODO: What about same document? (/) *) |"", q -> (* TODO: What about same document? (/) *) - let scheme = uncast_opt uri.scheme in + let scheme = uncast_opt uri.encoding in Printf.sprintf "/?%s" (encoded_of_query ?scheme q) |p, [] -> p |p, q -> - let scheme = uncast_opt uri.scheme in + let scheme = uncast_opt uri.encoding in Printf.sprintf "%s?%s" p (encoded_of_query ?scheme q) (* TODO: functions to add and remove from a URI *) diff --git a/lib/uri.mli b/lib/uri.mli index 4e69555..31a01ef 100644 --- a/lib/uri.mli +++ b/lib/uri.mli @@ -79,9 +79,9 @@ val canonicalize : t -> t supplied without host, an empty host is added. If path is supplied and userinfo, host, or port is also supplied, path is made absolute but not resolved. *) -val make : ?scheme:string -> ?userinfo:string -> ?host:string -> - ?port:int -> ?path:string -> ?query:(string * string list) list -> - ?fragment:string -> unit -> t +val make : ?encoding:string -> ?scheme:string -> ?userinfo:string + -> ?host:string -> ?port:int -> ?path:string + -> ?query:(string * string list) list -> ?fragment:string -> unit -> t (** {3 Query functions } @@ -192,6 +192,8 @@ val scheme : t -> string option Input URI is not modified *) val with_scheme : t -> string option -> t +val with_encoding : t -> string option -> t + (** Get the userinfo component of a URI *) val userinfo : t -> string option diff --git a/lib_test/test_runner.ml b/lib_test/test_runner.ml index 8e1afca..9455a18 100644 --- a/lib_test/test_runner.ml +++ b/lib_test/test_runner.ml @@ -20,7 +20,7 @@ open OUnit open Printf (* Tuples of decoded and encoded strings. The first element is a number to - refer to the test, as the pcts_large version duplicates the second field + refer to the test, as the pcts_large version duplicates the second field to a large size, so it cant be used as the name of the test *) let pcts = [ (1, "hello world!", "hello%20world!"); @@ -46,7 +46,7 @@ let pcts_large = done; (n, Buffer.contents a', Buffer.contents b') ) pcts - + (* Tuple of string URI and the decoded version *) let uri_encodes = [ "https://user:pass@foo.com:123/wh/at/ever?foo=1&bar=5#5", @@ -86,7 +86,7 @@ let map_pcts_tests size name test args = ) args let test_pct_small = - (map_pcts_tests "small" "encode" (fun a b -> b, (Uri.pct_encode a)) pcts) @ + (map_pcts_tests "small" "encode" (fun a b -> b, (Uri.pct_encode a)) pcts) @ (map_pcts_tests "small" "decode" (fun a b -> (Uri.pct_decode b), a) pcts) let test_pct_large = @@ -608,6 +608,50 @@ let test_canonicalize = ) ) canonical_map +let aws_encode_val = + let safe_chars = + let a = Array.make 256 false in + let always_safe = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_.-~" in + for i = 0 to String.length always_safe - 1 do + let c = Char.code always_safe.[i] in + a.(c) <- true + done; + a + in + fun s -> + let len = (String.length s) in + let b = Buffer.create len in + for i = 0 to len - 1 do + let code = Char.code s.[i] in + if safe_chars.(code) then + Buffer.add_char b s.[i] + else + Buffer.add_string b (Printf.sprintf "%%%02X" code); + done; + Buffer.contents b + +let aws_encode kv = + let kvs = List.map (fun (x, y) -> x ^ "=" ^ aws_encode_val y) kv in + String.concat "&" kvs + +let test_aws_encode = + let f q = + let query = List.map (fun (k, v) -> (k, [v])) q in + let u = Uri.make ~encoding:"aws" ~query () in + Uri.path_and_query u + in + let test q = + let q' = "/?" ^ aws_encode q in + (q, q') + in + List.map (fun (in_, out) -> + out >:: (fun () -> assert_equal ~printer:(fun x -> x) out (f in_))) + [ test [ "q","*" ] + ; test [ "q","star wars" + ; "q.options", "{fields: ['title^5.0','description']}"] + ] + (* Returns true if the result list contains successes only. Copied from oUnit source as it isnt exposed by the mli *) let rec was_successful = @@ -639,6 +683,7 @@ let _ = @ test_sexping @ test_with_change @ test_canonicalize + @ test_aws_encode ) in let verbose = ref false in let set_verbose _ = verbose := true in