Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 44 additions & 14 deletions lib/uri.ml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ let rec compare_list f t t' = match t, t' with
| x::xs, y::ys ->
match f x y with 0 -> compare_list f xs ys | c -> c

(** Safe characters that are always allowed in a URI
(** Safe characters that are always allowed in a URI
* Unfortunately, this varies depending on which bit of the URI
* is being parsed, so there are multiple variants (and this
* set is probably not exhaustive. TODO: check.
Expand All @@ -74,7 +74,7 @@ module type Scheme = sig
val canonicalize_path : string list -> string list
end

module Generic : Scheme = struct
module Generic = struct
let sub_delims a =
let subd = "!$&'()*+,;=" in
for i = 0 to String.length subd - 1 do
Expand All @@ -83,7 +83,7 @@ module Generic : Scheme = struct
done;
a

let safe_chars : safe_chars =
let safe_chars : safe_chars =
let a = Array.make 256 false in
let always_safe =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_.-~" in
Expand Down Expand Up @@ -201,12 +201,28 @@ module Urn : Scheme = struct

end

module Aws : Scheme = struct
include Http

let safe_chars_query_key =
let a = Array.copy Generic.safe_chars in
a.(Char.code '/') <- true;
a

let safe_chars_for_component = function
| `Query_key -> safe_chars_query_key
| `Query_value -> Generic.safe_chars
| `Query -> Http.safe_chars_for_component `Query
| x -> Http.safe_chars_for_component x
end

let module_of_scheme = function
| Some s -> begin match String.lowercase s with
| "http" -> (module Http : Scheme)
| "https" -> (module Https : Scheme)
| "file" -> (module File : Scheme)
| "urn" -> (module Urn : Scheme)
| "aws" -> (module Aws : Scheme)
| _ -> (module Generic : Scheme)
end
| None -> (module Generic : Scheme)
Expand All @@ -215,10 +231,10 @@ let module_of_scheme = function
* and this really, really shouldn't be mixed up. So this Pct module
* defines abstract Pct.encoded and Pct.decoded types which sets the
* state of the underlying string. There are functions to "cast" to
* and from these and normal strings, and this promotes a bit of
* internal safety. These types are not exposed to the external
* and from these and normal strings, and this promotes a bit of
* internal safety. These types are not exposed to the external
* interface, as casting to-and-from is quite a bit of hassle and
* probably not a lot of use to the average consumer of this library
* probably not a lot of use to the average consumer of this library
*)
module Pct : sig
type encoded with sexp
Expand Down Expand Up @@ -255,7 +271,7 @@ end = struct
let unlift_decoded f = f
let unlift_decoded2 f = f

(** Scan for reserved characters and replace them with
(** Scan for reserved characters and replace them with
percent-encoded equivalents.
@return a percent-encoded string *)
let encode ?scheme ?(component=`Path) b =
Expand Down Expand Up @@ -509,6 +525,7 @@ let encoded_of_query ?scheme = Query.encoded_of_query ?scheme

(* Type of the URI, with most bits being optional *)
type t = {
encoding: Pct.decoded sexp_option;
scheme: Pct.decoded sexp_option;
userinfo: Userinfo.t sexp_option;
host: Pct.decoded sexp_option;
Expand All @@ -519,6 +536,7 @@ type t = {
} with sexp

let empty = {
encoding = None;
scheme = None;
userinfo = None;
host = None;
Expand Down Expand Up @@ -572,7 +590,7 @@ let normalize schem uri =
* casting/uncasting (which isn't fully identity due to the option box), but it is
* no big deal for now.
*)
let make ?scheme ?userinfo ?host ?port ?path ?query ?fragment () =
let make ?encoding ?scheme ?userinfo ?host ?port ?path ?query ?fragment () =
let decode = function
|Some x -> Some (Pct.cast_decoded x) |None -> None in
let host = match userinfo, host, port with
Expand All @@ -593,9 +611,10 @@ let make ?scheme ?userinfo ?host ?port ?path ?query ?fragment () =
| Some p -> Query.KV p
in
let scheme = decode scheme in
let encoding = decode encoding in
normalize scheme
{ scheme; userinfo;
host=decode host; port; path; query; fragment=decode fragment }
{ scheme ; encoding ; userinfo ; host=decode host ; port ; path
; query ; fragment=decode fragment }

(** Parse a URI string into a structure *)
let of_string s =
Expand Down Expand Up @@ -645,7 +664,8 @@ let of_string s =
| None -> Query.Raw (None, Lazy.from_val [])
in
let fragment = get_opt subs 9 in
normalize scheme { scheme; userinfo; host; port; path; query; fragment }
normalize scheme { scheme ; userinfo ; host ; port ; path
; query ; fragment ; encoding=None }

(** Convert a URI structure into a percent-encoded string
<http://tools.ietf.org/html/rfc3986#section-5.3>
Expand Down Expand Up @@ -687,6 +707,10 @@ let to_string uri =
Buffer.add_char buf ':';
Buffer.add_string buf (string_of_int port)
);
let scheme =
match uri.encoding with
| None -> scheme
| Some s -> Some (Pct.uncast_decoded s) in
(match uri.path with (* Handle relative paths correctly *)
| [] -> ()
| "/"::_ ->
Expand Down Expand Up @@ -720,11 +744,17 @@ let to_string uri =
(* Various accessor functions, as the external uri type is abstract *)
let get_decoded_opt = function None -> None |Some x -> Some (Pct.uncast_decoded x)
let scheme uri = get_decoded_opt uri.scheme
let encoding uri = get_decoded_opt uri.encoding
let with_scheme uri =
function
|Some scheme -> { uri with scheme=Some (Pct.cast_decoded scheme) }
|None -> { uri with scheme=None }

let with_encoding uri =
function
|Some encoding -> { uri with encoding=Some (Pct.cast_decoded encoding) }
|None -> { uri with encoding=None }

let host uri = get_decoded_opt uri.host
let with_host uri =
function
Expand Down Expand Up @@ -797,7 +827,7 @@ let query uri = Query.kv uri.query
let verbatim_query uri = Query.(match uri.query with
| Raw (qs,_) -> qs
| KV [] -> None
| KV kv -> Some (encoded_of_query ?scheme:(scheme uri) kv)
| KV kv -> Some (encoded_of_query ?scheme:(encoding uri) kv)
)
let get_query_param' uri k = Query.(find (kv uri.query) k)
let get_query_param uri k =
Expand All @@ -823,11 +853,11 @@ let path_and_query uri =
match (path uri), (query uri) with
|"", [] -> "/" (* TODO: What about same document? (/) *)
|"", q -> (* TODO: What about same document? (/) *)
let scheme = uncast_opt uri.scheme in
let scheme = uncast_opt uri.encoding in
Printf.sprintf "/?%s" (encoded_of_query ?scheme q)
|p, [] -> p
|p, q ->
let scheme = uncast_opt uri.scheme in
let scheme = uncast_opt uri.encoding in
Printf.sprintf "%s?%s" p (encoded_of_query ?scheme q)

(* TODO: functions to add and remove from a URI *)
Expand Down
8 changes: 5 additions & 3 deletions lib/uri.mli
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,9 @@ val canonicalize : t -> t
supplied without host, an empty host is added. If path is supplied
and userinfo, host, or port is also supplied, path is made
absolute but not resolved. *)
val make : ?scheme:string -> ?userinfo:string -> ?host:string ->
?port:int -> ?path:string -> ?query:(string * string list) list ->
?fragment:string -> unit -> t
val make : ?encoding:string -> ?scheme:string -> ?userinfo:string
-> ?host:string -> ?port:int -> ?path:string
-> ?query:(string * string list) list -> ?fragment:string -> unit -> t

(** {3 Query functions }

Expand Down Expand Up @@ -192,6 +192,8 @@ val scheme : t -> string option
Input URI is not modified *)
val with_scheme : t -> string option -> t

val with_encoding : t -> string option -> t

(** Get the userinfo component of a URI *)
val userinfo : t -> string option

Expand Down
51 changes: 48 additions & 3 deletions lib_test/test_runner.ml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ open OUnit
open Printf

(* Tuples of decoded and encoded strings. The first element is a number to
refer to the test, as the pcts_large version duplicates the second field
refer to the test, as the pcts_large version duplicates the second field
to a large size, so it cant be used as the name of the test *)
let pcts = [
(1, "hello world!", "hello%20world!");
Expand All @@ -46,7 +46,7 @@ let pcts_large =
done;
(n, Buffer.contents a', Buffer.contents b')
) pcts

(* Tuple of string URI and the decoded version *)
let uri_encodes = [
"https://user:[email protected]:123/wh/at/ever?foo=1&bar=5#5",
Expand Down Expand Up @@ -86,7 +86,7 @@ let map_pcts_tests size name test args =
) args

let test_pct_small =
(map_pcts_tests "small" "encode" (fun a b -> b, (Uri.pct_encode a)) pcts) @
(map_pcts_tests "small" "encode" (fun a b -> b, (Uri.pct_encode a)) pcts) @
(map_pcts_tests "small" "decode" (fun a b -> (Uri.pct_decode b), a) pcts)

let test_pct_large =
Expand Down Expand Up @@ -608,6 +608,50 @@ let test_canonicalize =
)
) canonical_map

let aws_encode_val =
let safe_chars =
let a = Array.make 256 false in
let always_safe =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_.-~" in
for i = 0 to String.length always_safe - 1 do
let c = Char.code always_safe.[i] in
a.(c) <- true
done;
a
in
fun s ->
let len = (String.length s) in
let b = Buffer.create len in
for i = 0 to len - 1 do
let code = Char.code s.[i] in
if safe_chars.(code) then
Buffer.add_char b s.[i]
else
Buffer.add_string b (Printf.sprintf "%%%02X" code);
done;
Buffer.contents b

let aws_encode kv =
let kvs = List.map (fun (x, y) -> x ^ "=" ^ aws_encode_val y) kv in
String.concat "&" kvs

let test_aws_encode =
let f q =
let query = List.map (fun (k, v) -> (k, [v])) q in
let u = Uri.make ~encoding:"aws" ~query () in
Uri.path_and_query u
in
let test q =
let q' = "/?" ^ aws_encode q in
(q, q')
in
List.map (fun (in_, out) ->
out >:: (fun () -> assert_equal ~printer:(fun x -> x) out (f in_)))
[ test [ "q","*" ]
; test [ "q","star wars"
; "q.options", "{fields: ['title^5.0','description']}"]
]

(* Returns true if the result list contains successes only.
Copied from oUnit source as it isnt exposed by the mli *)
let rec was_successful =
Expand Down Expand Up @@ -639,6 +683,7 @@ let _ =
@ test_sexping
@ test_with_change
@ test_canonicalize
@ test_aws_encode
) in
let verbose = ref false in
let set_verbose _ = verbose := true in
Expand Down