From 443c11db2ff983024df35109bbe3267addf1ceee Mon Sep 17 00:00:00 2001 From: Jorge Hermo Date: Sun, 5 Jan 2025 11:25:15 +0100 Subject: [PATCH] feat: drop usage of the url crate --- core/Cargo.lock | 273 +--------------------------- core/Cargo.toml | 3 - core/src/raw/http_util/mod.rs | 1 + core/src/raw/http_util/uri.rs | 19 ++ core/src/types/builder.rs | 23 +-- core/src/types/operator/registry.rs | 12 +- 6 files changed, 41 insertions(+), 290 deletions(-) diff --git a/core/Cargo.lock b/core/Cargo.lock index e4b60409e368..28417ce9e28b 100644 --- a/core/Cargo.lock +++ b/core/Cargo.lock @@ -2549,17 +2549,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "displaydoc" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.90", -] - [[package]] name = "dlv-list" version = "0.5.2" @@ -3897,124 +3886,6 @@ dependencies = [ "cc", ] -[[package]] -name = "icu_collections" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" -dependencies = [ - "displaydoc", - "yoke", - "zerofrom", - "zerovec", -] - -[[package]] -name = "icu_locid" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" -dependencies = [ - "displaydoc", - "litemap", - "tinystr", - "writeable", - "zerovec", -] - -[[package]] -name = "icu_locid_transform" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" -dependencies = [ - "displaydoc", - "icu_locid", - "icu_locid_transform_data", - "icu_provider", - "tinystr", - "zerovec", -] - -[[package]] -name = "icu_locid_transform_data" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" - -[[package]] -name = "icu_normalizer" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" -dependencies = [ - "displaydoc", - "icu_collections", - "icu_normalizer_data", - "icu_properties", - "icu_provider", - "smallvec", - "utf16_iter", - "utf8_iter", - "write16", - "zerovec", -] - -[[package]] -name = "icu_normalizer_data" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" - -[[package]] -name = "icu_properties" -version = "1.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" -dependencies = [ - "displaydoc", - "icu_collections", - "icu_locid_transform", - "icu_properties_data", - "icu_provider", - "tinystr", - "zerovec", -] - -[[package]] -name = "icu_properties_data" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" - -[[package]] -name = "icu_provider" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" -dependencies = [ - "displaydoc", - "icu_locid", - "icu_provider_macros", - "stable_deref_trait", - "tinystr", - "writeable", - "yoke", - "zerofrom", - "zerovec", -] - -[[package]] -name = "icu_provider_macros" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.90", -] - [[package]] name = "ident_case" version = "1.0.1" @@ -4033,23 +3904,12 @@ dependencies = [ [[package]] name = "idna" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" -dependencies = [ - "idna_adapter", - "smallvec", - "utf8_iter", -] - -[[package]] -name = "idna_adapter" -version = "1.2.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" +checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" dependencies = [ - "icu_normalizer", - "icu_properties", + "unicode-bidi", + "unicode-normalization", ] [[package]] @@ -4449,12 +4309,6 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" -[[package]] -name = "litemap" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104" - [[package]] name = "lock_api" version = "0.4.12" @@ -5213,7 +5067,6 @@ dependencies = [ "tracing", "tracing-opentelemetry", "tracing-subscriber", - "url", "uuid", ] @@ -8258,17 +8111,6 @@ dependencies = [ "futures-core", ] -[[package]] -name = "synstructure" -version = "0.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.90", -] - [[package]] name = "tagptr" version = "0.2.0" @@ -8464,16 +8306,6 @@ dependencies = [ "crunchy", ] -[[package]] -name = "tinystr" -version = "0.7.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" -dependencies = [ - "displaydoc", - "zerovec", -] - [[package]] name = "tinytemplate" version = "1.2.1" @@ -9062,12 +8894,12 @@ dependencies = [ [[package]] name = "url" -version = "2.5.4" +version = "2.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" +checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c" dependencies = [ "form_urlencoded", - "idna 1.0.3", + "idna 0.5.0", "percent-encoding", ] @@ -9083,18 +8915,6 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" -[[package]] -name = "utf16_iter" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" - -[[package]] -name = "utf8_iter" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" - [[package]] name = "utf8parse" version = "0.2.2" @@ -9697,18 +9517,6 @@ dependencies = [ "windows-sys 0.48.0", ] -[[package]] -name = "write16" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" - -[[package]] -name = "writeable" -version = "0.5.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" - [[package]] name = "ws_stream_wasm" version = "0.7.4" @@ -9761,30 +9569,6 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" -[[package]] -name = "yoke" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40" -dependencies = [ - "serde", - "stable_deref_trait", - "yoke-derive", - "zerofrom", -] - -[[package]] -name = "yoke-derive" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.90", - "synstructure", -] - [[package]] name = "zerocopy" version = "0.7.35" @@ -9806,55 +9590,12 @@ dependencies = [ "syn 2.0.90", ] -[[package]] -name = "zerofrom" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e" -dependencies = [ - "zerofrom-derive", -] - -[[package]] -name = "zerofrom-derive" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.90", - "synstructure", -] - [[package]] name = "zeroize" version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" -[[package]] -name = "zerovec" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" -dependencies = [ - "yoke", - "zerofrom", - "zerovec-derive", -] - -[[package]] -name = "zerovec-derive" -version = "0.10.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.90", -] - [[package]] name = "zigzag" version = "0.1.0" diff --git a/core/Cargo.toml b/core/Cargo.toml index 5597f339a309..d404845dd3e2 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -255,9 +255,6 @@ reqwest = { version = "0.12.2", features = [ serde = { version = "1", features = ["derive"] } serde_json = "1" tokio = { version = "1.27", features = ["sync", "io-util"] } -# TODO: I added this dependency in order to not re-implement the Url::query_pairs function. -# Is it okay to include it? (we are using that crate in the ofs binary https://github.com/apache/opendal/blob/main/bin/ofs/Cargo.toml#L45 -url = "2.5.4" uuid = { version = "1", features = ["serde", "v4"] } # Test only dependencies diff --git a/core/src/raw/http_util/mod.rs b/core/src/raw/http_util/mod.rs index c90b1e485845..6fa39f9f68b0 100644 --- a/core/src/raw/http_util/mod.rs +++ b/core/src/raw/http_util/mod.rs @@ -55,6 +55,7 @@ pub use header::parse_prefixed_headers; mod uri; pub use uri::percent_decode_path; pub use uri::percent_encode_path; +pub use uri::query_pairs; mod error; pub use error::new_request_build_error; diff --git a/core/src/raw/http_util/uri.rs b/core/src/raw/http_util/uri.rs index 1f3b893e035e..1339494f8fb3 100644 --- a/core/src/raw/http_util/uri.rs +++ b/core/src/raw/http_util/uri.rs @@ -58,6 +58,25 @@ pub fn percent_decode_path(path: &str) -> String { } } +/// query_pairs will parse a query string encoded as key-value pairs separated by `&` to a vector of key-value pairs. +/// It also does percent decoding for both key and value. +/// +/// Note that `?` is not allowed in the query string, and it will be treated as a part of the first key if included. +pub fn query_pairs(query: &str) -> Vec<(String, String)> { + query + .split('&') + .filter_map(|pair| { + let mut iter = pair.splitn(2, '='); + // TODO: should we silently ignore invalid pairs and filter them out without the user noticing? + // or should we return an error in the whole `query_pairs` function so the caller knows it failed? + let key = iter.next()?; + let value = iter.next().unwrap_or(""); + Some((key, value)) + }) + .map(|(key, value)| (percent_decode_path(key), percent_decode_path(value))) + .collect() +} + #[cfg(test)] mod tests { use super::*; diff --git a/core/src/types/builder.rs b/core/src/types/builder.rs index cc2a4895286b..6f65c50d6446 100644 --- a/core/src/types/builder.rs +++ b/core/src/types/builder.rs @@ -17,6 +17,7 @@ use std::fmt::Debug; +use http::Uri; use serde::de::DeserializeOwned; use serde::Serialize; @@ -149,27 +150,19 @@ pub trait Configurator: Serialize + DeserializeOwned + Debug + 'static { /// TODO: document this. fn from_uri(uri: &str, options: impl IntoIterator) -> Result { - // TODO: We are using the `url::Url` struct instead of `http:Uri`, because `http::Uri` does not implement - // the `query_pairs` method, which we need to extract the options from the uri. - // `http::Uri` has the `query` method (https://docs.rs/http/latest/http/uri/struct.Uri.html#method.query) - // but it outputs the raw query string. Otherwise, we would need to implement the parsing of the query string - // ourselves. - - // TODO: we are using `url::Url`, not an URI. Should we rename this method to `from_url`? (all of the rest of the PR) - // The rfc stated that we would use URIs and not URL, but I wonder if we should refer to just URL instead. - // See the goal section of https://url.spec.whatwg.org/ (the `url` crate implements that spec). - // There they say that the term URI is confusing and that URL - // should be used instead. The `url::Url` crate supports URL fragments, so it should be enough for our use case. - let parsed_url = url::Url::parse(uri).map_err(|err| { + let parsed_uri = uri.parse::().map_err(|err| { Error::new(ErrorKind::ConfigInvalid, "uri is invalid") .with_context("uri", uri) .set_source(err) })?; - // TODO: I have some doubts about this + + // TODO: I have some doubts about this default implementation // It was inspired from https://github.com/apache/opendal/blob/52c96bb8e8cb4d024ccab1f415c4756447c726dd/bin/ofs/src/main.rs#L60 // Parameters should be specified in uri's query param. Example: 'fs://?root=' // this is very similar to https://github.com/apache/opendal/blob/52c96bb8e8cb4d024ccab1f415c4756447c726dd/bin/ofs/README.md?plain=1#L45 - let url_options = parsed_url.query_pairs().into_owned(); + let query_pairs = parsed_uri.query().map(query_pairs).unwrap_or_default(); + + // TODO: should we log a warning if the query_pairs vector is empty? // TODO: we are not interpreting the host or path params // the `let op = Operator::from_uri("fs:///tmp/test", vec![])?;` statement from the RFC wont work. @@ -177,7 +170,7 @@ pub trait Configurator: Serialize + DeserializeOwned + Debug + 'static { // in `ofs`. The `fs` service should override this default implementation if it wants to use the host or path params? // TODO: should we merge it this way? - let merged_options = url_options.into_iter().chain(options); + let merged_options = query_pairs.into_iter().chain(options); Self::from_iter(merged_options) } diff --git a/core/src/types/operator/registry.rs b/core/src/types/operator/registry.rs index c43094e3c5e7..f85257a6a10d 100644 --- a/core/src/types/operator/registry.rs +++ b/core/src/types/operator/registry.rs @@ -18,6 +18,8 @@ use std::cell::LazyCell; use std::collections::HashMap; +use http::Uri; + use crate::services::*; use crate::*; @@ -57,16 +59,15 @@ impl OperatorRegistry { ) -> Result { // TODO: we use the `url::Url` struct instead of `http:Uri`, because // we needed it in `Configurator::from_uri` method. - let parsed_url = url::Url::parse(uri).map_err(|err| { + let parsed_uri = uri.parse::().map_err(|err| { Error::new(ErrorKind::ConfigInvalid, "uri is invalid") .with_context("uri", uri) .set_source(err) })?; - // TODO: with the `url::Url` struct, we always have the scheme (it is not an Option) - // but with the `http::Uri` crate, it can be missing https://docs.rs/http/latest/http/uri/struct.Uri.html#method.scheme - // which one should we use? - let scheme = parsed_url.scheme(); + let scheme = parsed_uri.scheme_str().ok_or_else(|| { + Error::new(ErrorKind::ConfigInvalid, "uri is missing scheme").with_context("uri", uri) + })?; let factory = self.registry.get(scheme).ok_or_else(|| { Error::new( @@ -81,7 +82,6 @@ impl OperatorRegistry { // however, impl Traits in type aliases is unstable and also are not allowed in fn pointers let options = options.into_iter().collect(); - // TODO: `OperatorFactory` should use `&str` instead of `String`? we are cloning it anyway factory(uri, options) }